Skip to content

Commit

Permalink
Allow virtual threads to unmount when blocked on synchronized
Browse files Browse the repository at this point in the history
  • Loading branch information
pchilano committed Oct 16, 2024
1 parent e175060 commit d7c9ac7
Show file tree
Hide file tree
Showing 103 changed files with 2,174 additions and 351 deletions.
60 changes: 45 additions & 15 deletions src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,44 +170,65 @@ class StubFrame: public StackObj {
private:
StubAssembler* _sasm;
bool _return_state;
bool _use_pop_on_epilogue;

public:
StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state=requires_return);
void load_argument(int offset_in_words, Register reg);
StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments,
return_state_t return_state, bool use_pop_on_epilogue);

public:
StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, bool use_pop_on_epilogue);
StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state);
StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments);
~StubFrame();

void load_argument(int offset_in_words, Register reg);
};;

// Emit the standard entry sequence for a C1 runtime stub: record the
// stub's name and GC-arguments flag, then push a new rfp/lr frame pair
// via enter().
void StubAssembler::prologue(const char* name, bool must_gc_arguments) {
  set_info(name, must_gc_arguments);
  enter();
}

void StubAssembler::epilogue() {
leave();
// Tear down the frame created by prologue() and return to the caller.
// use_pop selects how the saved rfp/lr pair is restored: via an explicit
// pop (ldp with post-increment) that does not trust the current rfp, or
// via a conventional leave().
void StubAssembler::epilogue(bool use_pop) {
  // Avoid using a leave instruction when this frame may
  // have been frozen, since the current value of rfp
  // restored from the stub would be invalid. We still
  // must restore the rfp value saved on enter though.
  if (use_pop) {
    // Reload the (rfp, lr) pair saved by enter() and pop it off the
    // stack by post-incrementing sp past the two saved words.
    ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
  } else {
    leave();
  }
  ret(lr);
}

#define __ _sasm->

StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments, return_state_t return_state) {
_sasm = sasm;
_return_state = return_state;
// Primary constructor: records the return policy and epilogue style,
// then emits the stub's prologue through the StubAssembler.
StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments,
                     return_state_t return_state, bool use_pop_on_epilogue)
  : _sasm(sasm), _return_state(return_state), _use_pop_on_epilogue(use_pop_on_epilogue) {
  __ prologue(name, must_gc_arguments);
}

// Convenience overload: stub returns normally; caller chooses whether the
// epilogue must use the pop-based frame teardown (see StubAssembler::epilogue).
StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments,
                     bool use_pop_on_epilogue) :
  StubFrame(sasm, name, must_gc_arguments, requires_return, use_pop_on_epilogue) {}

// Convenience overload: explicit return policy, standard leave()-based epilogue.
StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments,
                     return_state_t return_state) :
  StubFrame(sasm, name, must_gc_arguments, return_state, /*use_pop_on_epilogue*/false) {}

// Convenience overload with defaults: returns normally, leave()-based epilogue.
StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) :
  StubFrame(sasm, name, must_gc_arguments, requires_return, /*use_pop_on_epilogue*/false) {}

// load parameters that were stored with LIR_Assembler::store_parameter
// Note: offsets for store_parameter and load_argument must match
// Loads the stub argument at offset_in_words (in words) into reg.
void StubFrame::load_argument(int offset_in_words, Register reg) {
  __ load_parameter(offset_in_words, reg);
}

// Destructor: emits the stub's exit sequence. As shown in SOURCE this
// block is a garbled diff merge — it calls the removed zero-argument
// __ epilogue() inside the if/else AND an unconditional
// __ epilogue(_use_pop_on_epilogue) afterwards, which would emit two
// epilogues on the returning path and call a signature this change
// removed. Reconstructed coherent form: a single epilogue call that
// carries the pop-on-epilogue flag, or a trap for stubs that never return.
StubFrame::~StubFrame() {
  if (_return_state == requires_return) {
    __ epilogue(_use_pop_on_epilogue);
  } else {
    __ should_not_reach_here();
  }
}

#undef __
Expand Down Expand Up @@ -252,7 +273,7 @@ static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) {

for (int i = 0; i < FrameMap::nof_cpu_regs; i++) {
Register r = as_Register(i);
if (i <= 18 && i != rscratch1->encoding() && i != rscratch2->encoding()) {
if (r == rthread || (i <= 18 && i != rscratch1->encoding() && i != rscratch2->encoding())) {
int sp_offset = cpu_reg_save_offsets[i];
oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
r->as_VMReg());
Expand Down Expand Up @@ -337,6 +358,15 @@ void Runtime1::initialize_pd() {
}
}

// return: offset in 64-bit words.
// Returns the position (in 64-bit words from SP) of the saved rthread
// register within the monitorenter runtime blob's register-save area.
uint Runtime1::runtime_blob_current_thread_offset(frame f) {
  CodeBlob* cb = f.cb();
  // Validate the frame before the identity asserts so that a null or
  // non-stub code blob fails with the "invalid frame" diagnostic instead
  // of the misleading "must be" message.
  assert(cb != nullptr && cb->is_runtime_stub(), "invalid frame");
  assert(cb == Runtime1::blob_for(C1StubId::monitorenter_id) ||
         cb == Runtime1::blob_for(C1StubId::monitorenter_nofpu_id), "must be");
  // cpu_reg_save_offsets entries are SP-relative slot offsets; divide by
  // two to convert to 64-bit words.
  int offset = cpu_reg_save_offsets[rthread->encoding()];
  return offset / 2; // SP offsets are in halfwords
}

// target: the entry point of the method that creates and posts the exception oop
// has_argument: true if the exception needs arguments (passed in rscratch1 and rscratch2)
Expand Down Expand Up @@ -862,7 +892,7 @@ OopMapSet* Runtime1::generate_code_for(C1StubId id, StubAssembler* sasm) {
// fall through
case C1StubId::monitorenter_id:
{
StubFrame f(sasm, "monitorenter", dont_gc_arguments);
StubFrame f(sasm, "monitorenter", dont_gc_arguments, /*use_pop_on_epilogue*/true);
OopMap* map = save_live_registers(sasm, save_fpu_registers);

// Called with store_parameter and not C abi
Expand Down
50 changes: 47 additions & 3 deletions src/hotspot/cpu/aarch64/continuationFreezeThaw_aarch64.inline.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -129,6 +129,12 @@ void FreezeBase::adjust_interpreted_frame_unextended_sp(frame& f) {
}
}

inline void FreezeBase::prepare_freeze_interpreted_top_frame(const frame& f) {
assert(*f.addr_at(frame::interpreter_frame_last_sp_offset) == 0, "should be null for top frame");
intptr_t* lspp = f.addr_at(frame::interpreter_frame_last_sp_offset);
*lspp = f.unextended_sp() - f.fp();
}

inline void FreezeBase::relativize_interpreted_frame_metadata(const frame& f, const frame& hf) {
assert(hf.fp() == hf.unextended_sp() + (f.fp() - f.unextended_sp()), "");
assert((f.at(frame::interpreter_frame_last_sp_offset) != 0)
Expand Down Expand Up @@ -235,7 +241,7 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
int fsize = FKind::size(hf);
intptr_t* frame_sp = caller.unextended_sp() - fsize;
if (bottom || caller.is_interpreted_frame()) {
int argsize = hf.compiled_frame_stack_argsize();
int argsize = FKind::stack_argsize(hf);

fsize += argsize;
frame_sp -= argsize;
Expand All @@ -253,7 +259,7 @@ template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame&
fp = frame_sp + FKind::size(hf) - frame::sender_sp_offset;
} else {
fp = FKind::stub
? frame_sp + fsize - frame::sender_sp_offset // on AArch64, this value is used for the safepoint stub
? frame_sp + fsize - frame::sender_sp_offset // fp always points to the address below the pushed return pc. We need correct address.
: *(intptr_t**)(hf.sp() - frame::sender_sp_offset); // we need to re-read fp because it may be an oop and we might have fixed the frame.
}
return frame(frame_sp, frame_sp, fp, hf.pc(), hf.cb(), hf.oop_map(), false); // TODO PERF : this computes deopt state; is it necessary?
Expand All @@ -277,6 +283,44 @@ inline void ThawBase::patch_pd(frame& f, const frame& caller) {
patch_callee_link(caller, caller.fp());
}

// Patch the callee's saved-link slot with the caller's fp, which lives
// frame::sender_sp_offset words below the given caller sp.
inline void ThawBase::patch_pd(frame& f, intptr_t* caller_sp) {
  patch_callee_link(f, caller_sp - frame::sender_sp_offset);
}

// Returns the sp to use for the top frame being thawed, adjusting it for
// the C2 runtime stub whose real frame is two words larger than its
// declared frame_size (see comment below). Mutates the stack in place.
inline intptr_t* ThawBase::possibly_adjust_frame(frame& top) {
  intptr_t* sp = top.sp();
  CodeBlob* cb = top.cb();

  if (cb->frame_size() == 2) {
    // C2 runtime stub case. For aarch64 the real size of the c2 runtime stub is 2 words bigger
    // than what we think, i.e. size is 4. This is because the _last_Java_sp is not set to the
    // sp right before making the call to the VM, but rather it is artificially set 2 words above
    // this real sp so that we can store the return address at last_Java_sp[-1], and keep this
    // property where we can retrieve the last_Java_pc from the last_Java_sp. But that means that
    // once we return to the runtime stub, the code will adjust sp according to this real size.
    // So we must adjust the frame size back here and we copy lr/rfp again.
    // Lower sp by the two missing words, then re-copy the saved lr/rfp
    // pair down so it sits just below the adjusted sp.
    sp -= 2;
    sp[-2] = sp[0];
    sp[-1] = sp[1];

    log_develop_trace(continuations, preempt)("adjusted sp for c2 runtime stub, initial sp: " INTPTR_FORMAT " final sp: " INTPTR_FORMAT
      " fp: " INTPTR_FORMAT, p2i(sp + frame::metadata_words), p2i(sp), sp[-2]);
  }
  return sp;
}

// Set up the entry (enterSpecial) frame and install the cleanup return pc
// plus the saved fp just below its sp, so that returning from the entry
// frame runs the continuation-cleanup stub. Returns the entry frame's sp.
inline intptr_t* ThawBase::push_cleanup_continuation() {
  frame entry = new_entry_frame();
  intptr_t* sp = entry.sp();

  sp[-2] = (intptr_t)entry.fp();
  sp[-1] = (intptr_t)ContinuationEntry::cleanup_pc();

  log_develop_trace(continuations, preempt)("push_cleanup_continuation initial sp: " INTPTR_FORMAT " final sp: " INTPTR_FORMAT, p2i(sp + 2 * frame::metadata_words), p2i(sp));
  return sp;
}

inline void ThawBase::derelativize_interpreted_frame_metadata(const frame& hf, const frame& f) {
// Make sure that last_sp is kept relativized.
assert((intptr_t*)f.at_relative(frame::interpreter_frame_last_sp_offset) == f.unextended_sp(), "");
Expand Down
20 changes: 18 additions & 2 deletions src/hotspot/cpu/aarch64/continuationHelper_aarch64.inline.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -40,6 +40,22 @@ static inline intptr_t** link_address(const frame& f) {
: (intptr_t**)(f.unextended_sp() + f.cb()->frame_size() - frame::sender_sp_offset);
}

// Arrange for the preempted frame to resume in the preempt stub: either by
// patching the return pc saved on the stack (runtime frames) or by setting
// the thread's alternate-return slot (all other frames).
static inline void patch_return_pc_with_preempt_stub(frame& f) {
  if (!f.is_runtime_frame()) {
    // The target will check for preemption once it returns to the
    // interpreter or the native wrapper code and will manually jump to
    // the preempt stub.
    JavaThread::current()->set_preempt_alternate_return(StubRoutines::cont_preempt_stub());
    return;
  }
  // Unlike x86 we don't know where in the callee frame the return pc is
  // saved, so we can't patch the return from the VM call back to Java.
  // Instead, patch the return from the runtime stub back to the compiled
  // method so that the target returns to the preempt cleanup stub.
  intptr_t* caller_sp = f.sp() + f.cb()->frame_size();
  caller_sp[-1] = (intptr_t)StubRoutines::cont_preempt_stub();
}

inline int ContinuationHelper::frame_align_words(int size) {
#ifdef _LP64
return size & 1;
Expand Down Expand Up @@ -83,12 +99,12 @@ inline void ContinuationHelper::set_anchor_to_entry_pd(JavaFrameAnchor* anchor,
anchor->set_last_Java_fp(entry->entry_fp());
}

#ifdef ASSERT
// Record the last Java fp in the anchor; the saved fp lives
// frame::sender_sp_offset words below the given sp.
inline void ContinuationHelper::set_anchor_pd(JavaFrameAnchor* anchor, intptr_t* sp) {
  anchor->set_last_Java_fp(*(intptr_t**)(sp - frame::sender_sp_offset));
}

#ifdef ASSERT
inline bool ContinuationHelper::Frame::assert_frame_laid_out(frame f) {
intptr_t* sp = f.sp();
address pc = ContinuationHelper::return_address_at(
Expand Down
52 changes: 50 additions & 2 deletions src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -666,7 +666,7 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg)
{
assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1");
if (LockingMode == LM_MONITOR) {
call_VM(noreg,
call_VM_preemptable(noreg,
CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
lock_reg);
} else {
Expand Down Expand Up @@ -756,7 +756,7 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg)
bind(slow_case);

// Call the runtime routine for slow case
call_VM(noreg,
call_VM_preemptable(noreg,
CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
lock_reg);

Expand Down Expand Up @@ -1531,6 +1531,54 @@ void InterpreterMacroAssembler::call_VM_base(Register oop_result,
restore_locals();
}

// Emit a VM call from which a virtual thread may be preempted (unmounted).
// Before the call, last_Java_pc is pointed at resume_pc so that if the
// thread is frozen inside the VM it will continue from there once the
// monitor is finally acquired. After the call, checks the thread's
// preempt_alternate_return slot: if set, clears it and branches to it
// (the preempt/cleanup path); otherwise falls through as a normal return.
// oop_result/entry_point/arg_1 follow the call_VM conventions; arg_1 must
// already be in c_rarg1.
void InterpreterMacroAssembler::call_VM_preemptable(Register oop_result,
                                                    address entry_point,
                                                    Register arg_1) {
  assert(arg_1 == c_rarg1, "");
  Label resume_pc, not_preempted;

#ifdef ASSERT
  {
    // The alternate-return slot must be clear on entry; a stale value
    // would cause a bogus branch after the VM call.
    Label L;
    ldr(rscratch1, Address(rthread, JavaThread::preempt_alternate_return_offset()));
    cbz(rscratch1, L);
    stop("Should not have alternate return address set");
    bind(L);
  }
#endif /* ASSERT */

  push_cont_fastpath();

  // Make VM call. In case of preemption set last_pc to the one we want to resume to.
  adr(rscratch1, resume_pc);
  str(rscratch1, Address(rthread, JavaThread::last_Java_pc_offset()));
  call_VM_base(oop_result, noreg, noreg, entry_point, 1, false /*check_exceptions*/);

  pop_cont_fastpath();

  // Check if preempted.
  ldr(rscratch1, Address(rthread, JavaThread::preempt_alternate_return_offset()));
  cbz(rscratch1, not_preempted);
  // Preempted: clear the slot and jump to the alternate return address.
  str(zr, Address(rthread, JavaThread::preempt_alternate_return_offset()));
  br(rscratch1);

  // In case of preemption, this is where we will resume once we finally acquire the monitor.
  bind(resume_pc);
  restore_after_resume(false /* is_native */);

  bind(not_preempted);
}

// Emit the fixup code run when a preempted virtual thread resumes in the
// interpreter: calls the interpreter resume adapter to re-establish frame
// state, then (for native methods) repushes the result-holding tos values.
void InterpreterMacroAssembler::restore_after_resume(bool is_native) {
  lea(rscratch1, ExternalAddress(Interpreter::cont_resume_interpreter_adapter()));
  blr(rscratch1);
  if (is_native) {
    // On resume we need to set up stack as expected
    push(dtos);
    push(ltos);
  }
}

void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) {
assert_different_registers(obj, rscratch1, mdo_addr.base(), mdo_addr.index());
Label update, next, none;
Expand Down
5 changes: 5 additions & 0 deletions src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ class InterpreterMacroAssembler: public MacroAssembler {

void load_earlyret_value(TosState state);

void call_VM_preemptable(Register oop_result,
address entry_point,
Register arg_1);
void restore_after_resume(bool is_native);

void jump_to_entry(address entry);

virtual void check_and_handle_popframe(Register java_thread);
Expand Down
11 changes: 10 additions & 1 deletion src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,10 @@ static void pass_arg3(MacroAssembler* masm, Register arg) {
}
}

// True for VM entry points from which a virtual thread may be preempted.
// Currently only InterpreterRuntime::monitorenter is treated this way.
static bool is_preemptable(address entry_point) {
  return CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter) == entry_point;
}

void MacroAssembler::call_VM_base(Register oop_result,
Register java_thread,
Register last_java_sp,
Expand Down Expand Up @@ -811,7 +815,12 @@ void MacroAssembler::call_VM_base(Register oop_result,
assert(last_java_sp != rfp, "can't use rfp");

Label l;
set_last_Java_frame(last_java_sp, rfp, l, rscratch1);
if (is_preemptable(entry_point)) {
// skip setting last_pc since we already set it to desired value.
set_last_Java_frame(last_java_sp, rfp, noreg, rscratch1);
} else {
set_last_Java_frame(last_java_sp, rfp, l, rscratch1);
}

// do the call, remove parameters
MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l);
Expand Down
5 changes: 3 additions & 2 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -938,8 +938,9 @@ class MacroAssembler: public Assembler {
void pop_CPU_state(bool restore_vectors = false, bool use_sve = false,
int sve_vector_size_in_bytes = 0, int total_predicate_in_bytes = 0);

void push_cont_fastpath(Register java_thread);
void pop_cont_fastpath(Register java_thread);
void push_cont_fastpath(Register java_thread = rthread);
void pop_cont_fastpath(Register java_thread = rthread);

void inc_held_monitor_count();
void dec_held_monitor_count();

Expand Down
Loading

0 comments on commit d7c9ac7

Please sign in to comment.