
Commit

Baseline, passed t1 and t2.
robehn committed May 22, 2024
1 parent c3bc23f commit 5890ce4
Showing 15 changed files with 699 additions and 204 deletions.
2 changes: 1 addition & 1 deletion src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp
@@ -318,7 +318,7 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) {
}
Address resolve(SharedRuntime::get_resolve_static_call_stub(),
relocInfo::static_call_type);
- address call = __ trampoline_call(resolve);
+ address call = __ patchable_far_call(resolve);
if (call == nullptr) {
ce->bailout("trampoline stub overflow");
return;
2 changes: 1 addition & 1 deletion src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
@@ -1345,7 +1345,7 @@ void LIR_Assembler::align_call(LIR_Code code) {
}

void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
- address call = __ trampoline_call(Address(op->addr(), rtype));
+ address call = __ patchable_far_call(Address(op->addr(), rtype));
if (call == nullptr) {
bailout("trampoline stub overflow");
return;
2 changes: 1 addition & 1 deletion src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp
@@ -70,7 +70,7 @@ friend class ArrayCopyStub;
// See emit_static_call_stub for detail
// CompiledDirectCall::to_interp_stub_size() (14) + CompiledDirectCall::to_trampoline_stub_size() (1 + 3 + address)
_call_stub_size = 14 * NativeInstruction::instruction_size +
- (NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size),
+ (NativeInstruction::instruction_size + NativeShortCall::trampoline_size),
// See emit_exception_handler for detail
// verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY)
_exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller
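A quick cross-check of the size comment above, as a standalone sketch. The numbers assume 4-byte RISC-V instructions, an 8-byte wordSize, and that NativeShortCall::trampoline_size keeps the old three-instructions-plus-address layout of NativeCallTrampolineStub; none of these constants are spelled out in the commit itself.

// Back-of-the-envelope check of the _call_stub_size budget (assumptions noted above).
#include <cstddef>

constexpr std::size_t instruction_size = 4;  // one RV64 instruction
constexpr std::size_t word_size        = 8;  // the 64-bit destination address

// CompiledDirectCall::to_interp_stub_size(): 14 instructions, per the comment.
constexpr std::size_t to_interp_stub_size = 14 * instruction_size;                   // 56 bytes

// to_trampoline_stub_size(): 1 alignment nop + 3 instructions + address word.
constexpr std::size_t trampoline_size         = 3 * instruction_size + word_size;    // 20 bytes
constexpr std::size_t to_trampoline_stub_size = instruction_size + trampoline_size;  // 24 bytes

// With -XX:-UseTrampolines the far-call stub is just an aligned 64-bit slot.
constexpr std::size_t address_stub_size = 2 * word_size;                             // 16 bytes

static_assert(to_interp_stub_size + to_trampoline_stub_size == 80,
              "_call_stub_size under the trampoline layout");
static_assert(address_stub_size < to_trampoline_stub_size,
              "the load-based stub needs less stub-section space per call");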
4 changes: 2 additions & 2 deletions src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
@@ -1040,7 +1040,7 @@ void C2_MacroAssembler::string_indexof(Register haystack, Register needle,
stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu());
assert(stub.target() != nullptr, "string_indexof_linear_uu stub has not been generated");
}
- address call = trampoline_call(stub);
+ address call = patchable_far_call(stub);
if (call == nullptr) {
DEBUG_ONLY(reset_labels(LINEARSEARCH, DONE, NOMATCH));
ciEnv::current()->record_failure("CodeCache is full");
@@ -1478,7 +1478,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
ShouldNotReachHere();
}
assert(stub.target() != nullptr, "compare_long_string stub has not been generated");
- address call = trampoline_call(stub);
+ address call = patchable_far_call(stub);
if (call == nullptr) {
DEBUG_ONLY(reset_labels(DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, SHORT_LOOP_START));
ciEnv::current()->record_failure("CodeCache is full");
7 changes: 4 additions & 3 deletions src/hotspot/cpu/riscv/codeBuffer_riscv.cpp
@@ -50,17 +50,18 @@ static bool emit_shared_trampolines(CodeBuffer* cb, CodeBuffer::SharedTrampoline
if (requests == nullptr) {
return true;
}
assert(UseTrampolines, "We are not using trampolines");

MacroAssembler masm(cb);

auto emit = [&](address dest, const CodeBuffer::Offsets &offsets) {
- assert(cb->stubs()->remaining() >= MacroAssembler::max_trampoline_stub_size(), "pre-allocated trampolines");
+ assert(cb->stubs()->remaining() >= MacroAssembler::max_patchable_far_call_stub_size(), "pre-allocated trampolines");
LinkedListIterator<int> it(offsets.head());
int offset = *it.next();
address stub = __ emit_trampoline_stub(offset, dest);
assert(stub, "pre-allocated trampolines");

- address reloc_pc = cb->stubs()->end() - NativeCallTrampolineStub::instruction_size;
+ address reloc_pc = cb->stubs()->end() - NativeShortCall::trampoline_size;
while (!it.is_empty()) {
offset = *it.next();
address caller_pc = cb->insts()->start() + offset;
@@ -70,7 +71,7 @@ static bool emit_shared_trampolines(CodeBuffer* cb, CodeBuffer::SharedTrampoline
};

assert(requests->number_of_entries() >= 1, "at least one");
- const int total_requested_size = MacroAssembler::max_trampoline_stub_size() * requests->number_of_entries();
+ const int total_requested_size = MacroAssembler::max_patchable_far_call_stub_size() * requests->number_of_entries();
if (cb->stubs()->maybe_expand_to_ensure_remaining(total_requested_size) && cb->blob() == nullptr) {
return false;
}
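For context on the shared-trampoline machinery that the new UseTrampolines assert guards, here is a conceptual sketch rather than HotSpot code; the container and the two callbacks are illustrative stand-ins for SharedTrampolineRequests, emit_trampoline_stub() and the relocation binding done above.

// One trampoline stub per destination, shared by every call site that targets it.
#include <map>
#include <vector>

using address = unsigned char*;

struct SharedTrampolineRequests {
  // destination -> offsets of all call instructions that want to reach it
  std::map<address, std::vector<int>> requests;

  void share_trampoline_for(address dest, int caller_offset) {
    requests[dest].push_back(caller_offset);
  }
};

// Pseudo-emission pass: emit one stub per destination, then point every
// recorded call site at that stub.
template <typename EmitStub, typename BindCallSite>
bool emit_shared_trampolines(const SharedTrampolineRequests& shared,
                             EmitStub emit_stub, BindCallSite bind_call_site) {
  for (const auto& [dest, offsets] : shared.requests) {
    address stub = emit_stub(offsets.front(), dest);  // like __ emit_trampoline_stub()
    if (stub == nullptr) {
      return false;                                   // stub section could not be expanded
    }
    for (int offset : offsets) {
      bind_call_site(offset, stub);                   // like trampoline_stub_Relocation
    }
  }
  return true;
}

In this commit the sharing path is kept for trampoline mode only, hence supports_shared_stubs() in the next file now returns UseTrampolines.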
2 changes: 1 addition & 1 deletion src/hotspot/cpu/riscv/codeBuffer_riscv.hpp
@@ -33,7 +33,7 @@

public:
void flush_bundle(bool start_new_bundle) {}
- static constexpr bool supports_shared_stubs() { return true; }
+ static bool supports_shared_stubs() { return UseTrampolines; }

void share_trampoline_for(address dest, int caller_offset);

2 changes: 1 addition & 1 deletion src/hotspot/cpu/riscv/compiledIC_riscv.cpp
@@ -72,7 +72,7 @@ int CompiledDirectCall::to_trampoline_stub_size() {
// Somewhat pessimistically, we count 4 instructions here (although
// there are only 3) because we sometimes emit an alignment nop.
// Trampoline stubs are always word aligned.
- return MacroAssembler::max_trampoline_stub_size();
+ return MacroAssembler::max_patchable_far_call_stub_size();
}

// Relocation entries for call stub, compiled java to interpreter.
4 changes: 3 additions & 1 deletion src/hotspot/cpu/riscv/globals_riscv.hpp
@@ -120,6 +120,8 @@ define_pd_global(intx, InlineSmallCode, 1000);
product(bool, UseZvkn, false, EXPERIMENTAL, \
"Use Zvkn group extension, Zvkned, Zvknhb, Zvkb, Zvkt") \
product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \
"Use RVV instructions for left/right shift of BigInteger")
"Use RVV instructions for left/right shift of BigInteger") \
product(bool, UseTrampolines, false, EXPERIMENTAL, \
"Far calls uses jal to trampoline.")

#endif // CPU_RISCV_GLOBALS_RISCV_HPP
80 changes: 66 additions & 14 deletions src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
@@ -847,6 +847,17 @@ void MacroAssembler::li(Register Rd, int64_t imm) {
}
}

+ void MacroAssembler::load_link(const address source, Register temp) {
+ assert(temp != noreg && temp != x0, "expecting a register");
+ assert(temp == x5, "expecting a register");
+ assert_cond(source != nullptr);
+ int64_t distance = source - pc();
+ assert(is_simm32(distance), "Must be");
+ Assembler::auipc(temp, (int32_t)distance + 0x800);
+ Assembler::_ld(temp, temp, ((int32_t)distance << 20) >> 20);
+ Assembler::jalr(x1, temp, 0);
+ }
+
void MacroAssembler::jump_link(const address dest, Register temp) {
assert_cond(dest != nullptr);
int64_t distance = dest - pc();
@@ -3514,21 +3525,26 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {

// Maybe emit a call via a trampoline. If the code cache is small
// trampolines won't be emitted.
- address MacroAssembler::trampoline_call(Address entry) {
+ address MacroAssembler::patchable_far_call(Address entry) {
assert(entry.rspec().type() == relocInfo::runtime_call_type ||
entry.rspec().type() == relocInfo::opt_virtual_call_type ||
entry.rspec().type() == relocInfo::static_call_type ||
entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type");

address target = entry.target();

- // We need a trampoline if branches are far.
if (!in_scratch_emit_size()) {
- if (entry.rspec().type() == relocInfo::runtime_call_type) {
+ if (entry.rspec().type() == relocInfo::runtime_call_type && UseTrampolines) {
assert(CodeBuffer::supports_shared_stubs(), "must support shared stubs");
code()->share_trampoline_for(entry.target(), offset());
} else {
- address stub = emit_trampoline_stub(offset(), target);
+ address stub = nullptr;
+ if (UseTrampolines) {
+ // We need a trampoline if branches are far.
+ stub = emit_trampoline_stub(offset(), target);
+ } else {
+ stub = emit_address_stub(offset(), target);
+ }
if (stub == nullptr) {
postcond(pc() == badAddress);
return nullptr; // CodeCache is full
@@ -3544,7 +3560,11 @@ address MacroAssembler::trampoline_call(Address entry) {
}
#endif
relocate(entry.rspec(), [&] {
- jump_link(target, t0);
+ if (UseTrampolines) {
+ jump_link(target, t0);
+ } else {
+ load_link(target, t0);
+ }
});

postcond(pc() != badAddress);
@@ -3556,7 +3576,7 @@ address MacroAssembler::ic_call(address entry, jint method_index) {
IncompressibleRegion ir(this); // relocations
movptr(t1, (address)Universe::non_oop_word());
assert_cond(entry != nullptr);
- return trampoline_call(Address(entry, rh));
+ return patchable_far_call(Address(entry, rh));
}

int MacroAssembler::ic_check_size() {
@@ -3601,6 +3621,34 @@ int MacroAssembler::ic_check(int end_alignment) {
return uep_offset;
}

+ address MacroAssembler::emit_address_stub(int insts_call_instruction_offset, address dest) {
+ address stub = start_a_stub(max_patchable_far_call_stub_size());
+ if (stub == nullptr) {
+ return nullptr; // CodeBuffer::expand failed
+ }
+
+ // We are always 4-byte aligned here.
+ assert_alignment(pc());
+
+ // Make sure the address of destination 8-byte aligned.
+ align(wordSize, 0);
+
+ RelocationHolder rh = trampoline_stub_Relocation::spec(code()->insts()->start() +
+ insts_call_instruction_offset);
+ const int stub_start_offset = offset();
+ relocate(rh, [&] {
+ assert(offset() - stub_start_offset == 0,
+ "%ld - %ld == %ld : should be", (long)offset(), (long)stub_start_offset, (long)0);
+ assert(offset() % wordSize == 0, "bad alignment");
+ emit_int64((int64_t)dest);
+ });
+
+ const address stub_start_addr = addr_at(stub_start_offset);
+ end_a_stub();
+
+ return stub_start_addr;
+ }
+
// Emit a trampoline stub for a call to a target which is too far away.
//
// code sequences:
@@ -3615,11 +3663,13 @@ int MacroAssembler::ic_check(int end_alignment) {
address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
address dest) {
// Max stub size: alignment nop, TrampolineStub.
- address stub = start_a_stub(max_trampoline_stub_size());
+ address stub = start_a_stub(max_patchable_far_call_stub_size());
if (stub == nullptr) {
return nullptr; // CodeBuffer::expand failed
}

assert(UseTrampolines, "Must be using trampos.");

// We are always 4-byte aligned here.
assert_alignment(pc());

@@ -3628,7 +3678,7 @@ address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
// instructions code-section.

// Make sure the address of destination 8-byte aligned after 3 instructions.
- align(wordSize, NativeCallTrampolineStub::data_offset);
+ align(wordSize, NativeShortCall::trampoline_data_offset);

RelocationHolder rh = trampoline_stub_Relocation::spec(code()->insts()->start() +
insts_call_instruction_offset);
@@ -3641,23 +3691,25 @@ address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
ld(t0, target); // auipc + ld
jr(t0); // jalr
bind(target);
- assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset,
+ assert(offset() - stub_start_offset == NativeShortCall::trampoline_data_offset,
"should be");
assert(offset() % wordSize == 0, "bad alignment");
emit_int64((int64_t)dest);
});

const address stub_start_addr = addr_at(stub_start_offset);

- assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");

end_a_stub();

return stub_start_addr;
}

- int MacroAssembler::max_trampoline_stub_size() {
+ int MacroAssembler::max_patchable_far_call_stub_size() {
// Max stub size: alignment nop, TrampolineStub.
- return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size;
+ if (UseTrampolines) {
+ return NativeInstruction::instruction_size + NativeShortCall::trampoline_size;
+ }
+ return 2 * wordSize;
}

int MacroAssembler::static_call_stub_size() {
@@ -4376,7 +4428,7 @@ address MacroAssembler::zero_words(Register ptr, Register cnt) {
RuntimeAddress zero_blocks(StubRoutines::riscv::zero_blocks());
assert(zero_blocks.target() != nullptr, "zero_blocks stub has not been generated");
if (StubRoutines::riscv::complete()) {
- address tpc = trampoline_call(zero_blocks);
+ address tpc = patchable_far_call(zero_blocks);
if (tpc == nullptr) {
DEBUG_ONLY(reset_labels(around));
postcond(pc() == badAddress);
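The new load_link()/emit_address_stub() pair above replaces instruction patching with data patching: the call site loads a 64-bit destination out of the stub section and calls through it. A minimal host-level sketch of that idea, making no use of HotSpot internals (AddressStub, call_through_stub and patch_destination are illustrative names, and function pointers are assumed to fit in uintptr_t):

// Host-level model of a stub-based patchable far call (not HotSpot code).
#include <atomic>
#include <cstdint>
#include <cstdio>

using target_fn = void (*)();

// Stands in for the 8-byte slot emitted by emit_address_stub(); alignas(8)
// mirrors the align(wordSize) performed before emitting the address.
struct alignas(8) AddressStub {
  std::atomic<uintptr_t> destination;
};

// Stands in for the auipc + ld + jalr sequence emitted by load_link():
// load the current destination, then call indirectly.
static void call_through_stub(const AddressStub& stub) {
  auto dest = reinterpret_cast<target_fn>(
      stub.destination.load(std::memory_order_acquire));
  dest();
}

// Re-pointing the call only touches data, so the call-site instructions are
// never rewritten and need no instruction-cache flush.
static void patch_destination(AddressStub& stub, target_fn new_dest) {
  stub.destination.store(reinterpret_cast<uintptr_t>(new_dest),
                         std::memory_order_release);
}

static void old_target() { std::puts("old target"); }
static void new_target() { std::puts("new target"); }

int main() {
  AddressStub stub{{reinterpret_cast<uintptr_t>(old_target)}};
  call_through_stub(stub);              // prints "old target"
  patch_destination(stub, new_target);  // analogous to the atomic stub-word store
  call_through_stub(stub);              // prints "new target"
}

Only the 8-byte data word changes when the call is redirected; with trampolines, redirection instead goes through the JAL/trampoline patching sequence described in macroAssembler_riscv.hpp below.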
40 changes: 29 additions & 11 deletions src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
@@ -436,8 +436,10 @@ class MacroAssembler: public Assembler {
return false;
}

+ address emit_address_stub(int insts_call_instruction_offset, address target);
address emit_trampoline_stub(int insts_call_instruction_offset, address target);
- static int max_trampoline_stub_size();
+ static int max_patchable_far_call_stub_size();
+
void emit_static_call_stub();
static int static_call_stub_size();

@@ -593,6 +595,7 @@ class MacroAssembler: public Assembler {
void bgtz(Register Rs, const address dest);

private:
+ void load_link(const address source, Register temp);
void jump_link(const address dest, Register temp);
void jump_link(const Address &adr, Register temp);
public:
@@ -1176,6 +1179,27 @@ class MacroAssembler: public Assembler {
// be used instead.
// All instructions are embedded at a call site.
//
+ // - indirect call: movptr + jalr
+ // This too can reach anywhere in the address space, but it cannot be
+ // patched while code is running, so it must only be modified at a safepoint.
+ // This form of call is most suitable for targets at fixed addresses, which
+ // will never be patched.
+ //
+ // - patchable far call:
+ // This is only available in C1/C2-generated code (nmethod).
+ //
+ // [Main code section]
+ // auipc
+ // ld <address_from_stub_section>
+ // jalr
+ // [Stub section]
+ // trampoline:
+ // <64-bit destination address>
+ //
+ // To change the destination we simply atomically store the new
+ // address in the stub section.
+ //
+ // Old patchable far calls: (-XX:+UseTrampolines)
// - trampoline call:
// This is only available in C1/C2-generated code (nmethod). It is a combination
// of a direct call, which is used if the destination of a call is in range,
@@ -1195,18 +1219,11 @@ class MacroAssembler: public Assembler {
// cache, 'jal trampoline' is replaced with 'jal destination' and the trampoline
// is not used.
// The optimization does not remove the trampoline from the stub section.

//
// This is necessary because the trampoline may well be redirected later when
// code is patched, and the new destination may not be reachable by a simple JAL
// instruction.
//
- // - indirect call: movptr + jalr
- // This too can reach anywhere in the address space, but it cannot be
- // patched while code is running, so it must only be modified at a safepoint.
- // This form of call is most suitable for targets at fixed addresses, which
- // will never be patched.
- //
- //
// To patch a trampoline call when the JAL can't reach, we first modify
// the 64-bit destination address in the trampoline, then modify the
// JAL to point to the trampoline, then flush the instruction cache to
@@ -1219,17 +1236,18 @@ class MacroAssembler: public Assembler {
// invalidated, so there will be a trap at its start.
// For this to work, the destination address in the trampoline is
// always updated, even if we're not using the trampoline.
+ // --

// Emit a direct call if the entry address will always be in range,
- // otherwise a trampoline call.
+ // otherwise a patchable far call.
// Supported entry.rspec():
// - relocInfo::runtime_call_type
// - relocInfo::opt_virtual_call_type
// - relocInfo::static_call_type
// - relocInfo::virtual_call_type
//
// Return: the call PC or null if CodeCache is full.
- address trampoline_call(Address entry);
+ address patchable_far_call(Address entry);

address ic_call(address entry, jint method_index = 0);
static int ic_check_size();
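As a companion to the comment block above, a host-side sketch of the old -XX:+UseTrampolines patching order: destination word first, JAL second, instruction-cache flush last. Everything here is illustrative; encode_jal and flush_icache are dummies standing in for the real encoder and the ICache invalidation call, and the memory ordering is only indicative.

#include <atomic>
#include <cstddef>
#include <cstdint>

struct TrampolineStub {
  std::atomic<uint64_t> destination;   // the 64-bit word that follows auipc/ld/jr
};

struct CallSite {
  std::atomic<uint32_t> jal;           // stands in for the patchable JAL instruction
  TrampolineStub trampoline;
};

// Dummy encoder: a real one would pack the PC-relative offset into the J-type format.
static uint32_t encode_jal(uint64_t target_token) {
  return 0x6fu | static_cast<uint32_t>(target_token << 12);
}

// Dummy publish step: real code must fence.i / flush the range on all harts.
static void flush_icache(const void*, std::size_t) {}

static void set_destination_mt_safe(CallSite& cs, uint64_t new_dest, bool jal_reaches) {
  // 1. Always update the trampoline's destination word, even if the JAL
  //    currently bypasses the trampoline.
  cs.trampoline.destination.store(new_dest, std::memory_order_release);
  // 2. Redirect the JAL: directly to the destination if it is in range,
  //    otherwise to the trampoline.
  uint64_t jal_target = jal_reaches
      ? new_dest
      : static_cast<uint64_t>(reinterpret_cast<uintptr_t>(&cs.trampoline));
  cs.jal.store(encode_jal(jal_target), std::memory_order_release);
  // 3. Publish the new instruction bytes to other threads.
  flush_icache(&cs.jal, sizeof(cs.jal));
  // A racing thread may execute the new JAL before it sees the new destination
  // word, but the stale destination has already been invalidated, so it traps.
}

int main() {
  CallSite cs{{0u}, {{0u}}};
  set_destination_mt_safe(cs, 0x7f0000001000ull, /*jal_reaches=*/false);
}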
