Skip to content

Commit

Permalink
Draft
Browse files Browse the repository at this point in the history
  • Loading branch information
robehn committed May 31, 2023
1 parent a990322 commit 19ca9f9
Show file tree
Hide file tree
Showing 21 changed files with 281 additions and 90 deletions.
3 changes: 2 additions & 1 deletion src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register

initialize_header(obj, klass, noreg, tmp1, tmp2);

if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) {
if (!(UseTLAB && (ZeroTLAB || AllocatePrefetchZeroing) && is_tlab_allocated)) {
// clear rest of allocated space
const Register index = tmp2;
// 16: multiplier for threshold
Expand Down Expand Up @@ -301,6 +301,7 @@ void C1_MacroAssembler::allocate_array(Register obj, Register len, Register tmp1
initialize_header(obj, klass, len, tmp1, tmp2);

// clear rest of allocated space
// TODO: only needed if (!AllocatePrefetchZeroing) when the array was not allocated via the slow case.
const Register len_zero = len;
initialize_body(obj, arr_size, header_size * BytesPerWord, len_zero);

Expand Down
82 changes: 70 additions & 12 deletions src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,33 +178,91 @@ void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Re
__ ld(obj, Address(obj, 0)); // *obj
}

// Emits code that advances the per-thread "prefetch top" watermark
// (JavaThread::tlab_pf_top_offset()) past new_tlab_top and issues cbo_zero
// for every AllocatePrefetchStepSize-sized step between the old and the new
// watermark.
//
//   masm          - assembler the instructions are emitted into
//   new_tlab_top  - register holding the TLAB top after the allocation;
//                   it is only read and must not be t0 or t1
//   c2            - currently only guards an empty debugging hook
//
// Clobbers t0 and t1.
void BarrierSetAssembler::prefetch_zero(MacroAssembler* masm, Register new_tlab_top, bool c2) {
  // Both scratch registers are overwritten below.
  assert_different_registers(new_tlab_top, t0, t1, noreg);

  // Size of one step and how many steps beyond the aligned top to cover.
  // The number of lines is fixed at 1 for now (candidate replacement:
  // MAX2(AllocatePrefetchLines, AllocateInstancePrefetchLines)).
  const intptr_t step_bytes      = AllocatePrefetchStepSize;
  const intptr_t lines_ahead     = 1;
  const intptr_t align_down_mask = ~(step_bytes - 1);
  const intptr_t lookahead_bytes = (lines_ahead + 1) * step_bytes;

  const Address pf_top_slot(xthread, JavaThread::tlab_pf_top_offset());
  Register zeroed_upto = t1;  // watermark currently recorded in the thread
  Register zero_target = t0;  // watermark required after this allocation
  Label done;
  Label zero_next_line;

  __ ld(zeroed_upto, pf_top_slot);

  // Round new_tlab_top down to a step boundary, then move lookahead_bytes
  // past it so the target always lies beyond the end of the new object.
  __ andi(zero_target, new_tlab_top, align_down_mask);
  __ addi(zero_target, zero_target, lookahead_bytes);

  // Nothing to do if the recorded watermark already reaches the target.
  __ bgeu(zeroed_upto, zero_target, done);

  // Publish the new watermark before walking the range.
  __ sd(zero_target, pf_top_slot);

  // Walk [zeroed_upto, zero_target) one step at a time.
  __ bind(zero_next_line);
  __ cbo_zero(zeroed_upto);
  __ addi(zeroed_upto, zeroed_upto, step_bytes);
  __ bltu(zeroed_upto, zero_target, zero_next_line);

  __ bind(done);
  if (c2) {
    // Debugging hook for the C2 call site; intentionally emits nothing.
    // __ stop("Check");
  }
}

// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes.
void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj,
void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm,
Register obj,
Register var_size_in_bytes,
int con_size_in_bytes,
Register tmp1,
Register tmp2,
Label& slow_case,
bool is_far) {
assert_different_registers(obj, tmp2);
assert_different_registers(obj, var_size_in_bytes);
Register end = tmp2;
assert_different_registers(obj, tmp1, tmp2, noreg);
assert_different_registers(obj, var_size_in_bytes, tmp1);

Register new_tlab_top = tmp1;

__ ld(obj, Address(xthread, JavaThread::tlab_top_offset()));
if (var_size_in_bytes == noreg) {
__ la(end, Address(obj, con_size_in_bytes));
__ la(new_tlab_top, Address(obj, con_size_in_bytes));
} else {
__ add(end, obj, var_size_in_bytes);
__ add(new_tlab_top, obj, var_size_in_bytes);
}
__ ld(t0, Address(xthread, JavaThread::tlab_end_offset()));
__ bgtu(end, t0, slow_case, is_far);
__ bgtu(new_tlab_top, t0, slow_case, is_far);

// update the tlab top pointer
__ sd(end, Address(xthread, JavaThread::tlab_top_offset()));

// recover var_size_in_bytes if necessary
if (var_size_in_bytes == end) {
__ sub(var_size_in_bytes, var_size_in_bytes, obj);
__ sd(new_tlab_top, Address(xthread, JavaThread::tlab_top_offset()));

if (AllocatePrefetchZeroing) {
#ifdef ASSERT
{
Label PASSED;
Register current_pf_top = t0;
__ ld(current_pf_top, Address(xthread, JavaThread::tlab_pf_top_offset()));
__ bgeu(current_pf_top, obj, PASSED); // current_pf_top >= obj
__ stop("PF top under obj, zeroing published memory.");
__ bind(PASSED);
}
#endif
prefetch_zero(masm, new_tlab_top);
#ifdef ASSERT
{
Label PASSED;
Register current_pf_top = t0;
__ ld(current_pf_top, Address(xthread, JavaThread::tlab_pf_top_offset()));
__ bgeu(current_pf_top, new_tlab_top, PASSED);
__ stop("Not EQ");
__ bind(PASSED);
}
#endif
}
}

Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ class BarrierSetAssembler: public CHeapObj<mtGC> {
bool is_far = false
);

void prefetch_zero(MacroAssembler* masm, Register new_tlab_top, bool c2 = false);

virtual void barrier_stubs_init() {}

virtual NMethodPatchingType nmethod_patching_type() { return NMethodPatchingType::stw_instruction_and_data_patch; }
Expand Down
4 changes: 2 additions & 2 deletions src/hotspot/cpu/riscv/globals_riscv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ define_pd_global(intx, InlineSmallCode, 1000);
product(bool, UseZbs, false, EXPERIMENTAL, "Use Zbs instructions") \
product(bool, UseZic64b, false, EXPERIMENTAL, "Use Zic64b instructions") \
product(bool, UseZicbom, false, EXPERIMENTAL, "Use Zicbom instructions") \
product(bool, UseZicbop, false, EXPERIMENTAL, "Use Zicbop instructions") \
product(bool, UseZicboz, false, EXPERIMENTAL, "Use Zicboz instructions") \
product(bool, UseZicbop, true, EXPERIMENTAL, "Use Zicbop instructions") \
product(bool, UseZicboz, true, EXPERIMENTAL, "Use Zicboz instructions") \
product(bool, UseZihintpause, false, EXPERIMENTAL, \
"Use Zihintpause instructions") \
product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \
Expand Down
3 changes: 2 additions & 1 deletion src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3039,7 +3039,8 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
bind(L_fallthrough);
}

// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes.
// Defines obj, preserves var_size_in_bytes
// ========> NOT okay for tmp2 == var_size_in_bytes.
void MacroAssembler::tlab_allocate(Register obj,
Register var_size_in_bytes,
int con_size_in_bytes,
Expand Down
15 changes: 15 additions & 0 deletions src/hotspot/cpu/riscv/riscv.ad
Original file line number Diff line number Diff line change
Expand Up @@ -5422,6 +5422,21 @@ instruct prefetchalloc( memory mem ) %{
ins_pipe(iload_prefetch);
%}

// Match rule for the PrefetchAllocationZeroing ideal node.
// It is only eligible when both UseZicboz and AllocatePrefetchZeroing are set,
// and its encoding delegates to BarrierSetAssembler::prefetch_zero with the
// c2 argument set to true.
instruct prefetchallocZeroing( memory mem ) %{
predicate(UseZicboz && AllocatePrefetchZeroing);
match(PrefetchAllocationZeroing mem);

// NOTE(review): costed as a single ALU op although prefetch_zero is a
// multi-instruction helper call -- confirm this cost is intended.
ins_cost(ALU_COST * 1);
format %{ "prefetchzeroing $mem\t# Prefetch and zero" %}

ins_encode %{
// Fetch the assembler helper of the active barrier set.
BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
// Only the base register of $mem is handed over as new_tlab_top; a
// displacement on the operand, if any, is not applied here.
// NOTE(review): presumably the operand always has a zero displacement -- confirm.
bs->prefetch_zero(&_masm, as_Register($mem$$base), true);
%}

ins_pipe(iload_prefetch);
%}

// ============================================================================
// Atomic operation instructions
//
Expand Down
4 changes: 2 additions & 2 deletions src/hotspot/cpu/riscv/templateTable_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3482,9 +3482,9 @@ void TemplateTable::_new() {
// Go to slow path.

if (UseTLAB) {
__ tlab_allocate(x10, x13, 0, noreg, x11, slow_case);
__ tlab_allocate(x10, x13, 0, x12, x11, slow_case);

if (ZeroTLAB) {
if (ZeroTLAB || AllocatePrefetchZeroing) {
// the fields have been already cleared
__ j(initialize_header);
}
Expand Down
106 changes: 64 additions & 42 deletions src/hotspot/cpu/riscv/vm_version_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ void VM_Version::initialize() {
if (UseZic64b) {
if (CacheLineSize != 64) {
assert(!FLAG_IS_DEFAULT(CacheLineSize), "default cache line size should be 64 bytes");
warning("CacheLineSize is assumed to be 64 bytes because Zic64b is enabled");
// warning("CacheLineSize is assumed to be 64 bytes because Zic64b is enabled");
FLAG_SET_DEFAULT(CacheLineSize, 64);
}
} else {
Expand Down Expand Up @@ -203,9 +203,19 @@ void VM_Version::initialize() {
if (FLAG_IS_DEFAULT(BlockZeroingLowLimit)) {
FLAG_SET_DEFAULT(BlockZeroingLowLimit, 2 * CacheLineSize);
}
} else if (UseBlockZeroing) {
warning("Block zeroing is not available");
FLAG_SET_DEFAULT(UseBlockZeroing, false);
if (FLAG_IS_DEFAULT(AllocatePrefetchZeroing)) {
// warning("AllocatePrefetchZeroing enabled by UseZicboz");
FLAG_SET_DEFAULT(AllocatePrefetchZeroing, true);
}
} else {
if (UseBlockZeroing) {
warning("Block zeroing is not available on this CPU");
FLAG_SET_DEFAULT(UseBlockZeroing, false);
}
if (AllocatePrefetchZeroing) {
warning("AllocatePrefetchZeroing specified, but not available on this CPU");
FLAG_SET_DEFAULT(AllocatePrefetchZeroing, false);
}
}

char buf[512];
Expand All @@ -218,6 +228,56 @@ void VM_Version::initialize() {

_features_string = os::strdup(buf);

/* if (!UseZicbop) {
if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
warning("Zicbop is not available on this CPU");
}
FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
} else {*/

// Limit AllocatePrefetchDistance so that it does not exceed the
// constraint in AllocatePrefetchDistanceConstraintFunc.
if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
FLAG_SET_DEFAULT(AllocatePrefetchDistance, MIN2(512, 3 * (int)CacheLineSize));
}
assert(CacheLineSize == 64, "Must be");
if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) {
FLAG_SET_DEFAULT(AllocatePrefetchStepSize, (int)CacheLineSize);
}
assert(CacheLineSize == AllocatePrefetchStepSize, "Must be");
if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 3 * (int)CacheLineSize);
}
if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 3 * (int)CacheLineSize);
}

if (PrefetchCopyIntervalInBytes != -1 &&
((PrefetchCopyIntervalInBytes & 7) || (PrefetchCopyIntervalInBytes >= 32768))) {
warning("PrefetchCopyIntervalInBytes must be -1, or a multiple of 8 and < 32768");
PrefetchCopyIntervalInBytes &= ~7;
if (PrefetchCopyIntervalInBytes >= 32768) {
PrefetchCopyIntervalInBytes = 32760;
}
}
if (AllocatePrefetchDistance !=-1 && (AllocatePrefetchDistance & 7)) {
warning("AllocatePrefetchDistance must be multiple of 8");
AllocatePrefetchDistance &= ~7;
}
if (AllocatePrefetchZeroing) {
assert(AllocatePrefetchStepSize == CacheLineSize, "Must be same");
// warning("AllocatePrefetchStepSize must be same as CacheLineSize when using AllocatePrefetchZeroing.");
AllocatePrefetchStepSize = CacheLineSize;
}
if (AllocatePrefetchStepSize & 7) {
warning("AllocatePrefetchStepSize must be multiple of 8");
AllocatePrefetchStepSize &= ~7;
}

assert(CacheLineSize == AllocatePrefetchStepSize, "Must be");
assert(64 == AllocatePrefetchStepSize, "Must be");
assert(AllocatePrefetchZeroing, "Must be");

#ifdef COMPILER2
c2_initialize();
#endif // COMPILER2
Expand Down Expand Up @@ -262,44 +322,6 @@ void VM_Version::c2_initialize() {
}
}

if (!UseZicbop) {
if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
warning("Zicbop is not available on this CPU");
}
FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
} else {
// Limit AllocatePrefetchDistance so that it does not exceed the
// constraint in AllocatePrefetchDistanceConstraintFunc.
if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
FLAG_SET_DEFAULT(AllocatePrefetchDistance, MIN2(512, 3 * (int)CacheLineSize));
}
if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) {
FLAG_SET_DEFAULT(AllocatePrefetchStepSize, (int)CacheLineSize);
}
if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 3 * (int)CacheLineSize);
}
if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 3 * (int)CacheLineSize);
}

if (PrefetchCopyIntervalInBytes != -1 &&
((PrefetchCopyIntervalInBytes & 7) || (PrefetchCopyIntervalInBytes >= 32768))) {
warning("PrefetchCopyIntervalInBytes must be -1, or a multiple of 8 and < 32768");
PrefetchCopyIntervalInBytes &= ~7;
if (PrefetchCopyIntervalInBytes >= 32768) {
PrefetchCopyIntervalInBytes = 32760;
}
}
if (AllocatePrefetchDistance !=-1 && (AllocatePrefetchDistance & 7)) {
warning("AllocatePrefetchDistance must be multiple of 8");
AllocatePrefetchDistance &= ~7;
}
if (AllocatePrefetchStepSize & 7) {
warning("AllocatePrefetchStepSize must be multiple of 8");
AllocatePrefetchStepSize &= ~7;
}
}

if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
FLAG_SET_DEFAULT(UseMulAddIntrinsic, true);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

#include "runtime/prefetch.hpp"

inline void Prefetch::read (const void *loc, intx interval) {
inline void Prefetch::read(const void *loc, intx interval) {
if (interval >= 0 && UseZicbop) {
// encoding for prefetch.r
asm("ori zero, %0, 1" : : "r"(intptr_t(loc)+interval));
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/adlc/formssel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3529,7 +3529,7 @@ int MatchNode::needs_ideal_memory_edge(FormDict &globals) const {
"ClearArray"
};
int cnt = sizeof(needs_ideal_memory_list)/sizeof(char*);
if( strcmp(_opType,"PrefetchAllocation")==0 )
if( strcmp(_opType,"PrefetchAllocation")==0 || strcmp(_opType,"PrefetchAllocationZeroing")==0)
return 1;
if( strcmp(_opType,"CacheWB")==0 )
return 1;
Expand Down
33 changes: 24 additions & 9 deletions src/hotspot/share/gc/shared/c2/barrierSetC2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -741,16 +741,31 @@ Node* BarrierSetC2::obj_allocate(PhaseMacroExpand* macro, Node* mem, Node* toobi
macro->transform_later(needgc_false);

// Fast path:
i_o = macro->prefetch_allocation(i_o, needgc_false, mem,
old_tlab_top, new_tlab_top, prefetch_lines);
if (UseTLAB && AllocatePrefetchZeroing) {
Node* prefetch = new PrefetchAllocationZeroingNode(mem, new_tlab_top);
// Node* prefetch = new PrefetchAllocationNode(mem, new_tlab_top);
prefetch->set_req(0, needgc_false);
macro->transform_later(prefetch);
mem = prefetch;

// Store the modified TLAB top back down.
Node* store_tlab_top = new StorePNode(needgc_false, mem, tlab_top_adr, TypeRawPtr::BOTTOM, new_tlab_top, MemNode::unordered);
macro->transform_later(store_tlab_top);

fast_oop_ctrl = needgc_false;
fast_oop_rawmem = store_tlab_top;

// Store the modified TLAB top back down.
Node* store_tlab_top = new StorePNode(needgc_false, mem, tlab_top_adr,
TypeRawPtr::BOTTOM, new_tlab_top, MemNode::unordered);
macro->transform_later(store_tlab_top);

fast_oop_ctrl = needgc_false;
fast_oop_rawmem = store_tlab_top;
} else {
i_o = macro->prefetch_allocation(i_o, needgc_false, mem, old_tlab_top, new_tlab_top, prefetch_lines);
// Store the modified TLAB top back down.
Node* store_tlab_top = new StorePNode(needgc_false, mem, tlab_top_adr,
TypeRawPtr::BOTTOM, new_tlab_top, MemNode::unordered);
macro->transform_later(store_tlab_top);

fast_oop_ctrl = needgc_false;
fast_oop_rawmem = store_tlab_top;
}

return old_tlab_top;
}

Expand Down
Loading

0 comments on commit 19ca9f9

Please sign in to comment.