Skip to content

Commit

Permalink
[RISC-V] secondary_super_cache does not scale well
Browse files Browse the repository at this point in the history
  • Loading branch information
zifeihan committed May 7, 2024
1 parent 9347bb7 commit 038233b
Show file tree
Hide file tree
Showing 6 changed files with 378 additions and 0 deletions.
10 changes: 10 additions & 0 deletions src/hotspot/cpu/riscv/assembler_riscv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1904,6 +1904,16 @@ enum Nf {

#undef INSN

// Emitter for single-source vector instructions of the form "NAME vd, vs2, vm".
// The slot that patch_VArith would normally fill with a first source operand
// carries a fixed 5-bit selector (vs1_sel) instead; together with funct6 it
// picks the operation.
#define INSN(NAME, opcode, f3, vs1_sel, f6)                                       \
  void NAME(VectorRegister Vd, VectorRegister Vs2, VectorMask vm = unmasked) {    \
    patch_VArith(opcode, Vd, f3, vs1_sel, Vs2, vm, f6);                           \
  }

  // Vector Basic Bit-manipulation (Zvbb) Extension
  // vcpop.v: population count of the elements of Vs2, written to Vd.
  INSN(vcpop_v, 0b1010111, 0b010, 0b01110, 0b010010);

#undef INSN

#undef patch_VArith

// ====================================
Expand Down
249 changes: 249 additions & 0 deletions src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3295,6 +3295,9 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
Register tmp2_reg,
Label* L_success,
Label* L_failure) {
// NB! Callers may assume that, when tmp2_reg is a valid register,
// this code sets it to a nonzero value.

assert_different_registers(sub_klass, super_klass, tmp1_reg);
if (tmp2_reg != noreg) {
assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0);
Expand Down Expand Up @@ -3379,6 +3382,252 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
bind(L_fallthrough);
}

// Ensure that the inline code and the stub are using the same registers.
//
// Expands to a single assert over local names that must be in scope at the
// point of use. The required assignment is:
//   r_super_klass  -> x10
//   r_array_base   -> x11
//   r_array_length -> x12
//   r_array_index  -> x13 (or noreg)
//   r_sub_klass    -> x14 (or noreg)
//   r_bitmap       -> t1  (or noreg)
//   result         -> x15 (or noreg)
#define LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS                                 \
do {                                                                            \
  assert((r_super_klass  == x10)                                  &&            \
         (r_array_base   == x11)                                  &&            \
         (r_array_length == x12)                                  &&            \
         (r_array_index  == x13 || r_array_index == noreg)        &&            \
         (r_sub_klass    == x14 || r_sub_klass   == noreg)        &&            \
         (r_bitmap       == t1  || r_bitmap      == noreg)        &&            \
         (result         == x15 || result        == noreg),                     \
         "registers must match riscv.ad");                                      \
} while (0)

// Emits the inline part of the hashed secondary-supers lookup for a super
// class whose hash slot (super_klass_slot) is known at code-generation time.
//
// In the generated code:
//   - result is zero iff r_super_klass was found among the secondary supers
//     of r_sub_klass, and nonzero otherwise;
//   - temp1, temp2, temp3, t0, t1 and vreg_v1 are clobbered.
//
// Registers are fixed (see LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS) because
// the slow-path stub is entered with live values in them.
// vreg_v2 is accepted for interface compatibility; the emitted code does not
// use it.
//
// Return true: we succeeded in generating this code
// Return false: trampoline_call could not emit the call to the slow-path stub.
bool MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass,
                                                   Register r_super_klass,
                                                   Register temp1,
                                                   Register temp2,
                                                   Register temp3,
                                                   VectorRegister vreg_v1,
                                                   VectorRegister vreg_v2,
                                                   Register result,
                                                   u1 super_klass_slot,
                                                   bool stub_is_near) {
  assert_different_registers(r_sub_klass, r_super_klass, temp1, temp2, temp3, result, t0, t1);

  Label L_fallthrough;

  BLOCK_COMMENT("lookup_secondary_supers_table {");

  const Register
    r_array_base   = temp1, // x11
    r_array_length = temp2, // x12
    r_array_index  = temp3, // x13
    r_bitmap       = t1;

  LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;

  u1 bit = super_klass_slot;

  // Make sure that result is nonzero if the bitmap test below misses.
  mv(result, 1);

  ld(r_bitmap, Address(r_sub_klass, Klass::bitmap_offset()));

  // First check the bitmap to see if super_klass might be present. If
  // the bit is zero, we are certain that super_klass is not one of
  // the secondary supers.
  test_bit(t0, r_bitmap, bit);
  beqz(t0, L_fallthrough);

  // Get the first array index that can contain super_klass into r_array_index.
  if (bit != 0) {
    // r_array_index = popcount(bitmap << (SECONDARY_SUPERS_TABLE_MASK - bit)),
    // i.e. the number of set bits in bitmap[0..bit].
    //
    // The shift must act on the bitmap as one 64-bit value, so operate on a
    // single e64 element: with a narrower SEW, vsll.vx shifts every element
    // independently and only honours the low log2(SEW) bits of the amount.
    vsetivli(t0, 1, Assembler::e64, Assembler::m1);
    vmv_s_x(vreg_v1, r_bitmap);
    mv(t0, (Klass::SECONDARY_SUPERS_TABLE_MASK - bit));
    vsll_vx(vreg_v1, vreg_v1, t0);
    vcpop_v(vreg_v1, vreg_v1);
    vmv_x_s(r_array_index, vreg_v1);
  } else {
    mv(r_array_index, (u1)1);
  }
  // NB! r_array_index is off by 1. It is compensated by keeping r_array_base off by 1 word.

  // We will consult the secondary-super array.
  ld(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset())));

  // The value i in r_array_index is >= 1, so even though r_array_base
  // points to the length, we don't need to adjust it to point to the
  // data.
  assert(Array<Klass*>::base_offset_in_bytes() == wordSize, "Adjust this code");
  assert(Array<Klass*>::length_offset_in_bytes() == 0, "Adjust this code");

  shadd(result, r_array_index, r_array_base, result, LogBytesPerWord);
  ld(result, Address(result, 0));
  xorr(result, result, r_super_klass);
  beqz(result, L_fallthrough); // Found a match

  // Is there another entry to check? Consult the bitmap.
  test_bit(t0, r_bitmap, ((bit + 1) & Klass::SECONDARY_SUPERS_TABLE_MASK));
  beqz(t0, L_fallthrough);

  // Linear probe. Rotate the bitmap so that the bit of the slot we just
  // checked sits at position 0, which is what the stub expects.
  if (bit != 0) {
    ror_imm(r_bitmap, r_bitmap, bit);
  }

  // The slot we just inspected is at secondary_supers[r_array_index - 1].
  // The next slot to be inspected, by the stub we're about to call,
  // is secondary_supers[r_array_index]. Bits 0 and 1 in the bitmap
  // have been checked.
  Address stub = RuntimeAddress(StubRoutines::lookup_secondary_supers_table_slow_path_stub());
  if (stub_is_near) {
    jal(stub);
  } else {
    address call = trampoline_call(stub);
    if (call == nullptr) {
      return false; // trampoline allocation failed
    }
  }

  BLOCK_COMMENT("} lookup_secondary_supers_table");

  bind(L_fallthrough);

  if (VerifySecondarySupers) {
    verify_secondary_supers_table(r_sub_klass, r_super_klass, // x14, x10
                                  temp1, temp2, result);      // x11, x12, x15
  }
  return true;
}

// Emits the out-of-line continuation of the hashed secondary-supers lookup.
// The code generated by lookup_secondary_supers_table above transfers here
// (through lookup_secondary_supers_table_slow_path_stub) when there is a
// collision in the hashed lookup in the secondary supers array.
//
// On entry to the generated code:
//   r_super_klass - the class being searched for
//   r_array_base  - the secondary-supers array (pointing at its length field)
//   r_array_index - index of the next slot to inspect, biased by one
//   r_bitmap      - the bitmap, rotated so that bits 0 and 1 are the ones the
//                   caller has already examined
// On exit, result is zero iff r_super_klass was found.
// Clobbers r_array_base, r_array_index, r_bitmap, temp1 and t0.
void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_klass,
                                                             Register r_array_base,
                                                             Register r_array_index,
                                                             Register r_bitmap,
                                                             Register temp1,
                                                             Register result) {
  assert_different_registers(r_super_klass, r_array_base, r_array_index, r_bitmap, temp1, result, t0);

  const Register
    r_array_length = temp1,
    r_sub_klass    = noreg; // unused

  LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;

  Label L_fallthrough, L_huge;

  // Load the array length.
  lwu(r_array_length, Address(r_array_base, Array<Klass*>::length_offset_in_bytes()));
  // And adjust the array base to point to the data.
  // NB! Effectively increments current slot index by 1.
  assert(Array<Klass*>::base_offset_in_bytes() == wordSize, "");
  addi(r_array_base, r_array_base, Array<Klass*>::base_offset_in_bytes());

  // The bitmap is full to bursting.
  // Implicit invariant: BITMAP_FULL implies (length > 0)
  assert(Klass::SECONDARY_SUPERS_BITMAP_FULL == ~uintx(0), "");
  addi(t0, r_bitmap, (u1)1); // all-ones + 1 wraps to zero
  beqz(t0, L_huge);

  // NB! Our caller has checked bits 0 and 1 in the bitmap. The
  // current slot (at secondary_supers[r_array_index]) has not yet
  // been inspected, and r_array_index may be out of bounds if we
  // wrapped around the end of the array.

  { // This is conventional linear probing, but instead of terminating
    // when a null entry is found in the table, we maintain a bitmap
    // in which a 0 indicates missing entries.
    // The check above guarantees there are 0s in the bitmap, so the loop
    // eventually terminates.
    Label L_loop;
    bind(L_loop);

    // Check for wraparound.
    Label notGELabel;
    blt(r_array_index, r_array_length, notGELabel);
    mv(r_array_index, zr);
    bind(notGELabel);

    // result doubles as the address temporary until the xor below.
    shadd(result, r_array_index, r_array_base, result, LogBytesPerWord);
    ld(result, Address(result));
    xorr(result, result, r_super_klass);

    beqz(result, L_fallthrough);

    test_bit(t0, r_bitmap, 2); // look-ahead check (Bit 2); result is non-zero
    beqz(t0, L_fallthrough);

    ror_imm(r_bitmap, r_bitmap, 1);
    addi(r_array_index, r_array_index, 1);
    j(L_loop);
  }

  { // Degenerate case: more than 64 secondary supers.
    // FIXME: We could do something smarter here, maybe a vectorized
    // comparison or a binary search, but is that worth any added
    // complexity?
    bind(L_huge);
    repne_scan(r_array_base, r_super_klass, r_array_length, t0);
    // RISC-V has no condition flags for the scan to report through, so the
    // outcome has to be derived from registers. The array is non-empty here
    // (BITMAP_FULL implies length > 0), so t0 holds the last element compared.
    // NOTE(review): assumes repne_scan leaves the last loaded element in its
    // tmp register and stops on the first match - confirm against repne_scan.
    xorr(result, t0, r_super_klass); // result == 0 iff we got a match.
  }

  bind(L_fallthrough);
}

// Make sure that the hashed lookup and a linear scan agree.
//
// Emits code that linearly scans the secondary-supers array of r_sub_klass
// for r_super_klass and compares the outcome with the value already in
// result (zero meaning "found"). On disagreement the generated code calls
// Klass::on_secondary_supers_verification_failure and does not return.
//
// In the generated code result is normalized to 0/1, and temp1, temp2, t0
// and t1 are clobbered.
void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass,
                                                   Register r_super_klass,
                                                   Register temp1,
                                                   Register temp2,
                                                   Register result) {
  assert_different_registers(r_sub_klass, r_super_klass, temp1, temp2, result, t0, t1);

  const Register
    r_array_base   = temp1,
    r_array_length = temp2,
    r_array_index  = noreg, // unused
    r_bitmap       = noreg; // unused

  LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;

  BLOCK_COMMENT("verify_secondary_supers_table {");

  // We will consult the secondary-super array.
  ld(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset())));

  // Load the array length.
  lwu(r_array_length, Address(r_array_base, Array<Klass*>::length_offset_in_bytes()));
  // And adjust the array base to point to the data.
  addi(r_array_base, r_array_base, Array<Klass*>::base_offset_in_bytes());

  // Scan r_array_length words at [r_array_base] for an occurrence of
  // r_super_klass. RISC-V has no condition flags, so the outcome is derived
  // from the scan's tmp register, which holds the last element compared.
  // Pre-clear it so that an empty array cannot leave a stale value that
  // happens to equal r_super_klass.
  // NOTE(review): assumes repne_scan leaves the last loaded element in tmp,
  // stops on the first match, and does not touch tmp when count is zero -
  // confirm against repne_scan.
  mv(t1, zr);
  repne_scan(/*addr*/r_array_base, /*value*/r_super_klass, /*count*/r_array_length, /*tmp*/t1);
  xorr(t0, t1, r_super_klass);
  // t0 == 0 iff we got a match.
  snez(t0, t0);

  Label passed;
  snez(result, result); // normalize result to 0/1 for comparison

  beq(t0, result, passed);
  {
    // Marshal the arguments for the failure report. r_sub_klass has to be
    // copied out before x14 is overwritten with the message.
    mv(x10, r_super_klass);
    mv(x11, r_sub_klass);
    mv(x12, /*expected*/t0);
    mv(x13, result);
    mv(x14, (address)("mismatch"));
    rt_call(CAST_FROM_FN_PTR(address, Klass::on_secondary_supers_verification_failure), t1);
    should_not_reach_here();
  }
  bind(passed);

  BLOCK_COMMENT("} verify_secondary_supers_table");
}

// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes.
void MacroAssembler::tlab_allocate(Register obj,
Register var_size_in_bytes,
Expand Down
26 changes: 26 additions & 0 deletions src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,32 @@ class MacroAssembler: public Assembler {
Label* L_success,
Label* L_failure);

// Hashed secondary-supers lookup for a super class whose hash slot
// (super_klass_slot) is known when the code is generated.
// The outcome is delivered in Register result: zero means r_super_klass
// was found, nonzero means it was not.
// Returns false if the call to the slow-path stub could not be emitted.
bool lookup_secondary_supers_table(Register r_sub_klass, Register r_super_klass,
                                   Register temp1, Register temp2, Register temp3,
                                   VectorRegister vtemp4, VectorRegister vtemp5,
                                   Register result,
                                   u1 super_klass_slot,
                                   bool stub_is_near = false);

// Debugging aid: checks the value in result against a linear scan of the
// secondary supers of r_sub_klass. temp1 and temp2 are scratch registers.
void verify_secondary_supers_table(Register r_sub_klass, Register r_super_klass,
                                   Register temp1, Register temp2,
                                   Register result);

// Out-of-line continuation of lookup_secondary_supers_table: probes the
// remaining slots of the secondary-supers array. result is zero on a match.
void lookup_secondary_supers_table_slow_path(Register r_super_klass, Register r_array_base,
                                             Register r_array_index, Register r_bitmap,
                                             Register temp1,
                                             Register result);

void check_klass_subtype(Register sub_klass,
Register super_klass,
Register tmp_reg,
Expand Down
34 changes: 34 additions & 0 deletions src/hotspot/cpu/riscv/riscv.ad
Original file line number Diff line number Diff line change
Expand Up @@ -10066,6 +10066,40 @@ instruct partialSubtypeCheck(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, i
ins_pipe(pipe_class_memory);
%}

// Partial subtype check against a constant super class, using the hashed
// secondary-supers table. The check is emitted inline when
// InlineSecondarySupersTest is set; otherwise a per-slot stub is called.
// NOTE(review): the inline path emits vector instructions, including the
// Zvbb vcpop.v, yet the predicate only tests UseSecondarySupersTable -
// confirm that vector/Zvbb availability is guaranteed elsewhere.
instruct partialSubtypeCheckConstSuper(iRegP_R14 sub, iRegP_R10 super_reg, immP super_con, iRegP_R15 result,
                                       iRegP_R11 tempR11, iRegP_R12 tempR12, iRegP_R13 tempR13,
                                       vReg_V1 v1, vReg_V2 v2, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
  predicate(UseSecondarySupersTable);
  effect(KILL cr, TEMP tempR11, TEMP tempR12, TEMP tempR13, TEMP v1, TEMP v2);

  ins_cost(700); // smaller than the next version
  format %{ "partialSubtypeCheck $result, $sub, super" %}

  ins_encode %{
    // The hash slot of the constant super class selects the bitmap bit to test.
    const u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot();
    bool success;
    if (!InlineSecondarySupersTest) {
      address call = __ trampoline_call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot)));
      success = (call != nullptr);
    } else {
      success = __ lookup_secondary_supers_table($sub$$Register, $super_reg$$Register,
                                                 $tempR11$$Register, $tempR12$$Register, $tempR13$$Register,
                                                 as_VectorRegister($v1$$reg),
                                                 as_VectorRegister($v2$$reg),
                                                 $result$$Register,
                                                 super_klass_slot);
    }
    if (!success) {
      // Either path fails only when the call could not be emitted.
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  ins_pipe(pipe_class_memory);
%}

instruct partialSubtypeCheckVsZero(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 tmp,
immP0 zero, rFlagsReg cr)
%{
Expand Down
Loading

0 comments on commit 038233b

Please sign in to comment.