Skip to content

Commit

Permalink
Merge branch 'master' into 8332265
Browse files Browse the repository at this point in the history
  • Loading branch information
robehn committed May 22, 2024
2 parents c406294 + 67f03f2 commit a4e88a4
Show file tree
Hide file tree
Showing 54 changed files with 1,010 additions and 506 deletions.
4 changes: 2 additions & 2 deletions src/hotspot/cpu/riscv/matcher_riscv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@
static const bool int_in_long = true;

// Does the CPU support vector variable shift instructions?
static constexpr bool supports_vector_variable_shifts(void) {
return false;
static bool supports_vector_variable_shifts(void) {
return UseRVV;
}

// Does target support predicated operation emulation.
Expand Down
26 changes: 26 additions & 0 deletions src/hotspot/cpu/riscv/riscv_v.ad
Original file line number Diff line number Diff line change
Expand Up @@ -2218,6 +2218,32 @@ instruct replicateD(vReg dst, fRegD src) %{
%}

// vector shift
//
// The following shift instructs are shared by vectorization (in SLP, superword.cpp) and the Vector API.
//
// Shift behaviour in vectorization is defined by the Java Language Specification, which includes:
// 1. "If the promoted type of the left-hand operand is int, then only the five lowest-order bits of
// the right-hand operand are used as the shift distance. It is as if the right-hand operand were
// subjected to a bitwise logical AND operator & (§15.22.1) with the mask value 0x1f (0b11111).
// The shift distance actually used is therefore always in the range 0 to 31, inclusive."
// 2. similarly, for long "with the mask value 0x3f (0b111111)"
// check https://docs.oracle.com/javase/specs/jls/se21/html/jls-15.html#jls-15.19 for details.
//
// Shift behaviour in Vector API is defined as:
// e.g. for ASHR, "a>>(n&(ESIZE*8-1))"
// this behaviour is the same as that of the shift instructions in the RISC-V vector extension.
// check https://docs.oracle.com/en/java/javase/21/docs/api/jdk.incubator.vector/jdk/incubator/vector/VectorOperators.html#ASHR
// and https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#116-vector-single-width-shift-instructions for details.
//
// Despite the difference between these 2 behaviours, the same shift instructs for byte and short are
// still shared between vectorization and the Vector API. This works because the difference is hidden
// inside the implementations of vectorization and the Vector API:
// 1. when optimizing, vectorization masks the shift value with "(BitsPerInt - 1)" or "(BitsPerLong - 1)"
// 2. in the Vector API, the shift value is masked with SHIFT_MASK (e.g. for ByteVector it's "Byte.SIZE - 1")
//
// Were it not for this pre-processing of the shift value in vectorization and in the Vector API respectively,
// then e.g. for a byte shift value of 16 the intrinsic behaviour would differ, and they could not share the
// same instruct here, as vectorization requires x >> 16, but the Vector API requires x >> (16 & 7).

instruct vasrB(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{
match(Set dst (RShiftVB src shift));
Expand Down
7 changes: 5 additions & 2 deletions src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2274,7 +2274,9 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false);
__ call_VM_leaf(entry);

__ bind(*stub->continuation());
if (stub != nullptr) {
__ bind(*stub->continuation());
}
}

void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) {
Expand Down Expand Up @@ -2385,7 +2387,8 @@ void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
arrayOopDesc::base_offset_in_bytes(op->type()),
type2aelembytes(op->type()),
op->klass()->as_register(),
*op->stub()->entry());
*op->stub()->entry(),
op->zero_array());
}
__ bind(*op->stub()->continuation());
}
Expand Down
24 changes: 20 additions & 4 deletions src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* Copyright (c) 2016, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -710,7 +710,13 @@ void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
frame_map()->c_calling_convention(&sig);

// Make all state_for calls early since they can emit code.
CodeEmitInfo* info = state_for (x, x->state());
CodeEmitInfo* info = nullptr;
if (x->state_before() != nullptr && x->state_before()->force_reexecute()) {
info = state_for(x, x->state_before());
info->set_force_reexecute();
} else {
info = state_for(x, x->state());
}

LIRItem src(x->argument_at(0), this);
LIRItem src_pos(x->argument_at(1), this);
Expand All @@ -735,6 +741,9 @@ void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
int flags;
ciArrayKlass* expected_type;
arraycopy_helper(x, &flags, &expected_type);
if (x->check_flag(Instruction::OmitChecksFlag)) {
flags = 0;
}

__ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(),
length.result(), tmp, expected_type, flags, info); // does add_safepoint
Expand Down Expand Up @@ -769,7 +778,13 @@ void LIRGenerator::do_NewInstance(NewInstance* x) {
}

void LIRGenerator::do_NewTypeArray(NewTypeArray* x) {
CodeEmitInfo* info = state_for (x, x->state());
CodeEmitInfo* info = nullptr;
if (x->state_before() != nullptr && x->state_before()->force_reexecute()) {
info = state_for(x, x->state_before());
info->set_force_reexecute();
} else {
info = state_for(x, x->state());
}

LIRItem length(x->length(), this);
length.load_item();
Expand All @@ -786,7 +801,8 @@ void LIRGenerator::do_NewTypeArray(NewTypeArray* x) {
__ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg);

CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info);
__ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path);
__ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type,
klass_reg, slow_path, x->zero_array());

LIR_Opr result = rlock_result(x);
__ move(reg, result);
Expand Down
27 changes: 14 additions & 13 deletions src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2023 SAP SE. All rights reserved.
* Copyright (c) 2016, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -191,7 +191,6 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register
}

void C1_MacroAssembler::initialize_body(Register objectFields, Register len_in_bytes, Register Rzero) {
Label done;
assert_different_registers(objectFields, len_in_bytes, Rzero);

// Initialize object fields.
Expand All @@ -203,7 +202,6 @@ void C1_MacroAssembler::initialize_body(Register objectFields, Register len_in_b
// Use Rzero as src length, then mvcle will copy nothing
// and fill the object with the padding value 0.
move_long_ext(objectFields, as_Register(Rzero->encoding()-1), 0);
bind(done);
}

void C1_MacroAssembler::allocate_object(
Expand Down Expand Up @@ -274,7 +272,8 @@ void C1_MacroAssembler::allocate_array(
int base_offset_in_bytes, // elements offset in bytes
int elt_size, // element size in bytes
Register klass, // object klass
Label& slow_case // Continuation point if fast allocation fails.
Label& slow_case, // Continuation point if fast allocation fails.
bool zero_array // zero the allocated array or not
) {
assert_different_registers(obj, len, t1, t2, klass);

Expand Down Expand Up @@ -305,15 +304,17 @@ void C1_MacroAssembler::allocate_array(
initialize_header(obj, klass, len, noreg, t1);

// Clear rest of allocated space.
Label done;
Register object_fields = t1;
Register Rzero = Z_R1_scratch;
z_aghi(arr_size, -base_offset_in_bytes);
z_bre(done); // Jump if size of fields is zero.
z_la(object_fields, base_offset_in_bytes, obj);
z_xgr(Rzero, Rzero);
initialize_body(object_fields, arr_size, Rzero);
bind(done);
if (zero_array) {
Label done;
Register object_fields = t1;
Register Rzero = Z_R1_scratch;
z_aghi(arr_size, -base_offset_in_bytes);
z_bre(done); // Jump if size of fields is zero.
z_la(object_fields, base_offset_in_bytes, obj);
z_xgr(Rzero, Rzero);
initialize_body(object_fields, arr_size, Rzero);
bind(done);
}

// Dtrace support is unimplemented.
// if (CURRENT_ENV->dtrace_alloc_probes()) {
Expand Down
5 changes: 3 additions & 2 deletions src/hotspot/cpu/s390/c1_MacroAssembler_s390.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2023 SAP SE. All rights reserved.
* Copyright (c) 2016, 2024 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -89,7 +89,8 @@
int base_offset_in_bytes, // elements offset in bytes
int elt_size, // element size in bytes
Register klass, // object klass
Label& slow_case // Continuation point if fast allocation fails.
Label& slow_case, // Continuation point if fast allocation fails.
bool zero_array // zero the allocated array or not
);

// Invalidates registers in this window.
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/c1/c1_Compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ bool Compiler::is_intrinsic_supported(vmIntrinsics::ID id) {
case vmIntrinsics::_counterTime:
#endif
case vmIntrinsics::_getObjectSize:
#if defined(X86) || defined(AARCH64)
#if defined(X86) || defined(AARCH64) || defined(S390)
case vmIntrinsics::_clone:
#endif
break;
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/share/c1/c1_LIR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ LIR_OpArrayCopy::LIR_OpArrayCopy(LIR_Opr src, LIR_Opr src_pos, LIR_Opr dst, LIR_
, _tmp(tmp)
, _expected_type(expected_type)
, _flags(flags) {
#if defined(X86) || defined(AARCH64)
#if defined(X86) || defined(AARCH64) || defined(S390)
if (expected_type != nullptr && flags == 0) {
_stub = nullptr;
} else {
Expand Down
31 changes: 0 additions & 31 deletions src/hotspot/share/gc/serial/tenuredGeneration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -343,37 +343,6 @@ void TenuredGeneration::gc_prologue() {
_used_at_prologue = used();
}

// Decides whether this generation should be collected.
//
// Returns true as soon as one of the following holds, checked in this order,
// and logs the reason at gc trace level:
//   1. 'full' is set;
//   2. should_allocate(size, is_tlab) returns true;
//   3. free() is below the hard-coded threshold of 10000;
//   4. capacity() is larger than _capacity_at_prologue.
// Returns false if none of them holds.
//
// Note that free() and capacity() are called again when building the log
// message, after the condition has already been evaluated.
bool TenuredGeneration::should_collect(bool full,
size_t size,
bool is_tlab) {
// This should be one big conditional or (||), but I want to be able to tell
// why it returns what it returns (without re-evaluating the conditionals
// in case they aren't idempotent), so I'm doing it this way.
// DeMorgan says it's okay.
if (full) {
log_trace(gc)("TenuredGeneration::should_collect: because full");
return true;
}
if (should_allocate(size, is_tlab)) {
log_trace(gc)("TenuredGeneration::should_collect: because should_allocate(" SIZE_FORMAT ")", size);
return true;
}
// If we don't have very much free space.
// XXX: 10000 should be a percentage of the capacity!!!
if (free() < 10000) {
log_trace(gc)("TenuredGeneration::should_collect: because free(): " SIZE_FORMAT, free());
return true;
}
// If we had to expand to accommodate promotions from the young generation
if (_capacity_at_prologue < capacity()) {
log_trace(gc)("TenuredGeneration::should_collect: because_capacity_at_prologue: " SIZE_FORMAT " < capacity(): " SIZE_FORMAT,
_capacity_at_prologue, capacity());
return true;
}

// None of the conditions above applied.
return false;
}

void TenuredGeneration::compute_new_size() {
assert_locked_or_safepoint(Heap_lock);

Expand Down
4 changes: 0 additions & 4 deletions src/hotspot/share/gc/serial/tenuredGeneration.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,6 @@ class TenuredGeneration: public Generation {
void gc_prologue();
void gc_epilogue();

bool should_collect(bool full,
size_t word_size,
bool is_tlab);

bool should_allocate(size_t word_size, bool is_tlab) {
bool result = false;
size_t overflow_limit = (size_t)1 << (BitsPerSize_t - LogHeapWordSize);
Expand Down
6 changes: 3 additions & 3 deletions src/hotspot/share/gc/shared/gcVMOperations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,9 @@ bool VM_GC_Operation::doit_prologue() {


void VM_GC_Operation::doit_epilogue() {
// Clean up old interpreter OopMap entries that were replaced
// during the GC thread root traversal.
OopMapCache::cleanup_old_entries();
// GC thread root traversal likely used OopMapCache a lot, which
// might have created lots of old entries. Trigger the cleanup now.
OopMapCache::trigger_cleanup();
if (Universe::has_reference_pending_list()) {
Heap_lock->notify_all();
}
Expand Down
4 changes: 3 additions & 1 deletion src/hotspot/share/gc/shenandoah/shenandoahVMOperations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ bool VM_ShenandoahOperation::doit_prologue() {

// Epilogue of a Shenandoah VM operation: asserts that the heap reports no
// changed GC state (per the assert message, a change not yet synchronized to
// Java threads), then triggers OopMapCache cleanup.
void VM_ShenandoahOperation::doit_epilogue() {
assert(!ShenandoahHeap::heap()->has_gc_state_changed(), "GC State was not synchronized to java threads.");
// GC thread root traversal likely used OopMapCache a lot, which
// might have created lots of old entries. Trigger the cleanup now.
OopMapCache::trigger_cleanup();
}

bool VM_ShenandoahReferenceOperation::doit_prologue() {
Expand All @@ -52,7 +55,6 @@ bool VM_ShenandoahReferenceOperation::doit_prologue() {

void VM_ShenandoahReferenceOperation::doit_epilogue() {
VM_ShenandoahOperation::doit_epilogue();
OopMapCache::cleanup_old_entries();
if (Universe::has_reference_pending_list()) {
Heap_lock->notify_all();
}
Expand Down
5 changes: 5 additions & 0 deletions src/hotspot/share/gc/x/xDriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "gc/x/xServiceability.hpp"
#include "gc/x/xStat.hpp"
#include "gc/x/xVerify.hpp"
#include "interpreter/oopMapCache.hpp"
#include "logging/log.hpp"
#include "memory/universe.hpp"
#include "runtime/threads.hpp"
Expand Down Expand Up @@ -130,6 +131,10 @@ class VM_XOperation : public VM_Operation {

// Epilogue of the VM operation: releases Heap_lock first, then triggers
// OopMapCache cleanup, so the trigger runs after the unlock.
// NOTE(review): presumably pairs with a Heap_lock acquisition in the
// prologue, which is not visible here -- confirm.
virtual void doit_epilogue() {
Heap_lock->unlock();

// GC thread root traversal likely used OopMapCache a lot, which
// might have created lots of old entries. Trigger the cleanup now.
OopMapCache::trigger_cleanup();
}

bool gc_locked() const {
Expand Down
5 changes: 5 additions & 0 deletions src/hotspot/share/gc/z/zGeneration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
#include "gc/z/zUncoloredRoot.inline.hpp"
#include "gc/z/zVerify.hpp"
#include "gc/z/zWorkers.hpp"
#include "interpreter/oopMapCache.hpp"
#include "logging/log.hpp"
#include "memory/universe.hpp"
#include "prims/jvmtiTagMap.hpp"
Expand Down Expand Up @@ -452,6 +453,10 @@ class VM_ZOperation : public VM_Operation {

// Epilogue of the VM operation: releases Heap_lock first, then triggers
// OopMapCache cleanup, so the trigger runs after the unlock.
// NOTE(review): presumably pairs with a Heap_lock acquisition in the
// prologue, which is not visible here -- confirm.
virtual void doit_epilogue() {
Heap_lock->unlock();

// GC thread root traversal likely used OopMapCache a lot, which
// might have created lots of old entries. Trigger the cleanup now.
OopMapCache::trigger_cleanup();
}

bool success() const {
Expand Down
Loading

0 comments on commit a4e88a4

Please sign in to comment.