Skip to content

Commit

Permalink
8329031: CPUID feature detection for Advanced Performance Extensions …
Browse files Browse the repository at this point in the history
…(Intel® APX)

Reviewed-by: sviswanathan, kvn
  • Loading branch information
Jatin Bhateja committed Jun 9, 2024
1 parent 8d2f9e5 commit a941397
Show file tree
Hide file tree
Showing 8 changed files with 185 additions and 26 deletions.
6 changes: 4 additions & 2 deletions src/hotspot/cpu/x86/globals_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
"Highest supported AVX instructions set on x86/x64") \
range(0, 3) \
\
\
product(bool, UseAPX, false, EXPERIMENTAL, \
"Use Intel Advanced Performance Extensions") \
\
product(bool, UseKNLSetting, false, DIAGNOSTIC, \
"Control whether Knights platform setting should be used") \
\
Expand Down Expand Up @@ -234,8 +238,6 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
"Turn off JVM mitigations related to Intel micro code " \
"mitigations for the Intel JCC erratum") \
\
product(bool, UseAPX, false, EXPERIMENTAL, \
"Use Advanced Performance Extensions on x86") \
// end of ARCH_FLAGS

#endif // CPU_X86_GLOBALS_X86_HPP
123 changes: 110 additions & 13 deletions src/hotspot/cpu/x86/vm_version_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,16 +56,22 @@ const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEAT
address VM_Version::_cpuinfo_segv_addr = 0;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = 0;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = 0;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = 0;

static BufferBlob* stub_blob;
static const int stub_size = 2000;

extern "C" {
  // Signatures of the runtime-generated stubs that are called through the
  // function pointers declared below.
  using get_cpu_info_stub_t    = void (*)(void*);
  using detect_virt_stub_t     = void (*)(uint32_t, uint32_t*);
  using clear_apx_test_state_t = void (*)();
}

// Entry points of the generated stubs. Each one starts out null;
// detect_virt_stub and clear_apx_test_state_stub are assigned in
// VM_Version::initialize().
static get_cpu_info_stub_t    get_cpu_info_stub         = nullptr;
static detect_virt_stub_t     detect_virt_stub          = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;

#ifdef _LP64

Expand Down Expand Up @@ -102,6 +108,27 @@ class VM_Version_StubGenerator: public StubCodeGenerator {

VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

address clear_apx_test_state() {
# define __ _masm->
  // Emits the clear_apx_test_state stub and returns the address of its first
  // instruction. Until the disabled section below is enabled, the stub
  // consists of a single `ret`.
  //
  // Intended purpose: EGPRs are call-clobbered registers. Explicitly zeroing
  // r16 and r31 while the signal is being handled guarantees that, if the
  // test values are still present after signal handling, it is because the
  // operating system restored them and not merely because nothing happened
  // to overwrite them.
  address stub_entry = __ pc();

  /* FIXME: Uncomment the following code after OS enablement of EGPR state
     save/restoration.
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
  */
  __ ret(0);
  return stub_entry;
}

address generate_get_cpu_info() {
// Flags to test CPU type.
const uint32_t HS_EFL_AC = 0x40000;
Expand All @@ -113,7 +140,8 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
Expand Down Expand Up @@ -288,7 +316,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ movl(Address(rsi, 4), rdx);

//
// cpuid(0x7) Structured Extended Features
// cpuid(0x7) Structured Extended Features Enumeration Leaf.
//
__ bind(sef_cpuid);
__ movl(rax, 7);
Expand All @@ -303,12 +331,16 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ movl(Address(rsi, 8), rcx);
__ movl(Address(rsi, 12), rdx);

// ECX = 1
//
// cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
//
__ bind(sefsl1_cpuid);
__ movl(rax, 7);
__ movl(rcx, 1);
__ cpuid();
__ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_ecx1_offset())));
__ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
__ movl(Address(rsi, 0), rax);
__ movl(Address(rsi, 4), rdx);

//
// Extended cpuid(0x80000000)
Expand Down Expand Up @@ -387,6 +419,46 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ movl(Address(rsi, 8), rcx);
__ movl(Address(rsi,12), rdx);

#ifndef PRODUCT
//
// Check if OS has enabled XGETBV instruction to access XCR0
// (OSXSAVE feature flag) and CPU supports APX
//
// To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
// and XCR0[19] bit for OS support to save/restore extended GPR state.
__ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
__ movl(rax, 0x200000);
__ andl(rax, Address(rsi, 4));
__ cmpl(rax, 0x200000);
__ jcc(Assembler::notEqual, vector_save_restore);
// check _cpuid_info.xem_xcr0_eax.bits.apx_f
__ movl(rax, 0x80000);
__ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
__ cmpl(rax, 0x80000);
__ jcc(Assembler::notEqual, vector_save_restore);

/* FIXME: Uncomment while integrating JDK-8329032
bool save_apx = UseAPX;
VM_Version::set_apx_cpuFeatures();
UseAPX = true;
__ mov64(r16, VM_Version::egpr_test_value());
__ mov64(r31, VM_Version::egpr_test_value());
*/
__ xorl(rsi, rsi);
VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
// Generate SEGV
__ movl(rax, Address(rsi, 0));

VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
/* FIXME: Uncomment after integration of JDK-8329032
__ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
__ movq(Address(rsi, 0), r16);
__ movq(Address(rsi, 8), r31);
UseAPX = save_apx;
*/
#endif
__ bind(vector_save_restore);
//
// Check if OS has enabled XGETBV instruction to access XCR0
// (OSXSAVE feature flag) and CPU supports AVX
Expand Down Expand Up @@ -580,6 +652,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ vmovdqu(xmm7, Address(rsp, 0));
__ addptr(rsp, 32);
#endif // _WINDOWS

generate_vzeroupper(wrapup);
VM_Version::clean_cpuFeatures();
UseAVX = saved_useavx;
Expand Down Expand Up @@ -940,6 +1013,7 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
}
}

if (UseAVX > use_avx_limit) {
if (UseSSE < 4) {
warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
Expand All @@ -963,6 +1037,16 @@ void VM_Version::get_processor_features() {
_features &= ~CPU_AVX512_VBMI2;
_features &= ~CPU_AVX512_BITALG;
_features &= ~CPU_AVX512_IFMA;
_features &= ~CPU_APX_F;
}

// Currently APX support is only enabled for targets supporting AVX512VL feature.
bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
if (UseAPX && !apx_supported) {
warning("UseAPX is not supported on this CPU, setting it to false");
FLAG_SET_DEFAULT(UseAPX, false);
} else if (FLAG_IS_DEFAULT(UseAPX)) {
FLAG_SET_DEFAULT(UseAPX, apx_supported ? true : false);
}

if (UseAVX < 2) {
Expand Down Expand Up @@ -1002,14 +1086,6 @@ void VM_Version::get_processor_features() {
}
}

// APX support not enabled yet
if (UseAPX) {
if (!FLAG_IS_DEFAULT(UseAPX)) {
warning("APX is not supported on this CPU.");
}
FLAG_SET_DEFAULT(UseAPX, false);
}

if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
_has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
} else {
Expand Down Expand Up @@ -2143,6 +2219,10 @@ int VM_Version::avx3_threshold() {
FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
}

// Invokes the generated clear_apx_test_state stub through its file-local
// function pointer. The pointer is initialized to nullptr and is assigned in
// VM_Version::initialize(), so this must not be called before that point.
void VM_Version::clear_apx_test_state() {
  (*clear_apx_test_state_stub)();
}

static bool _vm_version_initialized = false;

void VM_Version::initialize() {
Expand All @@ -2160,6 +2240,8 @@ void VM_Version::initialize() {
detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
g.generate_detect_virt());

clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
g.clear_apx_test_state());
get_processor_features();

LP64_ONLY(Assembler::precompute_instructions();)
Expand Down Expand Up @@ -2958,6 +3040,10 @@ uint64_t VM_Version::CpuidInfo::feature_flags() const {
result |= CPU_SSE4_2;
if (std_cpuid1_ecx.bits.popcnt != 0)
result |= CPU_POPCNT;
if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
xem_xcr0_eax.bits.apx_f != 0) {
result |= CPU_APX_F;
}
if (std_cpuid1_ecx.bits.avx != 0 &&
std_cpuid1_ecx.bits.osxsave != 0 &&
xem_xcr0_eax.bits.sse != 0 &&
Expand All @@ -2968,7 +3054,7 @@ uint64_t VM_Version::CpuidInfo::feature_flags() const {
result |= CPU_F16C;
if (sef_cpuid7_ebx.bits.avx2 != 0) {
result |= CPU_AVX2;
if (sef_cpuid7_ecx1_eax.bits.avx_ifma != 0)
if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
result |= CPU_AVX_IFMA;
}
if (sef_cpuid7_ecx.bits.gfni != 0)
Expand Down Expand Up @@ -3142,6 +3228,17 @@ bool VM_Version::os_supports_avx_vectors() {
return retVal;
}

// Returns true only when APX_F is reported as supported and both values
// recorded in _cpuid_info.apx_save match egpr_test_value().
//
// NOTE(review): apx_save presumably holds r16 and r31 as captured by the
// cpu-info stub after the APX test SEGV; the stores that would fill it are
// still commented out in generate_get_cpu_info() - confirm before relying
// on this returning true.
bool VM_Version::os_supports_apx_egprs() {
  // Short-circuit evaluation keeps the original order: the feature check
  // comes first, and the saved slots are only inspected when it passes.
  return supports_apx_f() &&
         _cpuid_info.apx_save[0] == egpr_test_value() &&
         _cpuid_info.apx_save[1] == egpr_test_value();
}

uint VM_Version::cores_per_cpu() {
uint result = 1;
if (is_intel()) {
Expand Down
Loading

0 comments on commit a941397

Please sign in to comment.