From d152151587e9891ee91efb5111626373d7b83dd1 Mon Sep 17 00:00:00 2001
From: Robbin Ehn
Date: Thu, 9 Nov 2023 13:00:46 +0000
Subject: [PATCH] zcb instruction set

Add encoders for the Zcb compressed instructions to the RISC-V assembler:
c.lbu, c.lh, c.lhu, c.sb, c.sh, c.zext.b, c.sext.b, c.zext.h, c.sext.h,
c.zext.w, c.not and c.mul.

The uncompressed encoders lbu, lh, lhu, sb, sh, sext_b, sext_h, zext_h and
mul are renamed with a leading underscore. New wrappers under the old names
emit the 16-bit form when do_compress_zcb() holds (compression is active,
ext_Zcb is enabled and the registers are valid compressed registers) and the
operands fit the compressed encoding; otherwise they emit the original
encoding.

MacroAssembler::notr, zext_b and zext_w select c.not, c.zext.b and c.zext.w
in the same way (zext_w now asserts UseZba), and zero_extend/sign_extend are
restructured as a switch on the bit width.

ext_Zcb is declared in VM_Version and enabled in rivos_features().
---
Review notes (text between "---" and the diffstat is not part of the commit):
 * zero_extend(dst, src, 8) used zext_b unconditionally before this patch;
   it now does so only when UseZbb is set and otherwise falls back to the
   slli/srli pair, although the zext_b wrapper added here emits andi or
   c.zext.b. Confirm that the UseZbb guard is intended.
 * c_mul has no assert_cond(do_compress_zcb(...)), unlike the other c_*
   encoders added here.
 * The forward declaration "class Assembler;" added to vm_version_riscv.hpp
   is not used by anything in this patch.

 src/hotspot/cpu/riscv/assembler_riscv.hpp     | 260 +++++++++++++++++-
 .../cpu/riscv/macroAssembler_riscv.cpp        |  71 +++--
 .../cpu/riscv/macroAssembler_riscv.hpp        |  20 +-
 src/hotspot/cpu/riscv/vm_version_riscv.hpp    |   7 +
 .../linux_riscv/vm_version_linux_riscv.cpp    |   2 +
 5 files changed, 319 insertions(+), 41 deletions(-)

diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp
index 922e412356dfd..f6f4c5e04f147 100644
--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp
@@ -506,7 +506,7 @@ class Assembler : public AbstractAssembler {
   INSN(sllw,  0b0111011, 0b001, 0b0000000);
   INSN(sraw,  0b0111011, 0b101, 0b0100000);
   INSN(srlw,  0b0111011, 0b101, 0b0000000);
-  INSN(mul,   0b0110011, 0b000, 0b0000001);
+  INSN(_mul,  0b0110011, 0b000, 0b0000001);
   INSN(mulh,  0b0110011, 0b001, 0b0000001);
   INSN(mulhsu,0b0110011, 0b010, 0b0000001);
   INSN(mulhu, 0b0110011, 0b011, 0b0000001);
@@ -537,9 +537,9 @@ class Assembler : public AbstractAssembler {
   }
 
   INSN(lb,  0b0000011, 0b000);
-  INSN(lbu, 0b0000011, 0b100);
-  INSN(lh,  0b0000011, 0b001);
-  INSN(lhu, 0b0000011, 0b101);
+  INSN(_lbu, 0b0000011, 0b100); // Zcb
+  INSN(_lh,  0b0000011, 0b001); // Zcb
+  INSN(_lhu, 0b0000011, 0b101); // Zcb
   INSN(_lw, 0b0000011, 0b010);
   INSN(lwu, 0b0000011, 0b110);
   INSN(_ld, 0b0000011, 0b011);
@@ -609,8 +609,8 @@ class Assembler : public AbstractAssembler {
     emit(insn);                                                             \
   }                                                                         \
 
-  INSN(sb,  Register,      0b0100011, 0b000);
-  INSN(sh,  Register,      0b0100011, 0b001);
+  INSN(_sb, Register,      0b0100011, 0b000);
+  INSN(_sh, Register,      0b0100011, 0b001);
   INSN(_sw, Register,      0b0100011, 0b010);
   INSN(_sd, Register,      0b0100011, 0b011);
   INSN(fsw, FloatRegister, 0b0100111, 0b010);
@@ -1867,9 +1867,9 @@ enum Nf {
   }
 
   INSN(rev8,   0b0010011, 0b101, 0b011010111000);
-  INSN(sext_b, 0b0010011, 0b001, 0b011000000100);
-  INSN(sext_h, 0b0010011, 0b001, 0b011000000101);
-  INSN(zext_h, 0b0111011, 0b100, 0b000010000000);
+  INSN(_sext_b, 0b0010011, 0b001, 0b011000000100);
+  INSN(_sext_h, 0b0010011, 0b001, 0b011000000101);
+  INSN(_zext_h, 0b0111011, 0b100, 0b000010000000);
   INSN(clz,    0b0010011, 0b001, 0b011000000000);
   INSN(clzw,   0b0011011, 0b001, 0b011000000000);
   INSN(ctz,    0b0010011, 0b001, 0b011000000001);
@@ -2581,6 +2581,15 @@ enum Nf {
     return UseRVC && in_compressible_region();
   }
 
+  bool do_compress_zcb(Register reg1 = noreg, Register reg2 = noreg) const {
+    return do_compress() && VM_Version::ext_Zcb.enabled() &&
+           (reg1 == noreg || reg1->is_compressed_valid()) && (reg2 == noreg || reg2->is_compressed_valid());
+  }
+
+  bool do_compress_zcb_zbb(Register reg1 = noreg, Register reg2 = noreg) const {
+    return do_compress_zcb(reg1, reg2) && UseZbb;
+  }
+
 // --------------------------
 // Load/store register
 // --------------------------
@@ -2915,6 +2924,239 @@ enum Nf {
 
 #undef INSN
 
+// -------------- ZCB Instruction Definitions --------------
+// Zcb additional C instructions
+ private:
+  // Format CLH, c.lh/c.lhu
+  template <bool Unsigned>
+  void c_lh_if(Register Rd_Rs2, Register Rs1, uint32_t uimm) {
+    assert_cond(uimm == 0 || uimm == 2);
+    assert_cond(do_compress_zcb(Rd_Rs2, Rs1));
+    uint16_t insn = 0;
+    c_patch((address)&insn, 1, 0, 0b00);
+    c_patch_compressed_reg((address)&insn, 2, Rd_Rs2);
+    c_patch((address)&insn, 5, 5, (uimm & nth_bit(1)) >> 1);
+    c_patch((address)&insn, 6, 6, Unsigned ? 0 : 1); // bit 6: 0 = c.lhu, 1 = c.lh
+    c_patch_compressed_reg((address)&insn, 7, Rs1);
+    c_patch((address)&insn, 12, 10, 0b001);
+    c_patch((address)&insn, 15, 13, 0b100);
+    emit_int16(insn);
+  }
+
+  template <bool Unsigned>
+  void lh_c_mux(Register Rd_Rs2, Register Rs1, const int32_t uimm) {
+    if (do_compress_zcb(Rd_Rs2, Rs1) &&
+        (uimm == 0 || uimm == 2)) {
+      c_lh_if<Unsigned>(Rd_Rs2, Rs1, uimm);
+    } else {
+      if (Unsigned) {
+        _lhu(Rd_Rs2, Rs1, uimm);
+      } else {
+        _lh(Rd_Rs2, Rs1, uimm);
+      }
+    }
+  }
+
+  // Format CU, c.[sz]ext.*, c.not
+  template <uint8_t InstructionType>
+  void c_u_if(Register Rs1) {
+    assert_cond(do_compress_zcb(Rs1));
+    uint16_t insn = 0;
+    c_patch((address)&insn, 1, 0, 0b01);
+    c_patch((address)&insn, 4, 2, InstructionType); // bits 4..2 select the operation
+    c_patch((address)&insn, 6, 5, 0b11);
+    c_patch_compressed_reg((address)&insn, 7, Rs1);
+    c_patch((address)&insn, 12, 10, 0b111);
+    c_patch((address)&insn, 15, 13, 0b100);
+    emit_int16(insn);
+  }
+
+ public:
+
+  // Prerequisites: Zcb
+  void c_lh(Register Rd_Rs2, Register Rs1, const int32_t uimm) { c_lh_if<false>(Rd_Rs2, Rs1, uimm); }
+  void lh(Register Rd_Rs2, Register Rs1, const int32_t uimm) { lh_c_mux<false>(Rd_Rs2, Rs1, uimm); }
+
+  // Prerequisites: Zcb
+  void c_lhu(Register Rd_Rs2, Register Rs1, const int32_t uimm) { c_lh_if<true>(Rd_Rs2, Rs1, uimm); }
+  void lhu(Register Rd_Rs2, Register Rs1, const int32_t uimm) { lh_c_mux<true>(Rd_Rs2, Rs1, uimm); }
+
+  // Prerequisites: Zcb
+  // Format CLB, single instruction
+  void c_lbu(Register Rd_Rs2, Register Rs1, uint32_t uimm) {
+    assert_cond(uimm <= 3);
+    assert_cond(do_compress_zcb(Rd_Rs2, Rs1));
+    uint16_t insn = 0;
+    c_patch((address)&insn, 1, 0, 0b00);
+    c_patch_compressed_reg((address)&insn, 2, Rd_Rs2);
+    c_patch((address)&insn, 5, 5, (uimm & nth_bit(1)) >> 1);
+    c_patch((address)&insn, 6, 6, (uimm & nth_bit(0)) >> 0);
+    c_patch_compressed_reg((address)&insn, 7, Rs1);
+    c_patch((address)&insn, 12, 10, 0b000);
+    c_patch((address)&insn, 15, 13, 0b100);
+    emit_int16(insn);
+  }
+
+  void lbu(Register Rd_Rs2, Register Rs1, const int32_t uimm) {
+    if (do_compress_zcb(Rd_Rs2, Rs1) &&
+        uimm >= 0 && uimm <= 3) {
+      c_lbu(Rd_Rs2, Rs1, uimm);
+    } else {
+      _lbu(Rd_Rs2, Rs1, uimm);
+    }
+  }
+
+  // Prerequisites: Zcb
+  // Format CSB, single instruction
+  void c_sb(Register Rd_Rs2, Register Rs1, uint32_t uimm) {
+    assert_cond(uimm <= 3);
+    assert_cond(do_compress_zcb(Rd_Rs2, Rs1));
+    uint16_t insn = 0;
+    c_patch((address)&insn, 1, 0, 0b00);
+    c_patch_compressed_reg((address)&insn, 2, Rd_Rs2);
+    c_patch((address)&insn, 5, 5, (uimm & nth_bit(1)) >> 1);
+    c_patch((address)&insn, 6, 6, (uimm & nth_bit(0)) >> 0);
+    c_patch_compressed_reg((address)&insn, 7, Rs1);
+    c_patch((address)&insn, 12, 10, 0b010);
+    c_patch((address)&insn, 15, 13, 0b100);
+    emit_int16(insn);
+  }
+
+  void sb(Register Rd_Rs2, Register Rs1, const int32_t uimm) {
+    if (do_compress_zcb(Rd_Rs2, Rs1) &&
+        uimm >= 0 && uimm <= 3) {
+      c_sb(Rd_Rs2, Rs1, uimm);
+    } else {
+      _sb(Rd_Rs2, Rs1, uimm);
+    }
+  }
+
+  // Prerequisites: Zcb
+  // Format CSH, single instruction
+  void c_sh(Register Rd_Rs2, Register Rs1, uint32_t uimm) {
+    assert_cond(uimm == 0 || uimm == 2);
+    assert_cond(do_compress_zcb(Rd_Rs2, Rs1));
+    uint16_t insn = 0;
+    c_patch((address)&insn, 1, 0, 0b00);
+    c_patch_compressed_reg((address)&insn, 2, Rd_Rs2);
+    c_patch((address)&insn, 5, 5, (uimm & nth_bit(1)) >> 1);
+    c_patch((address)&insn, 6, 6, 0);
+    c_patch_compressed_reg((address)&insn, 7, Rs1);
+    c_patch((address)&insn, 12, 10, 0b011);
+    c_patch((address)&insn, 15, 13, 0b100);
+    emit_int16(insn);
+  }
+
+  void sh(Register Rd_Rs2, Register Rs1, const int32_t uimm) {
+    if (do_compress_zcb(Rd_Rs2, Rs1) &&
+        (uimm == 0 || uimm == 2)) {
+      c_sh(Rd_Rs2, Rs1, uimm);
+    } else {
+      _sh(Rd_Rs2, Rs1, uimm);
+    }
+  }
+
+  // Prerequisites: Zcb
+  // Format CU
+  void c_zext_b(Register Rs1) {
+    assert_cond(do_compress_zcb(Rs1));
+    c_u_if<0b000>(Rs1);
+  }
+
+  // Prerequisites: Zbb
+  void sext_b(Register Rd_Rs2, Register Rs1) {
+    assert_cond(UseZbb);
+    if (do_compress_zcb_zbb(Rd_Rs2, Rs1) && (Rd_Rs2 == Rs1)) {
+      c_sext_b(Rd_Rs2);
+    } else {
+      _sext_b(Rd_Rs2, Rs1);
+    }
+  }
+
+  // Prerequisites: Zcb, Zbb
+  // Format CU
+  void c_sext_b(Register Rs1) {
+    c_u_if<0b001>(Rs1);
+  }
+
+  // Prerequisites: Zbb
+  void zext_h(Register Rd_Rs2, Register Rs1) {
+    assert_cond(UseZbb);
+    if (do_compress_zcb_zbb(Rd_Rs2, Rs1) && (Rd_Rs2 == Rs1)) {
+      c_zext_h(Rd_Rs2);
+    } else {
+      _zext_h(Rd_Rs2, Rs1);
+    }
+  }
+
+  // Prerequisites: Zcb, Zbb
+  // Format CU
+  void c_zext_h(Register Rs1) {
+    //assert(instruction_premitted(Rs1), "invalid");
+    c_u_if<0b010>(Rs1);
+  }
+
+  // Prerequisites: Zbb
+  void sext_h(Register Rd_Rs2, Register Rs1) {
+    assert_cond(UseZbb);
+    if (do_compress_zcb_zbb(Rd_Rs2, Rs1) && (Rd_Rs2 == Rs1)) {
+      c_sext_h(Rd_Rs2);
+    } else {
+      _sext_h(Rd_Rs2, Rs1);
+    }
+  }
+
+  // Prerequisites: Zcb, Zbb
+  // Format CU
+  void c_sext_h(Register Rs1) {
+    c_u_if<0b011>(Rs1);
+  }
+
+  // Prerequisites: Zcb, Zba
+  // Format CU
+  void c_zext_w(Register Rs1) {
+    c_u_if<0b100>(Rs1);
+  }
+
+  // Prerequisites: Zcb
+  // Format CU
+  void c_not(Register Rs1) {
+    c_u_if<0b101>(Rs1);
+  }
+
+  // Prerequisites: Zcb (M or Zmmul)
+  // Format CA, c.mul
+  void c_mul(Register Rd_Rs1, Register Rs2) {
+    uint16_t insn = 0;
+    c_patch((address)&insn, 1, 0, 0b01);
+    c_patch_compressed_reg((address)&insn, 2, Rs2);
+    c_patch((address)&insn, 6, 5, 0b10);
+    c_patch_compressed_reg((address)&insn, 7, Rd_Rs1);
+    c_patch((address)&insn, 12, 10, 0b111);
+    c_patch((address)&insn, 15, 13, 0b100);
+    emit_int16(insn);
+  }
+
+  void mul(Register Rd, Register Rs1, Register Rs2) {
+    if (Rd != Rs1 && Rd != Rs2) {
+      // Three registers needed without a mv, emit uncompressed
+      _mul(Rd, Rs1, Rs2);
+      return;
+    }
+
+    // Rd is either Rs1 or Rs2
+    if (!do_compress_zcb(Rs2, Rs1)) {
+      _mul(Rd, Rs1, Rs2);
+    } else {
+      if (Rd == Rs2) {
+        Rs2 = Rs1; // Rd aliases Rs2: pass the other operand as c_mul's second register
+      } else {
+        assert(Rd == Rs1, "must be");
+      }
+      c_mul(Rd, Rs2);
+    }
+  }
+
   // Stack overflow checking
   virtual void bang_stack_with_offset(int offset) { Unimplemented(); }
 
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
index 3e72865ca62f0..86e79df57009e 100644
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
@@ -4488,41 +4488,54 @@ void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp
 }
 
 void MacroAssembler::zero_extend(Register dst, Register src, int bits) {
-  if (UseZba && bits == 32) {
-    zext_w(dst, src);
-    return;
-  }
-
-  if (UseZbb && bits == 16) {
-    zext_h(dst, src);
-    return;
-  }
-
-  if (bits == 8) {
-    zext_b(dst, src);
-  } else {
-    slli(dst, src, XLEN - bits);
-    srli(dst, dst, XLEN - bits);
+  switch (bits) {
+    case 32:
+      if (UseZba) {
+        zext_w(dst, src);
+        return;
+      }
+      break;
+    case 16:
+      if (UseZbb) {
+        zext_h(dst, src);
+        return;
+      }
+      break;
+    case 8:
+      if (UseZbb) {
+        zext_b(dst, src);
+        return;
+      }
+      break;
+    default:
+      break;
   }
+  slli(dst, src, XLEN - bits);
+  srli(dst, dst, XLEN - bits);
 }
 
 void MacroAssembler::sign_extend(Register dst, Register src, int bits) {
-  if (UseZbb) {
-    if (bits == 8) {
-      sext_b(dst, src);
+  switch (bits) {
+    case 32:
+      sext_w(dst, src);
       return;
-    } else if (bits == 16) {
-      sext_h(dst, src);
-      return;
-    }
-  }
-
-  if (bits == 32) {
-    sext_w(dst, src);
-  } else {
-    slli(dst, src, XLEN - bits);
-    srai(dst, dst, XLEN - bits);
+    case 16:
+      if (UseZbb) {
+        sext_h(dst, src);
+        return;
+      }
+      break;
+    case 8:
+      if (UseZbb) {
+        sext_b(dst, src);
+        return;
+      }
+      break;
+    default:
+      break;
   }
+  slli(dst, src, XLEN - bits);
+  srai(dst, dst, XLEN - bits);
 }
 
 void MacroAssembler::cmp_x2i(Register dst, Register src1, Register src2,
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
index 3b110cd3e28db..f30d7840206a0 100644
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
@@ -473,7 +473,11 @@ class MacroAssembler: public Assembler {
   }
 
   inline void notr(Register Rd, Register Rs) {
-    xori(Rd, Rs, -1);
+    if (do_compress_zcb(Rd, Rs) && (Rd == Rs)) {
+      c_not(Rd);
+    } else {
+      xori(Rd, Rs, -1);
+    }
   }
 
   inline void neg(Register Rd, Register Rs) {
@@ -489,7 +493,12 @@ class MacroAssembler: public Assembler {
   }
 
   inline void zext_b(Register Rd, Register Rs) {
-    andi(Rd, Rs, 0xFF);
+    if (do_compress_zcb(Rd, Rs) &&
+        (Rd == Rs)) {
+      c_zext_b(Rd);
+    } else {
+      andi(Rd, Rs, 0xFF);
+    }
   }
 
   inline void seqz(Register Rd, Register Rs) {
@@ -511,7 +520,12 @@ class MacroAssembler: public Assembler {
   // Bit-manipulation extension pseudo instructions
   // zero extend word
   inline void zext_w(Register Rd, Register Rs) {
-    add_uw(Rd, Rs, zr);
+    assert(UseZba, "must be");
+    if (do_compress_zcb(Rd, Rs) && (Rd == Rs)) {
+      c_zext_w(Rd);
+    } else {
+      add_uw(Rd, Rs, zr);
+    }
   }
 
   // Floating-point data-processing pseudo instructions
diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp
index 3c769ebfe2afe..5af3a173ec33f 100644
--- a/src/hotspot/cpu/riscv/vm_version_riscv.hpp
+++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp
@@ -36,6 +36,7 @@
 #include "utilities/sizes.hpp"
 
 class RiscvHwprobe;
+class Assembler;
 
 class VM_Version : public Abstract_VM_Version {
   friend RiscvHwprobe;
@@ -110,6 +111,9 @@ class VM_Version : public Abstract_VM_Version {
   // Zic64b Cache blocks must be 64 bytes in size, naturally aligned in the address space.
   // Zihintpause Pause instruction HINT
   //
+  // Zc Code Size Reduction - Additional compressed instructions.
+  // Zcb Simple code-size saving instructions
+  //
   // Other features and settings
   // mvendorid Manufactory JEDEC id encoded, ISA vol 2 3.1.2..
   // marchid Id for microarch. Mvendorid plus marchid uniquely identify the microarch.
@@ -117,6 +121,8 @@ class VM_Version : public Abstract_VM_Version {
   // unaligned_access Unaligned memory accesses (unknown, unspported, emulated, slow, firmware, fast)
   // satp mode SATP bits (number of virtual addr bits) mbare, sv39, sv48, sv57, sv64
 
+ public:
+
   #define RV_NO_FLAG_BIT (BitsPerWord+1) // nth_bit will return 0 on values larger than BitsPerWord
 
   // declaration name , extension name, bit pos ,in str, mapped flag)
@@ -137,6 +143,7 @@ class VM_Version : public Abstract_VM_Version {
   decl(ext_Zbb , "Zbb" , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZbb)) \
   decl(ext_Zbc , "Zbc" , RV_NO_FLAG_BIT, true , NO_UPDATE_DEFAULT) \
   decl(ext_Zbs , "Zbs" , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZbs)) \
+  decl(ext_Zcb , "Zcb" , RV_NO_FLAG_BIT, true , NO_UPDATE_DEFAULT) \
   decl(ext_Zicsr , "Zicsr" , RV_NO_FLAG_BIT, true , NO_UPDATE_DEFAULT) \
   decl(ext_Zifencei , "Zifencei" , RV_NO_FLAG_BIT, true , NO_UPDATE_DEFAULT) \
   decl(ext_Zic64b , "Zic64b" , RV_NO_FLAG_BIT, true , UPDATE_DEFAULT(UseZic64b)) \
diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
index 6e93406b1a353..354dbd70bb4e1 100644
--- a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
+++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp
@@ -240,6 +240,8 @@ void VM_Version::rivos_features() {
   ext_Zbb.enable_feature();
   ext_Zbs.enable_feature();
 
+  ext_Zcb.enable_feature();
+
   ext_Zicsr.enable_feature();
   ext_Zifencei.enable_feature();
   ext_Zic64b.enable_feature();