diff --git a/ext/softfloat/fall_maxmin.c b/ext/softfloat/fall_maxmin.c
index f40734c5d0..7efb86d1a5 100644
--- a/ext/softfloat/fall_maxmin.c
+++ b/ext/softfloat/fall_maxmin.c
@@ -37,31 +37,31 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define COMPARE_MAX(a, b, bits) \
 float ## bits ## _t f ## bits ## _max( float ## bits ## _t a, float ## bits ## _t b ) \
 { \
+    bool greater = f ## bits ## _lt_quiet(b, a) || \
+                   (f ## bits ## _eq(b, a) && signF ## bits ## UI(b.v)); \
+    \
     if (isNaNF ## bits ## UI(a.v) && isNaNF ## bits ## UI(b.v)) { \
         union ui ## bits ## _f ## bits ui; \
         ui.ui = defaultNaNF ## bits ## UI; \
         return ui.f; \
+    } else { \
+        return greater || isNaNF ## bits ## UI((b).v) ? a : b; \
     } \
-    \
-    bool greater = f ## bits ## _lt_quiet(b, a) || \
-                   (f ## bits ## _eq(b, a) && signF ## bits ## UI(b.v)); \
-    \
-    return greater || isNaNF ## bits ## UI((b).v) ? a : b; \
 }
 
 #define COMPARE_MIN(a, b, bits) \
 float ## bits ## _t f ## bits ## _min( float ## bits ## _t a, float ## bits ## _t b ) \
 { \
+    bool less = f ## bits ## _lt_quiet(a, b) || \
+               (f ## bits ## _eq(a, b) && signF ## bits ## UI(a.v)); \
+    \
     if (isNaNF ## bits ## UI(a.v) && isNaNF ## bits ## UI(b.v)) { \
         union ui ## bits ## _f ## bits ui; \
         ui.ui = defaultNaNF ## bits ## UI; \
         return ui.f; \
+    } else { \
+        return less || isNaNF ## bits ## UI((b).v) ? a : b; \
     } \
-    \
-    bool greater = f ## bits ## _lt_quiet(a, b) || \
-                   (f ## bits ## _eq(a, b) && signF ## bits ## UI(a.v)); \
-    \
-    return greater || isNaNF ## bits ## UI((b).v) ? a : b; \
 }
 
 COMPARE_MAX(a, b, 16);
diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa
index cf29250eda..8bb67abe91 100644
--- a/src/arch/riscv/isa/decoder.isa
+++ b/src/arch/riscv/isa/decoder.isa
@@ -2334,7 +2334,8 @@ decode QUADRANT default Unknown::unknown() {
                     // The encodings corresponding to the masked versions
                     // (vm=0) of vfmv.f.s are reserved
                     0x1: VectorNonSplitFormat::vfmv_f_s({{
-                        Fd_bits = Vs1_vu[0];
+                        freg_t fd = freg(Vs2_vu[0]);
+                        Fd_bits = fd.v;
                     }}, OPFVV, VectorDummyOp);
                 }
             }
diff --git a/src/arch/riscv/isa/formats/vector_arith.isa b/src/arch/riscv/isa/formats/vector_arith.isa
index 43e02385ee..0f50897fba 100644
--- a/src/arch/riscv/isa/formats/vector_arith.isa
+++ b/src/arch/riscv/isa/formats/vector_arith.isa
@@ -69,6 +69,54 @@ let {{
 
         return '''
             uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx;
        ''' + code
+
+    def wideningOpRegisterConstraintChecks(code):
+        return '''
+            const uint32_t num_microops = 1 << std::max<int64_t>(0, vtype_vlmul(machInst.vtype8) + 1);
+            if ((machInst.vd % alignToPowerOfTwo(num_microops)) != 0) {
+                std::string error =
+                    csprintf("Unaligned Vd group in Widening op");
+                return std::make_shared<IllegalInstFault>(error, machInst);
+            }
+            if ((machInst.vs2 <= machInst.vd) && (machInst.vd < (machInst.vs2 + num_microops - 1))) {
+                // A destination vector register group can overlap a source vector
+                // register group if: the destination EEW is greater than the source
+                // EEW, the source EMUL is at least 1, and the overlap is in the
+                // highest-numbered part of the destination register group.
+                std::string error =
+                    csprintf("Unsupported overlap in Vs2 and Vd for Widening op");
+                return std::make_shared<IllegalInstFault>(error, machInst);
+            }
+        ''' + code
+
+    def narrowingOpRegisterConstraintChecks(code):
+        return '''
+            const uint32_t num_microops = 1 << std::max<int64_t>(0, vtype_vlmul(machInst.vtype8) + 1);
+            if ((machInst.vs2 % alignToPowerOfTwo(num_microops)) != 0) {
+                std::string error =
+                    csprintf("Unaligned VS2 group in Narrowing op");
+                return std::make_shared<IllegalInstFault>(error, machInst);
+            }
+            if ((machInst.vs2 < machInst.vd) && (machInst.vd <= (machInst.vs2 + num_microops - 1))) {
+                // A destination vector register group can overlap a source vector
+                // register group if: the destination EEW is smaller than the source
+                // EEW and the overlap is in the lowest-numbered part of the source
+                // register group.
+                std::string error =
+                    csprintf("Unsupported overlap in Vs2 and Vd for Narrowing op");
+                return std::make_shared<IllegalInstFault>(error, machInst);
+            }
+        ''' + code
+
+    def fflags_wrapper(code):
+        return '''
+            RegVal FFLAGS = xc->readMiscReg(MISCREG_FFLAGS);
+            std::feclearexcept(FE_ALL_EXCEPT);
+        ''' + code + '''
+            FFLAGS |= softfloat_exceptionFlags;
+            softfloat_exceptionFlags = 0;
+            xc->setMiscReg(MISCREG_FFLAGS, FFLAGS);
+        '''
 }};
@@ -239,6 +287,8 @@ def format VectorIntWideningFormat(code, category, *flags) {{
         code = eiDeclarePrefix(code, widening=True)
     code = loopWrapper(code)
 
+    code = wideningOpRegisterConstraintChecks(code)
+
     vm_decl_rd = ""
     if v0_required:
         vm_decl_rd = vmDeclAndReadData()
@@ -295,6 +345,7 @@ def format VectorIntNarrowingFormat(code, category, *flags) {{
         code = maskCondWrapper(code)
         code = eiDeclarePrefix(code, widening=True)
     code = loopWrapper(code)
+    code = narrowingOpRegisterConstraintChecks(code)
 
     vm_decl_rd = vmDeclAndReadData()
     microiop = InstObjParams(name + "_micro",
@@ -313,7 +364,7 @@ def format VectorIntNarrowingFormat(code, category, *flags) {{
 
     header_output = \
         VectorIntWideningMicroDeclare.subst(microiop) + \
        VectorIntWideningMicroConstructor.subst(microiop) + \
-        VectorIntWideningMicroExecute.subst(microiop) + \
+        VectorIntNarrowingMicroExecute.subst(microiop) + \
         VectorIntWideningMacroDeclare.subst(iop) + \
         VectorIntWideningMacroConstructor.subst(iop)
@@ -480,6 +531,7 @@ def format VectorFloatFormat(code, category, *flags) {{
     if need_elem_idx:
         code = eiDeclarePrefix(code)
     code = loopWrapper(code)
+    code = fflags_wrapper(code)
 
     vm_decl_rd = ""
     if v0_required:
@@ -525,6 +577,7 @@ def format VectorFloatCvtFormat(code, category, *flags) {{
     code = maskCondWrapper(code)
     code = eiDeclarePrefix(code)
     code = loopWrapper(code)
+    code = fflags_wrapper(code)
 
     vm_decl_rd = vmDeclAndReadData()
 
@@ -588,6 +641,9 @@ def format VectorFloatWideningFormat(code, category, *flags) {{
     if need_elem_idx:
         code = eiDeclarePrefix(code, widening=True)
     code = loopWrapper(code)
+    code = fflags_wrapper(code)
+
+    code = wideningOpRegisterConstraintChecks(code)
 
     vm_decl_rd = ""
     if v0_required:
@@ -633,6 +689,7 @@ def format VectorFloatWideningCvtFormat(code, category, *flags) {{
     code = maskCondWrapper(code)
     code = eiDeclarePrefix(code)
     code = loopWrapper(code)
+    code = fflags_wrapper(code)
 
     vm_decl_rd = vmDeclAndReadData()
 
@@ -676,6 +733,8 @@ def format VectorFloatNarrowingCvtFormat(code, category, *flags) {{
     code = maskCondWrapper(code)
     code = eiDeclarePrefix(code)
     code = loopWrapper(code)
+    code = fflags_wrapper(code)
+    code = narrowingOpRegisterConstraintChecks(code)
 
     vm_decl_rd = vmDeclAndReadData()
 
@@ -694,7 +753,7 @@ def format VectorFloatNarrowingCvtFormat(code, category, *flags) {{
 
     header_output = \
         VectorFloatCvtMicroDeclare.subst(microiop) + \
         VectorFloatMicroConstructor.subst(microiop) + \
-        VectorFloatWideningMicroExecute.subst(microiop) + \
+        VectorFloatNarrowingMicroExecute.subst(microiop) + \
         VectorFloatCvtMacroDeclare.subst(iop) + \
         VectorIntWideningMacroConstructor.subst(iop)
@@ -728,6 +787,7 @@ def format VectorFloatMaskFormat(code, category, *flags) {{
     code = maskCondWrapper(code)
     code = eiDeclarePrefix(code)
     code = loopWrapper(code)
+    code = fflags_wrapper(code)
 
     microiop = InstObjParams(name + "_micro",
                              Name + "Micro",
@@ -868,15 +928,12 @@ def format Vector1Vs1RdMaskFormat(code, category, *flags){{
 
 def format VectorNonSplitFormat(code, category, *flags) {{
     inst_name, inst_suffix = name.split("_", maxsplit=1)
-    v0_required = inst_name not in ["vmv", "vfmv"]
     vm_decl_rd = ""
-    if v0_required:
-        vm_decl_rd = vmDeclAndReadData()
-    mask_cond = v0_required
     set_vm_idx = ""
-    if mask_cond:
-        set_vm_idx = setSrcVm()
+
+    if inst_name == "vfmv" :
+        code = fflags_wrapper(code)
 
     iop = InstObjParams(name,
                         Name,
@@ -886,12 +943,15 @@ def format VectorNonSplitFormat(code, category, *flags) {{
                          'set_vm_idx': set_vm_idx},
                         flags)
 
+    if inst_name == "vfmv" :
         execute_block = VectorFloatNonSplitExecute.subst(iop)
         decode_block = VectorFloatDecodeBlock.subst(iop)
-    else :
-        execute_block = VectorNonSplitExecute.subst(iop)
+    elif inst_name == "vmv" :
+        execute_block = VectorIntNonSplitExecute.subst(iop)
         decode_block = VectorIntDecodeBlock.subst(iop)
+    else :
+        error("Unsupported inst for VectorNonSplitFormat: %s" % inst_name)
 
     # Because of the use of templates, we had to put all parts in header to
     # keep the compiler happy.
@@ -1006,6 +1066,9 @@ def format VectorReduceFloatFormat(code, category, *flags) {{
         using et = ElemType;
         using vu = decltype(et::v);
     '''
+
+    code = fflags_wrapper(code)
+
     microiop = InstObjParams(name + "_micro",
                              Name + "Micro",
                              'VectorArithMicroInst',
diff --git a/src/arch/riscv/isa/templates/vector_arith.isa b/src/arch/riscv/isa/templates/vector_arith.isa
index d588933501..0cfdf04f51 100644
--- a/src/arch/riscv/isa/templates/vector_arith.isa
+++ b/src/arch/riscv/isa/templates/vector_arith.isa
@@ -355,7 +355,38 @@ Fault
 
     if (machInst.vill)
        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    const int64_t vlmul = vtype_vlmul(machInst.vtype8);
+    const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true);
+    const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2;
+    [[maybe_unused]] const size_t offset =
+        (this->microIdx % 2 == 0) ? 0 : micro_vlmax;
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(vm_decl_rd)s;
+    %(copy_old_vd)s;
+    %(code)s;
+    %(op_wb)s;
+    return NoFault;
+}
+
+}};
+def template VectorIntNarrowingMicroExecute {{
+
+template <typename ElemType>
+Fault
+%(class_name)s::execute(ExecContext* xc,
+    Trace::InstRecord* traceData) const
+{
+    using vu [[maybe_unused]] = std::make_unsigned_t<ElemType>;
+    using vi [[maybe_unused]] = std::make_signed_t<ElemType>;
+    using vwu [[maybe_unused]] = typename double_width<vu>::type;
+    using vwi [[maybe_unused]] = typename double_width<vi>::type;
+    [[maybe_unused]] constexpr size_t sew = sizeof(vu) * 8;
+
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
     const int64_t vlmul = vtype_vlmul(machInst.vtype8);
     const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true);
     const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2;
@@ -576,6 +607,40 @@ Fault
 
 }};
 
+def template VectorFloatNarrowingMicroExecute {{
+
+template <typename ElemType>
+Fault
+%(class_name)s::execute(ExecContext* xc,
+    Trace::InstRecord* traceData) const
+{
+    using et = ElemType;
+    using vu [[maybe_unused]] = decltype(et::v);
+    using ewt = typename double_width<et>::type;
+    using vwu = decltype(ewt::v);
+
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+
+    VRM_REQUIRED;
+
+    const int64_t vlmul = vtype_vlmul(machInst.vtype8);
+    const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true);
+    const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2;
+    [[maybe_unused]] const size_t offset =
+        (this->microIdx % 2 == 0) ? 0 : micro_vlmax;
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(vm_decl_rd)s;
+    %(copy_old_vd)s;
+    %(code)s;
+    %(op_wb)s;
+    return NoFault;
+}
+
+}};
+
 def template VectorFloatWideningDecodeBlock {{
 
     switch(machInst.vtype8.vsew) {
@@ -1166,7 +1231,7 @@ template
 
 }};
 
-def template VectorNonSplitExecute {{
+def template VectorIntNonSplitExecute {{
 
 template <typename ElemType>
 Fault
diff --git a/src/arch/riscv/regs/float.hh b/src/arch/riscv/regs/float.hh
index c0934bd0ce..b505cd2641 100644
--- a/src/arch/riscv/regs/float.hh
+++ b/src/arch/riscv/regs/float.hh
@@ -105,7 +105,10 @@ static constexpr float64_t f64(freg_t r) { return r; }
 static constexpr freg_t freg(float16_t f) { return {boxF16(f.v)}; }
 static constexpr freg_t freg(float32_t f) { return {boxF32(f.v)}; }
 static constexpr freg_t freg(float64_t f) { return f; }
-static constexpr freg_t freg(uint_fast16_t f) { return {f}; }
+
+static constexpr freg_t freg(uint16_t f) { return {boxF16(f)}; }
+static constexpr freg_t freg(uint32_t f) { return {boxF32(f)}; }
+static constexpr freg_t freg(uint64_t f) { return {f}; }
 
 namespace float_reg {
 