diff --git a/ext/softfloat/fall_maxmin.c b/ext/softfloat/fall_maxmin.c
index f40734c5d0..7efb86d1a5 100644
--- a/ext/softfloat/fall_maxmin.c
+++ b/ext/softfloat/fall_maxmin.c
@@ -37,31 +37,31 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define COMPARE_MAX(a, b, bits) \
 float ## bits ## _t f ## bits ## _max( float ## bits ## _t a, float ## bits ## _t b ) \
 { \
+    bool greater = f ## bits ## _lt_quiet(b, a) || \
+                   (f ## bits ## _eq(b, a) && signF ## bits ## UI(b.v)); \
+    \
     if (isNaNF ## bits ## UI(a.v) && isNaNF ## bits ## UI(b.v)) { \
         union ui ## bits ## _f ## bits ui; \
         ui.ui = defaultNaNF ## bits ## UI; \
         return ui.f; \
+    } else { \
+        return greater || isNaNF ## bits ## UI((b).v) ? a : b; \
     } \
-    \
-    bool greater = f ## bits ## _lt_quiet(b, a) || \
-                   (f ## bits ## _eq(b, a) && signF ## bits ## UI(b.v)); \
-    \
-    return greater || isNaNF ## bits ## UI((b).v) ? a : b; \
 }
 
 #define COMPARE_MIN(a, b, bits) \
 float ## bits ## _t f ## bits ## _min( float ## bits ## _t a, float ## bits ## _t b ) \
 { \
+    bool less = f ## bits ## _lt_quiet(a, b) || \
+               (f ## bits ## _eq(a, b) && signF ## bits ## UI(a.v)); \
+    \
     if (isNaNF ## bits ## UI(a.v) && isNaNF ## bits ## UI(b.v)) { \
         union ui ## bits ## _f ## bits ui; \
         ui.ui = defaultNaNF ## bits ## UI; \
         return ui.f; \
+    } else { \
+        return less || isNaNF ## bits ## UI((b).v) ? a : b; \
     } \
-    \
-    bool greater = f ## bits ## _lt_quiet(a, b) || \
-                   (f ## bits ## _eq(a, b) && signF ## bits ## UI(a.v)); \
-    \
-    return greater || isNaNF ## bits ## UI((b).v) ? a : b; \
 }
 
 COMPARE_MAX(a, b, 16);
diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa
index cf29250eda..8bb67abe91 100644
--- a/src/arch/riscv/isa/decoder.isa
+++ b/src/arch/riscv/isa/decoder.isa
@@ -2334,7 +2334,8 @@ decode QUADRANT default Unknown::unknown() {
                     // The encodings corresponding to the masked versions
                     // (vm=0) of vfmv.f.s are reserved
                     0x1: VectorNonSplitFormat::vfmv_f_s({{
-                        Fd_bits = Vs1_vu[0];
+                        freg_t fd = freg(Vs2_vu[0]);
+                        Fd_bits = fd.v;
                     }}, OPFVV, VectorDummyOp);
                 }
             }
diff --git a/src/arch/riscv/isa/formats/vector_arith.isa b/src/arch/riscv/isa/formats/vector_arith.isa
index 43e02385ee..0f50897fba 100644
--- a/src/arch/riscv/isa/formats/vector_arith.isa
+++ b/src/arch/riscv/isa/formats/vector_arith.isa
@@ -69,6 +69,54 @@ let {{
 
         return '''
             uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx;
        ''' + code
+
+    def wideningOpRegisterConstraintChecks(code):
+        return '''
+            const uint32_t num_microops = 1 << std::max<int64_t>(0, vtype_vlmul(machInst.vtype8) + 1);
+            if ((machInst.vd % alignToPowerOfTwo(num_microops)) != 0) {
+                std::string error =
+                    csprintf("Unaligned Vd group in Widening op");
+                return std::make_shared<IllegalInstFault>(error, machInst);
+            }
+            if ((machInst.vs2 <= machInst.vd) && (machInst.vd < (machInst.vs2 + num_microops - 1))) {
+                // A destination vector register group can overlap a source vector
+                // register group if: the destination EEW is greater than the source
+                // EEW, the source EMUL is at least 1, and the overlap is in the
+                // highest-numbered part of the destination register group.
+                std::string error =
+                    csprintf("Unsupported overlap in Vs2 and Vd for Widening op");
+                return std::make_shared<IllegalInstFault>(error, machInst);
+            }
+        ''' + code
+
+    def narrowingOpRegisterConstraintChecks(code):
+        return '''
+            const uint32_t num_microops = 1 << std::max<int64_t>(0, vtype_vlmul(machInst.vtype8) + 1);
+            if ((machInst.vs2 % alignToPowerOfTwo(num_microops)) != 0) {
+                std::string error =
+                    csprintf("Unaligned VS2 group in Narrowing op");
+                return std::make_shared<IllegalInstFault>(error, machInst);
+            }
+            if ((machInst.vs2 < machInst.vd) && (machInst.vd <= (machInst.vs2 + num_microops - 1))) {
+                // A destination vector register group can overlap a source vector
+                // register group if: the destination EEW is smaller than the source
+                // EEW and the overlap is in the lowest-numbered part of the source
+                // register group.
+                std::string error =
+                    csprintf("Unsupported overlap in Vs2 and Vd for Narrowing op");
+                return std::make_shared<IllegalInstFault>(error, machInst);
+            }
+        ''' + code
+
+    def fflags_wrapper(code):
+        return '''
+            RegVal FFLAGS = xc->readMiscReg(MISCREG_FFLAGS);
+            std::feclearexcept(FE_ALL_EXCEPT);
+        ''' + code + '''
+            FFLAGS |= softfloat_exceptionFlags;
+            softfloat_exceptionFlags = 0;
+            xc->setMiscReg(MISCREG_FFLAGS, FFLAGS);
+        '''
 }};
@@ -239,6 +287,8 @@ def format VectorIntWideningFormat(code, category, *flags) {{
         code = eiDeclarePrefix(code, widening=True)
     code = loopWrapper(code)
 
+    code = wideningOpRegisterConstraintChecks(code)
+
     vm_decl_rd = ""
     if v0_required:
         vm_decl_rd = vmDeclAndReadData()
@@ -295,6 +345,7 @@ def format VectorIntNarrowingFormat(code, category, *flags) {{
         code = maskCondWrapper(code)
         code = eiDeclarePrefix(code, widening=True)
     code = loopWrapper(code)
+    code = narrowingOpRegisterConstraintChecks(code)
 
     vm_decl_rd = vmDeclAndReadData()
     microiop = InstObjParams(name + "_micro",
@@ -313,7 +364,7 @@ def format VectorIntNarrowingFormat(code, category, *flags) {{
 
     header_output = \
         VectorIntWideningMicroDeclare.subst(microiop) + \
        VectorIntWideningMicroConstructor.subst(microiop) + \
-        VectorIntWideningMicroExecute.subst(microiop) + \
+        VectorIntNarrowingMicroExecute.subst(microiop) + \
         VectorIntWideningMacroDeclare.subst(iop) + \
         VectorIntWideningMacroConstructor.subst(iop)
@@ -480,6 +531,7 @@ def format VectorFloatFormat(code, category, *flags) {{
     if need_elem_idx:
         code = eiDeclarePrefix(code)
     code = loopWrapper(code)
+    code = fflags_wrapper(code)
 
     vm_decl_rd = ""
     if v0_required:
@@ -525,6 +577,7 @@ def format VectorFloatCvtFormat(code, category, *flags) {{
     code = maskCondWrapper(code)
     code = eiDeclarePrefix(code)
     code = loopWrapper(code)
+    code = fflags_wrapper(code)
 
     vm_decl_rd = vmDeclAndReadData()
 
@@ -588,6 +641,9 @@ def format VectorFloatWideningFormat(code, category, *flags) {{
     if need_elem_idx:
         code = eiDeclarePrefix(code, widening=True)
     code = loopWrapper(code)
+    code = fflags_wrapper(code)
+
+    code = wideningOpRegisterConstraintChecks(code)
 
     vm_decl_rd = ""
     if v0_required:
@@ -633,6 +689,7 @@ def format VectorFloatWideningCvtFormat(code, category, *flags) {{
     code = maskCondWrapper(code)
     code = eiDeclarePrefix(code)
     code = loopWrapper(code)
+    code = fflags_wrapper(code)
 
     vm_decl_rd = vmDeclAndReadData()
 
@@ -676,6 +733,8 @@ def format VectorFloatNarrowingCvtFormat(code, category, *flags) {{
     code = maskCondWrapper(code)
     code = eiDeclarePrefix(code)
     code = loopWrapper(code)
+    code = fflags_wrapper(code)
+    code = narrowingOpRegisterConstraintChecks(code)
 
     vm_decl_rd = vmDeclAndReadData()
 
@@ -694,7 +753,7 @@ def format VectorFloatNarrowingCvtFormat(code, category, *flags) {{
 
     header_output = \
         VectorFloatCvtMicroDeclare.subst(microiop) + \
         VectorFloatMicroConstructor.subst(microiop) + \
-        VectorFloatWideningMicroExecute.subst(microiop) + \
+        VectorFloatNarrowingMicroExecute.subst(microiop) + \
         VectorFloatCvtMacroDeclare.subst(iop) + \
         VectorIntWideningMacroConstructor.subst(iop)
@@ -728,6 +787,7 @@ def format VectorFloatMaskFormat(code, category, *flags) {{
     code = maskCondWrapper(code)
     code = eiDeclarePrefix(code)
     code = loopWrapper(code)
+    code = fflags_wrapper(code)
 
     microiop = InstObjParams(name + "_micro",
                              Name + "Micro",
@@ -868,15 +928,12 @@ def format Vector1Vs1RdMaskFormat(code, category, *flags){{
 
 def format VectorNonSplitFormat(code, category, *flags) {{
     inst_name, inst_suffix = name.split("_", maxsplit=1)
-    v0_required = inst_name not in ["vmv", "vfmv"]
     vm_decl_rd = ""
-    if v0_required:
-        vm_decl_rd = vmDeclAndReadData()
-    mask_cond = v0_required
     set_vm_idx = ""
-    if mask_cond:
-        set_vm_idx = setSrcVm()
+
+    if inst_name == "vfmv" :
+        code = fflags_wrapper(code)
 
     iop = InstObjParams(name,
                         Name,
@@ -886,12 +943,15 @@ def format VectorNonSplitFormat(code, category, *flags) {{
                          'set_vm_idx': set_vm_idx},
                         flags)
 
+    if inst_name == "vfmv" :
         execute_block = VectorFloatNonSplitExecute.subst(iop)
         decode_block = VectorFloatDecodeBlock.subst(iop)
-    else :
-        execute_block = VectorNonSplitExecute.subst(iop)
+    elif inst_name == "vmv" :
+        execute_block = VectorIntNonSplitExecute.subst(iop)
         decode_block = VectorIntDecodeBlock.subst(iop)
+    else :
+        error("Unsupported inst for VectorNonSplitFormat: %s" % inst_name)
 
     # Because of the use of templates, we had to put all parts in header to
     # keep the compiler happy.
@@ -1006,6 +1066,9 @@ def format VectorReduceFloatFormat(code, category, *flags) {{
         using et = ElemType;
         using vu = decltype(et::v);
     '''
+
+    code = fflags_wrapper(code)
+
     microiop = InstObjParams(name + "_micro",
                              Name + "Micro",
                              'VectorArithMicroInst',
diff --git a/src/arch/riscv/isa/templates/vector_arith.isa b/src/arch/riscv/isa/templates/vector_arith.isa
index d588933501..0cfdf04f51 100644
--- a/src/arch/riscv/isa/templates/vector_arith.isa
+++ b/src/arch/riscv/isa/templates/vector_arith.isa
@@ -355,7 +355,38 @@ Fault
 
     if (machInst.vill)
        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+    const int64_t vlmul = vtype_vlmul(machInst.vtype8);
+    const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true);
+    const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2;
+    [[maybe_unused]] const size_t offset =
+        (this->microIdx % 2 == 0) ? 0 : micro_vlmax;
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(vm_decl_rd)s;
+    %(copy_old_vd)s;
+    %(code)s;
+    %(op_wb)s;
+    return NoFault;
+}
+
+}};
+def template VectorIntNarrowingMicroExecute {{
+
+template <typename ElemType>
+Fault
+%(class_name)s::execute(ExecContext* xc,
+    Trace::InstRecord* traceData) const
+{
+    using vu [[maybe_unused]] = std::make_unsigned_t<ElemType>;
+    using vi [[maybe_unused]] = std::make_signed_t<ElemType>;
+    using vwu [[maybe_unused]] = typename double_width<vu>::type;
+    using vwi [[maybe_unused]] = typename double_width<vi>::type;
+    [[maybe_unused]] constexpr size_t sew = sizeof(vu) * 8;
+
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
     const int64_t vlmul = vtype_vlmul(machInst.vtype8);
     const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true);
     const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2;
@@ -576,6 +607,40 @@ Fault
 
 }};
 
+def template VectorFloatNarrowingMicroExecute {{
+
+template <typename ElemType>
+Fault
+%(class_name)s::execute(ExecContext* xc,
+    Trace::InstRecord* traceData) const
+{
+    using et = ElemType;
+    using vu [[maybe_unused]] = decltype(et::v);
+    using ewt = typename double_width<et>::type;
+    using vwu = decltype(ewt::v);
+
+    if (machInst.vill)
+        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
+
+    VRM_REQUIRED;
+
+    const int64_t vlmul = vtype_vlmul(machInst.vtype8);
+    const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true);
+    const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2;
+    [[maybe_unused]] const size_t offset =
+        (this->microIdx % 2 == 0) ? 0 : micro_vlmax;
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(vm_decl_rd)s;
+    %(copy_old_vd)s;
+    %(code)s;
+    %(op_wb)s;
+    return NoFault;
+}
+
+}};
+
 def template VectorFloatWideningDecodeBlock {{
 
     switch(machInst.vtype8.vsew) {
@@ -1166,7 +1231,7 @@ template
 
 }};
 
-def template VectorNonSplitExecute {{
+def template VectorIntNonSplitExecute {{
 
 template <typename ElemType>
 Fault
diff --git a/src/arch/riscv/regs/float.hh b/src/arch/riscv/regs/float.hh
index c0934bd0ce..b505cd2641 100644
--- a/src/arch/riscv/regs/float.hh
+++ b/src/arch/riscv/regs/float.hh
@@ -105,7 +105,10 @@ static constexpr float64_t f64(freg_t r) { return r; }
 static constexpr freg_t freg(float16_t f) { return {boxF16(f.v)}; }
 static constexpr freg_t freg(float32_t f) { return {boxF32(f.v)}; }
 static constexpr freg_t freg(float64_t f) { return f; }
-static constexpr freg_t freg(uint_fast16_t f) { return {f}; }
+
+static constexpr freg_t freg(uint16_t f) { return {boxF16(f)}; }
+static constexpr freg_t freg(uint32_t f) { return {boxF32(f)}; }
+static constexpr freg_t freg(uint64_t f) { return {f}; }
 
 namespace float_reg {
 