From 929ef1b6e2d1eb726f67bf6c30edb8bcbf7896d2 Mon Sep 17 00:00:00 2001 From: MichaelJSr Date: Mon, 13 Jan 2025 16:45:13 -0800 Subject: [PATCH 1/4] Remove unused EXTV code, clean up code, pragma once around vpu.h --- sim/simx/Makefile | 2 +- sim/simx/arch.h | 6 - sim/simx/emulator.cpp | 12 +- sim/simx/execute.cpp | 12 +- sim/simx/{execute_v.cpp => vpu.cpp} | 2391 +---------------- sim/simx/vpu.h | 2391 +++++++++++++++++ tests/riscv/riscv-vector-tests/README | 2 +- tests/riscv/riscv-vector-tests/run-test.sh.in | 3 - 8 files changed, 2399 insertions(+), 2420 deletions(-) rename sim/simx/{execute_v.cpp => vpu.cpp} (55%) create mode 100644 sim/simx/vpu.h diff --git a/sim/simx/Makefile b/sim/simx/Makefile index d3e726bbe..4b0fa410f 100644 --- a/sim/simx/Makefile +++ b/sim/simx/Makefile @@ -22,7 +22,7 @@ SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $( # Add V extension sources ifneq ($(findstring -DEXT_V_ENABLE, $(CONFIGS)),) - SRCS += $(SRC_DIR)/execute_v.cpp + SRCS += $(SRC_DIR)/vpu.cpp endif # Debugging diff --git a/sim/simx/arch.h b/sim/simx/arch.h index d68345db6..6becf5c91 100644 --- a/sim/simx/arch.h +++ b/sim/simx/arch.h @@ -29,7 +29,6 @@ class Arch { uint16_t num_cores_; uint16_t num_clusters_; uint16_t socket_size_; - uint16_t vsize_; uint16_t num_barriers_; uint64_t local_mem_base_; @@ -40,7 +39,6 @@ class Arch { , num_cores_(num_cores) , num_clusters_(NUM_CLUSTERS) , socket_size_(SOCKET_SIZE) - , vsize_(VLEN / 8) , num_barriers_(NUM_BARRIERS) , local_mem_base_(LMEM_BASE_ADDR) {} @@ -73,10 +71,6 @@ class Arch { return socket_size_; } - uint16_t vsize() const { - return vsize_; - } - }; } \ No newline at end of file diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index b834a87f2..4bb94915e 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -33,7 +33,7 @@ using namespace vortex; Emulator::warp_t::warp_t(const Arch& arch) : ireg_file(arch.num_threads(), std::vector(MAX_NUM_REGS)) , freg_file(arch.num_threads(), std::vector(MAX_NUM_REGS)) - , vreg_file(MAX_NUM_REGS, std::vector(arch.vsize())) + , vreg_file(MAX_NUM_REGS, std::vector(MAX_NUM_REGS)) , uuid(0) {} @@ -77,16 +77,6 @@ void Emulator::warp_t::clear(uint64_t startup_addr) { #endif } } - - for (auto& reg_file : this->vreg_file) { - for (auto& reg : reg_file) { - #ifndef NDEBUG - reg = 0; - #else - reg = std::rand(); - #endif - } - } } /////////////////////////////////////////////////////////////////////////////// diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp index 436d43486..86623a00c 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -932,7 +932,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { for (uint32_t t = thread_start; t < num_threads; ++t) { if (!warp.tmask.test(t)) continue; - uint32_t frm = (func3 == 0x7) ? this->get_csr(VX_CSR_FRM, t, wid) : func3; + uint32_t frm = this->get_fpu_rm(func3, t, wid); uint32_t fflags = 0; switch (func7) { case 0x00: { // RV32F: FADD.S @@ -1247,10 +1247,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { break; } } - if (fflags) { - this->set_csr(VX_CSR_FCSR, this->get_csr(VX_CSR_FCSR, t, wid) | fflags, t, wid); - this->set_csr(VX_CSR_FFLAGS, this->get_csr(VX_CSR_FFLAGS, t, wid) | fflags, t, wid); - } + this->update_fcrs(fflags, t, wid); } rd_write = true; break; @@ -1304,10 +1301,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) { default: break; } - if (fflags) { - this->set_csr(VX_CSR_FCSR, this->get_csr(VX_CSR_FCSR, t, wid) | fflags, t, wid); - this->set_csr(VX_CSR_FFLAGS, this->get_csr(VX_CSR_FFLAGS, t, wid) | fflags, t, wid); - } + this->update_fcrs(fflags, t, wid); } rd_write = true; break; diff --git a/sim/simx/execute_v.cpp b/sim/simx/vpu.cpp similarity index 55% rename from sim/simx/execute_v.cpp rename to sim/simx/vpu.cpp index d14338024..63ed8fcc2 100644 --- a/sim/simx/execute_v.cpp +++ b/sim/simx/vpu.cpp @@ -1,5 +1,5 @@ // This is a fork of https://github.com/troibe/vortex/tree/simx-v2-vector -// The purpose of this fork is to make the simx-v2-vector up to date with master +// The purpose of this fork is to make simx-v2-vector up to date with master // Thanks to Troibe for his amazing work #include "emulator.h" @@ -10,2397 +10,10 @@ #include #include #include +#include "vpu.h" using namespace vortex; -template -class Add { -public: - static R apply(T first, T second, R) { - return (R)first + (R)second; - } - static std::string name() { return "Add"; } -}; - -template -class Sub { -public: - static R apply(T first, T second, R) { - return (R)second - (R)first; - } - static std::string name() { return "Sub"; } -}; - -template -class Adc { -public: - static R apply(T first, T second, R third) { - return (R)first + (R)second + third; - } - static std::string name() { return "Adc"; } -}; - -template -class Madc { -public: - static R apply(T first, T second, R third) { - return ((R)first + (R)second + third) > (R)std::numeric_limits::max(); - } - static std::string name() { return "Madc"; } -}; - -template -class Sbc { -public: - static R apply(T first, T second, R third) { - return (R)second - (R)first - third; - } - static std::string name() { return "Sbc"; } -}; - -template -class Msbc { -public: - static R apply(T first, T second, R third) { - return (R)second < ((R)first + third); - } - static std::string name() { return "Msbc"; } -}; - -template -class Ssub { -public: - static R apply(T first, T second, uint32_t, uint32_t &vxsat_) { - // rounding mode is not relevant for this operation - T unclippedResult = second - first; - R clippedResult = std::clamp(unclippedResult, (T)std::numeric_limits::min(), (T)std::numeric_limits::max()); - vxsat_ |= clippedResult != unclippedResult; - return clippedResult; - } - static std::string name() { return "Ssub"; } -}; - -template -class Ssubu { -public: - static R apply(T first, T second, uint32_t, uint32_t &vxsat_) { - // rounding mode is not relevant for this operation - if (first > second) { - vxsat_ = true; - return 0; - } else { - vxsat_ = false; - return second - first; - } - } - static std::string name() { return "Ssubu"; } -}; - -template -class Sadd { -public: - static R apply(T first, T second, uint32_t, uint32_t &vxsat_) { - // rounding mode is not relevant for this operation - T unclippedResult = second + first; - R clippedResult = std::clamp(unclippedResult, (T)std::numeric_limits::min(), (T)std::numeric_limits::max()); - vxsat_ |= clippedResult != unclippedResult; - return clippedResult; - } - static std::string name() { return "Sadd"; } -}; - -template -class Rsub { -public: - static R apply(T first, T second, R) { - return first - second; - } - static std::string name() { return "Rsub"; } -}; - -template -class Div { -public: - static R apply(T first, T second, R) { - // logic taken from scalar div - if (first == 0) { - return -1; - } else if (second == std::numeric_limits::min() && first == T(-1)) { - return second; - } else { - return (R)second / (R)first; - } - } - static std::string name() { return "Div"; } -}; - -template -class Rem { -public: - static R apply(T first, T second, R) { - // logic taken from scalar rem - if (first == 0) { - return second; - } else if (second == std::numeric_limits::min() && first == T(-1)) { - return 0; - } else { - return (R)second % (R)first; - } - } - static std::string name() { return "Rem"; } -}; - -template -class Mul { -public: - static R apply(T first, T second, R) { - return (R)first * (R)second; - } - static std::string name() { return "Mul"; } -}; - -template -class Mulsu { -public: - static R apply(T first, T second, R) { - R first_ext = zext((R)first, (sizeof(T) * 8)); - return first_ext * (R)second; - } - static std::string name() { return "Mulsu"; } -}; - -template -class Mulh { -public: - static R apply(T first, T second, R) { - __int128_t first_ext = sext((__int128_t)first, (sizeof(T) * 8)); - __int128_t second_ext = sext((__int128_t)second, (sizeof(T) * 8)); - return (first_ext * second_ext) >> (sizeof(T) * 8); - } - static std::string name() { return "Mulh"; } -}; - -template -class Mulhsu { -public: - static R apply(T first, T second, R) { - __int128_t first_ext = zext((__int128_t)first, (sizeof(T) * 8)); - __int128_t second_ext = sext((__int128_t)second, (sizeof(T) * 8)); - return (first_ext * second_ext) >> (sizeof(T) * 8); - } - static std::string name() { return "Mulhsu"; } -}; - -template -class Mulhu { -public: - static R apply(T first, T second, R) { - return ((__uint128_t)first * (__uint128_t)second) >> (sizeof(T) * 8); - } - static std::string name() { return "Mulhu"; } -}; - -template -class Madd { -public: - static R apply(T first, T second, R third) { - return ((R)first * third) + (R)second; - } - static std::string name() { return "Madd"; } -}; - -template -class Nmsac { -public: - static R apply(T first, T second, R third) { - return -((R)first * (R)second) + third; - } - static std::string name() { return "Nmsac"; } -}; - -template -class Macc { -public: - static R apply(T first, T second, R third) { - return ((R)first * (R)second) + third; - } - static std::string name() { return "Macc"; } -}; - -template -class Maccsu { -public: - static R apply(T first, T second, R third) { - R first_ext = sext((R)first, (sizeof(T) * 8)); - R second_ext = zext((R)second, (sizeof(T) * 8)); - return (first_ext * second_ext) + third; - } - static std::string name() { return "Maccsu"; } -}; - -template -class Maccus { -public: - static R apply(T first, T second, R third) { - R first_ext = zext((R)first, (sizeof(T) * 8)); - R second_ext = sext((R)second, (sizeof(T) * 8)); - return (first_ext * second_ext) + third; - } - static std::string name() { return "Maccus"; } -}; - -template -class Nmsub { -public: - static R apply(T first, T second, R third) { - return -((R)first * third) + (R)second; - } - static std::string name() { return "Nmsub"; } -}; - -template -class Min { -public: - static R apply(T first, T second, R) { - return std::min(first, second); - } - static std::string name() { return "Min"; } -}; - -template -class Max { -public: - static R apply(T first, T second, R) { - return std::max(first, second); - } - static std::string name() { return "Max"; } -}; - -template -class And { -public: - static R apply(T first, T second, R) { - return first & second; - } - static std::string name() { return "And"; } -}; - -template -class Or { -public: - static R apply(T first, T second, R) { - return first | second; - } - static std::string name() { return "Or"; } -}; - -template -class Xor { -public: - static R apply(T first, T second, R) { - return first ^ second; - } - static std::string name() { return "Xor"; } -}; - -template -class Sll { -public: - static R apply(T first, T second, R) { - // Only the low lg2(SEW) bits of the shift-amount value are used to control the shift amount. - return second << (first & (sizeof(T) * 8 - 1)); - } - static std::string name() { return "Sll"; } -}; - -template -bool bitAt(T value, R pos, R negOffset) { - R offsetPos = pos - negOffset; - return pos >= negOffset && ((value >> offsetPos) & 0x1); -} - -template -bool anyBitUpTo(T value, R to, R negOffset) { - R offsetTo = to - negOffset; - return to >= negOffset && (value & (((R)1 << (offsetTo + 1)) - 1)); -} - -template -bool roundBit(T value, R shiftDown, uint32_t vxrm) { - switch (vxrm) { - case 0: // round-to-nearest-up - return bitAt(value, shiftDown, (R)1); - case 1: // round-to-nearest-even - return bitAt(value, shiftDown, (R)1) && (anyBitUpTo(value, shiftDown, (R)2) || bitAt(value, shiftDown, (R)0)); - case 2: // round-down (truncate) - return 0; - case 3: // round-to-odd - return !bitAt(value, shiftDown, (R)0) && anyBitUpTo(value, shiftDown, (R)1); - default: - std::cout << "Roundoff - invalid value for vxrm: " << vxrm << std::endl; - std::abort(); - } -} - -template -class SrlSra { -public: - static R apply(T first, T second, R) { - // Only the low lg2(SEW) bits of the shift-amount value are used to control the shift amount. - return second >> (first & (sizeof(T) * 8 - 1)); - } - static R apply(T first, T second, uint32_t vxrm, uint32_t) { - // Saturation is not relevant for this operation - // Only the low lg2(SEW) bits of the shift-amount value are used to control the shift amount. - T firstValid = first & (sizeof(T) * 8 - 1); - return apply(firstValid, second, 0) + roundBit(second, firstValid, vxrm); - } - static std::string name() { return "SrlSra"; } -}; - -template -class Aadd { -public: - static R apply(T first, T second, uint32_t vxrm, uint32_t) { - // Saturation is not relevant for this operation - T sum = second + first; - return (sum >> 1) + roundBit(sum, 1, vxrm); - } - static std::string name() { return "Aadd"; } -}; - -template -class Asub { -public: - static R apply(T first, T second, uint32_t vxrm, uint32_t) { - // Saturation is not relevant for this operation - T difference = second - first; - return (difference >> 1) + roundBit(difference, 1, vxrm); - } - static std::string name() { return "Asub"; } -}; - -template -class Eq { -public: - static R apply(T first, T second, R) { - return first == second; - } - static std::string name() { return "Eq"; } -}; - -template -class Ne { -public: - static R apply(T first, T second, R) { - return first != second; - } - static std::string name() { return "Ne"; } -}; - -template -class Lt { -public: - static R apply(T first, T second, R) { - return first > second; - } - static std::string name() { return "Lt"; } -}; - -template -class Le { -public: - static R apply(T first, T second, R) { - return first >= second; - } - static std::string name() { return "Le"; } -}; - -template -class Gt { -public: - static R apply(T first, T second, R) { - return first < second; - } - static std::string name() { return "Gt"; } -}; - -template -class AndNot { -public: - static R apply(T first, T second, R) { - return second & ~first; - } - static std::string name() { return "AndNot"; } -}; - -template -class OrNot { -public: - static R apply(T first, T second, R) { - return second | ~first; - } - static std::string name() { return "OrNot"; } -}; - -template -class Nand { -public: - static R apply(T first, T second, R) { - return ~(second & first); - } - static std::string name() { return "Nand"; } -}; - -template -class Mv { -public: - static R apply(T first, T, R) { - return first; - } - static std::string name() { return "Mv"; } -}; - -template -class Nor { -public: - static R apply(T first, T second, R) { - return ~(second | first); - } - static std::string name() { return "Nor"; } -}; - -template -class Xnor { -public: - static R apply(T first, T second, R) { - return ~(second ^ first); - } - static std::string name() { return "Xnor"; } -}; - -template -class Fadd { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - // ignoring rounding mode for now - uint32_t frm = 0; - if (sizeof(R) == 4) { - return rv_fadd_s(first, second, frm, &fflags); - } else if (sizeof(R) == 8) { - uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); - uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second); - return rv_fadd_d(first_d, second_d, frm, &fflags); - } else { - std::cout << "Fadd only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fadd"; } -}; - -template -class Fsub { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - // ignoring rounding mode for now - uint32_t frm = 0; - if (sizeof(R) == 4) { - return rv_fsub_s(second, first, frm, &fflags); - } else if (sizeof(R) == 8) { - uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); - uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second); - return rv_fsub_d(second_d, first_d, frm, &fflags); - } else { - std::cout << "Fsub only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fsub"; } -}; - -template -class Fmacc { -public: - static R apply(T first, T second, R third) { - // ignoring flags for now - uint32_t fflags = 0; - // ignoring rounding mode for now - uint32_t frm = 0; - if (sizeof(R) == 4) { - return rv_fmadd_s(first, second, third, frm, &fflags); - } else if (sizeof(R) == 8) { - uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); - uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second); - return rv_fmadd_d(first_d, second_d, third, frm, &fflags); - } else { - std::cout << "Fmacc only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fmacc"; } -}; - -template -class Fnmacc { -public: - static R apply(T first, T second, R third) { - // ignoring flags for now - uint32_t fflags = 0; - // ignoring rounding mode for now - uint32_t frm = 0; - if (sizeof(R) == 4) { - return rv_fnmadd_s(first, second, third, frm, &fflags); - } else if (sizeof(R) == 8) { - uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); - uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second); - return rv_fnmadd_d(first_d, second_d, third, frm, &fflags); - } else { - std::cout << "Fnmacc only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fnmacc"; } -}; - -template -class Fmsac { -public: - static R apply(T first, T second, R third) { - // ignoring flags for now - uint32_t fflags = 0; - // ignoring rounding mode for now - uint32_t frm = 0; - if (sizeof(R) == 4) { - return rv_fmadd_s(first, second, rv_fsgnjn_s(third, third), frm, &fflags); - } else if (sizeof(R) == 8) { - uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); - uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second); - return rv_fmadd_d(first_d, second_d, rv_fsgnjn_d(third, third), frm, &fflags); - } else { - std::cout << "Fmsac only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fmsac"; } -}; - -template -class Fnmsac { -public: - static R apply(T first, T second, R third) { - // ignoring flags for now - uint32_t fflags = 0; - // ignoring rounding mode for now - uint32_t frm = 0; - if (sizeof(R) == 4) { - return rv_fnmadd_s(first, second, rv_fsgnjn_s(third, third), frm, &fflags); - } else if (sizeof(R) == 8) { - uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); - uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second); - return rv_fnmadd_d(first_d, second_d, rv_fsgnjn_d(third, third), frm, &fflags); - } else { - std::cout << "Fnmsac only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fnmsac"; } -}; - -template -class Fmadd { -public: - static R apply(T first, T second, R third) { - if (sizeof(T) == 4 || sizeof(T) == 8) { - return Fmacc::apply(first, third, second); - } else { - std::cout << "Fmadd only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fmadd"; } -}; - -template -class Fnmadd { -public: - static R apply(T first, T second, R third) { - if (sizeof(T) == 4 || sizeof(T) == 8) { - return Fnmacc::apply(first, third, second); - } else { - std::cout << "Fnmadd only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fnmadd"; } -}; - -template -class Fmsub { -public: - static R apply(T first, T second, R third) { - if (sizeof(T) == 4 || sizeof(T) == 8) { - return Fmsac::apply(first, third, second); - } else { - std::cout << "Fmsub only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fmsub"; } -}; - -template -class Fnmsub { -public: - static R apply(T first, T second, R third) { - if (sizeof(T) == 4 || sizeof(T) == 8) { - return Fnmsac::apply(first, third, second); - } else { - std::cout << "Fnmsub only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fnmsub"; } -}; - -template -class Fmin { -public: - static R apply(T first, T second, R) { - // ignoring rounding modes for now - uint32_t fflags = 0; - if (sizeof(T) == 4) { - return rv_fmin_s(first, second, &fflags); - } else if (sizeof(T) == 8) { - return rv_fmin_d(first, second, &fflags); - } else { - std::cout << "Fmin only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fmin"; } -}; - -template -class Fmax { -public: - static R apply(T first, T second, R) { - // ignoring rounding modes for now - uint32_t fflags = 0; - if (sizeof(T) == 4) { - return rv_fmax_s(first, second, &fflags); - } else if (sizeof(T) == 8) { - return rv_fmax_d(first, second, &fflags); - } else { - std::cout << "Fmax only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fmax"; } -}; - -template -class Fsgnj { -public: - static R apply(T first, T second, R) { - if (sizeof(T) == 4) { - return rv_fsgnj_s(second, first); - } else if (sizeof(T) == 8) { - return rv_fsgnj_d(second, first); - } else { - std::cout << "Fsgnj only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fsgnj"; } -}; - -template -class Fsgnjn { -public: - static R apply(T first, T second, R) { - if (sizeof(T) == 4) { - return rv_fsgnjn_s(second, first); - } else if (sizeof(T) == 8) { - return rv_fsgnjn_d(second, first); - } else { - std::cout << "Fsgnjn only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fsgnjn"; } -}; - -template -class Fsgnjx { -public: - static R apply(T first, T second, R) { - if (sizeof(T) == 4) { - return rv_fsgnjx_s(second, first); - } else if (sizeof(T) == 8) { - return rv_fsgnjx_d(second, first); - } else { - std::cout << "Fsgnjx only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fsgnjx"; } -}; - -template -class Fcvt { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - // ignoring rounding mode for now - uint32_t frm = 0; - if (sizeof(T) == 4) { - switch (first) { - case 0b00000: // vfcvt.xu.f.v - return rv_ftou_s(second, frm, &fflags); - case 0b00001: // vfcvt.x.f.v - return rv_ftoi_s(second, frm, &fflags); - case 0b00010: // vfcvt.f.xu.v - return rv_utof_s(second, frm, &fflags); - case 0b00011: // vfcvt.f.x.v - return rv_itof_s(second, frm, &fflags); - case 0b00110: // vfcvt.rtz.xu.f.v - return rv_ftou_s(second, 1, &fflags); - case 0b00111: // vfcvt.rtz.x.f.v - return rv_ftoi_s(second, 1, &fflags); - case 0b01000: // vfwcvt.xu.f.v - return rv_ftolu_s(second, frm, &fflags); - case 0b01001: // vfwcvt.x.f.v - return rv_ftol_s(second, frm, &fflags); - case 0b01010: // vfwcvt.f.xu.v - return rv_utof_d(second, frm, &fflags); - case 0b01011: // vfwcvt.f.x.v - return rv_itof_d(second, frm, &fflags); - case 0b01100: // vfwcvt.f.f.v - return rv_ftod(second); - case 0b01110: // vfwcvt.rtz.xu.f.v - return rv_ftolu_s(second, 1, &fflags); - case 0b01111: // vfwcvt.rtz.x.f.v - return rv_ftol_s(second, 1, &fflags); - default: - std::cout << "Fcvt has unsupported value for first: " << first << std::endl; - std::abort(); - } - } else if (sizeof(T) == 8) { - switch (first) { - case 0b00000: // vfcvt.xu.f.v - return rv_ftolu_d(second, frm, &fflags); - case 0b00001: // vfcvt.x.f.v - return rv_ftol_d(second, frm, &fflags); - case 0b00010: // vfcvt.f.xu.v - return rv_lutof_d(second, frm, &fflags); - case 0b00011: // vfcvt.f.x.v - return rv_ltof_d(second, frm, &fflags); - case 0b00110: // vfcvt.rtz.xu.f.v - return rv_ftolu_d(second, 1, &fflags); - case 0b00111: // vfcvt.rtz.x.f.v - return rv_ftol_d(second, 1, &fflags); - case 0b01000: // vfwcvt.xu.f.v - case 0b01001: // vfwcvt.x.f.v - case 0b01010: // vfwcvt.f.xu.v - case 0b01011: // vfwcvt.f.x.v - case 0b01100: // vfwcvt.f.f.v - case 0b01110: // vfwcvt.rtz.xu.f.v - case 0b01111: // vfwcvt.rtz.x.f.v - std::cout << "Fwcvt only supports f32" << std::endl; - std::abort(); - default: - std::cout << "Fcvt has unsupported value for first: " << first << std::endl; - std::abort(); - } - } else { - std::cout << "Fcvt only supports f32 and f64" << std::endl; - std::abort(); - } - } - static R apply(T first, T second, uint32_t vxrm, uint32_t &) { // saturation argument is unused - // ignoring flags for now - uint32_t fflags = 0; - if (sizeof(T) == 8) { - switch (first) { - case 0b10000: // vfncvt.xu.f.w - return rv_ftou_d(second, vxrm, &fflags); - case 0b10001: // vfncvt.x.f.w - return rv_ftoi_d(second, vxrm, &fflags); - case 0b10010: // vfncvt.f.xu.w - return rv_lutof_s(second, vxrm, &fflags); - case 0b10011: // vfncvt.f.x.w - return rv_ltof_s(second, vxrm, &fflags); - case 0b10100: // vfncvt.f.f.w - return rv_dtof_r(second, vxrm); - case 0b10101: // vfncvt.rod.f.f.w - return rv_dtof_r(second, 6); - case 0b10110: // vfncvt.rtz.xu.f.w - return rv_ftou_d(second, 1, &fflags); - case 0b10111: // vfncvt.rtz.x.f.w - return rv_ftoi_d(second, 1, &fflags); - default: - std::cout << "Fncvt has unsupported value for first: " << first << std::endl; - std::abort(); - } - } else { - std::cout << "Fncvt only supports f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fcvt"; } -}; - -template -class Funary1 { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - // ignoring rounding mode for now - uint32_t frm = 0; - if (sizeof(T) == 4) { - switch (first) { - case 0b00000: // vfsqrt.v - return rv_fsqrt_s(second, frm, &fflags); - case 0b00100: // vfrsqrt7.v - return rv_frsqrt7_s(second, frm, &fflags); - case 0b00101: // vfrec7.v - return rv_frecip7_s(second, frm, &fflags); - case 0b10000: // vfclass.v - return rv_fclss_s(second); - default: - std::cout << "Funary1 has unsupported value for first: " << first << std::endl; - std::abort(); - } - } else if (sizeof(T) == 8) { - switch (first) { - case 0b00000: // vfsqrt.v - return rv_fsqrt_d(second, frm, &fflags); - case 0b00100: // vfrsqrt7.v - return rv_frsqrt7_d(second, frm, &fflags); - case 0b00101: // vfrec7.v - return rv_frecip7_d(second, frm, &fflags); - case 0b10000: // vfclass.v - return rv_fclss_d(second); - default: - std::cout << "Funary1 has unsupported value for first: " << first << std::endl; - std::abort(); - } - } else { - std::cout << "Funary1 only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Funary1"; } -}; - -template -class Xunary0 { -public: - static R apply(T, T second, T) { - return second; - } - static std::string name() { return "Xunary0"; } -}; - -template -class Feq { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - if (sizeof(T) == 4) { - return rv_feq_s(second, first, &fflags); - } else if (sizeof(T) == 8) { - return rv_feq_d(second, first, &fflags); - } else { - std::cout << "Feq only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Feq"; } -}; - -template -class Fle { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - if (sizeof(T) == 4) { - return rv_fle_s(second, first, &fflags); - } else if (sizeof(T) == 8) { - return rv_fle_d(second, first, &fflags); - } else { - std::cout << "Fle only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fle"; } -}; - -template -class Flt { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - if (sizeof(T) == 4) { - return rv_flt_s(second, first, &fflags); - } else if (sizeof(T) == 8) { - return rv_flt_d(second, first, &fflags); - } else { - std::cout << "Flt only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Flt"; } -}; - -template -class Fne { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - if (sizeof(T) == 4) { - return !rv_feq_s(second, first, &fflags); - } else if (sizeof(T) == 8) { - return !rv_feq_d(second, first, &fflags); - } else { - std::cout << "Fne only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fne"; } -}; - -template -class Fgt { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - if (sizeof(T) == 4) { - return rv_flt_s(first, second, &fflags); - } else if (sizeof(T) == 8) { - return rv_flt_d(first, second, &fflags); - } else { - std::cout << "Fgt only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fgt"; } -}; - -template -class Fge { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - if (sizeof(T) == 4) { - return rv_fle_s(first, second, &fflags); - } else if (sizeof(T) == 8) { - return rv_fle_d(first, second, &fflags); - } else { - std::cout << "Fge only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fge"; } -}; - -template -class Fdiv { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - // ignoring rounding mode for now - uint32_t frm = 0; - if (sizeof(T) == 4) { - return rv_fdiv_s(second, first, frm, &fflags); - } else if (sizeof(T) == 8) { - return rv_fdiv_d(second, first, frm, &fflags); - } else { - std::cout << "Fdiv only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fdiv"; } -}; - -template -class Frdiv { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - // ignoring rounding mode for now - uint32_t frm = 0; - if (sizeof(T) == 4) { - return rv_fdiv_s(first, second, frm, &fflags); - } else if (sizeof(T) == 8) { - return rv_fdiv_d(first, second, frm, &fflags); - } else { - std::cout << "Frdiv only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Frdiv"; } -}; - -template -class Fmul { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - // ignoring rounding mode for now - uint32_t frm = 0; - if (sizeof(R) == 4) { - return rv_fmul_s(first, second, frm, &fflags); - } else if (sizeof(R) == 8) { - uint64_t first_d = sizeof(T) == 8 ? first : rv_ftod(first); - uint64_t second_d = sizeof(T) == 8 ? second : rv_ftod(second); - return rv_fmul_d(first_d, second_d, frm, &fflags); - } else { - std::cout << "Fmul only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Fmul"; } -}; - -template -class Frsub { -public: - static R apply(T first, T second, R) { - // ignoring flags for now - uint32_t fflags = 0; - // ignoring rounding mode for now - uint32_t frm = 0; - if (sizeof(T) == 4) { - return rv_fsub_s(first, second, frm, &fflags); - } else if (sizeof(T) == 8) { - return rv_fsub_d(first, second, frm, &fflags); - } else { - std::cout << "Frsub only supports f32 and f64" << std::endl; - std::abort(); - } - } - static std::string name() { return "Frsub"; } -}; - -template -class Clip { -public: - static R apply(T first, T second, uint32_t vxrm, uint32_t &vxsat_) { - // The low lg2(2*SEW) bits of the vector or scalar shift-amount value (e.g., the low 6 bits for a SEW=64-bit to - // SEW=32-bit narrowing operation) are used to control the right shift amount, which provides the scaling. - R firstValid = first & (sizeof(T) * 8 - 1); - T unclippedResult = (second >> firstValid) + roundBit(second, firstValid, vxrm); - R clippedResult = std::clamp(unclippedResult, (T)std::numeric_limits::min(), (T)std::numeric_limits::max()); - vxsat_ |= clippedResult != unclippedResult; - return clippedResult; - } - static std::string name() { return "Clip"; } -}; - -template -class Smul { -public: - static R apply(T first, T second, uint32_t vxrm, uint32_t &vxsat_) { - R shift = sizeof(R) * 8 - 1; - T unshiftedResult = first * second; - T unclippedResult = (unshiftedResult >> shift) + roundBit(unshiftedResult, shift, vxrm); - R clippedResult = std::clamp(unclippedResult, (T)std::numeric_limits::min(), (T)std::numeric_limits::max()); - vxsat_ |= clippedResult != unclippedResult; - return clippedResult; - } - static std::string name() { return "Smul"; } -}; - -/////////////////////////////////////////////////////////////////////////////// - -bool isMasked(std::vector> &vreg_file, uint32_t maskVreg, uint32_t byteI, bool vmask) { - auto &mask = vreg_file.at(maskVreg); - uint8_t emask = *(uint8_t *)(mask.data() + byteI / 8); - uint8_t value = (emask >> (byteI % 8)) & 0x1; - DP(4, "Masking enabled: " << +!vmask << " mask element: " << +value); - return !vmask && value == 0; -} - -template -uint32_t getVreg(uint32_t baseVreg, uint32_t byteI) { - uint32_t vsew = sizeof(DT) * 8; - return (baseVreg + (byteI / (VLEN / vsew))) % 32; -} - -template -DT &getVregData(std::vector &baseVregVec, uint32_t byteI) { - uint32_t vsew = sizeof(DT) * 8; - return *(DT *)(baseVregVec.data() + (byteI % (VLEN / vsew)) * vsew / 8); -} - -template -DT &getVregData(std::vector> &vreg_file, uint32_t baseVreg, uint32_t byteI) { - auto &vr1 = vreg_file.at(getVreg
(baseVreg, byteI)); - return getVregData
(vr1, byteI); -} - -template -void vector_op_vix_load(std::vector> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rdest, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) { - uint32_t vsew = sizeof(DT) * 8; - uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11); - if (nfields * emul > 8) { - std::cout << "NFIELDS * EMUL = " << nfields * lmul << " but it should be <= 8" << std::endl; - std::abort(); - } - for (uint32_t i = 0; i < vl * nfields; i++) { - if (isMasked(vreg_file, 0, i / nfields, vmask)) - continue; - - uint32_t nfields_strided = strided ? nfields : 1; - Word mem_addr = (base_addr & 0xFFFFFFFC) + (i / nfields_strided) * stride + (i % nfields_strided) * sizeof(DT); - Word mem_data = 0; - emul_->dcache_read(&mem_data, mem_addr, vsew / 8); - DP(4, "Loading data " << mem_data << " from: " << mem_addr << " to vec reg: " << getVreg
(rdest + (i % nfields) * emul, i / nfields) << " i: " << i / nfields); - DT &result = getVregData
(vreg_file, rdest + (i % nfields) * emul, i / nfields); - DP(4, "Previous data: " << +result); - result = (DT)mem_data; - } -} - -void vector_op_vix_load(std::vector> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rdest, uint32_t vsew, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) { - switch (vsew) { - case 8: - vector_op_vix_load(vreg_file, emul_, base_addr, rdest, vl, strided, stride, nfields, lmul, vmask); - break; - case 16: - vector_op_vix_load(vreg_file, emul_, base_addr, rdest, vl, strided, stride, nfields, lmul, vmask); - break; - case 32: - vector_op_vix_load(vreg_file, emul_, base_addr, rdest, vl, strided, stride, nfields, lmul, vmask); - break; - case 64: - vector_op_vix_load(vreg_file, emul_, base_addr, rdest, vl, strided, stride, nfields, lmul, vmask); - break; - default: - std::cout << "Failed to execute VLE for vsew: " << vsew << std::endl; - std::abort(); - } -} - -template -void vector_op_vv_load(std::vector> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc1, uint32_t rdest, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) { - uint32_t vsew = sizeof(DT) * 8; - uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11); - if (nfields * emul > 8) { - std::cout << "NFIELDS * EMUL = " << nfields * lmul << " but it should be <= 8" << std::endl; - std::abort(); - } - for (uint32_t i = 0; i < vl * nfields; i++) { - if (isMasked(vreg_file, 0, i / nfields, vmask)) - continue; - - Word offset = 0; - switch (iSew) { - case 8: - offset = getVregData(vreg_file, rsrc1, i / nfields); - break; - case 16: - offset = getVregData(vreg_file, rsrc1, i / nfields); - break; - case 32: - offset = getVregData(vreg_file, rsrc1, i / nfields); - break; - case 64: - offset = getVregData(vreg_file, rsrc1, i / nfields); - break; - default: - std::cout << "Unsupported iSew: " << iSew << std::endl; - std::abort(); - } - - Word mem_addr = (base_addr & 0xFFFFFFFC) + offset + (i % nfields) * sizeof(DT); - Word mem_data = 0; - emul_->dcache_read(&mem_data, mem_addr, vsew / 8); - DP(4, "VLUX/VLOX - Loading data " << mem_data << " from: " << mem_addr << " with offset: " << std::dec << offset << " to vec reg: " << getVreg
(rdest + (i % nfields) * emul, i / nfields) << " i: " << i / nfields); - DT &result = getVregData
(vreg_file, rdest + (i % nfields) * emul, i / nfields); - DP(4, "Previous data: " << +result); - result = (DT)mem_data; - } -} - -void vector_op_vv_load(std::vector> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc1, uint32_t rdest, uint32_t vsew, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) { - switch (vsew) { - case 8: - vector_op_vv_load(vreg_file, emul_, base_addr, rsrc1, rdest, iSew, vl, nfields, lmul, vmask); - break; - case 16: - vector_op_vv_load(vreg_file, emul_, base_addr, rsrc1, rdest, iSew, vl, nfields, lmul, vmask); - break; - case 32: - vector_op_vv_load(vreg_file, emul_, base_addr, rsrc1, rdest, iSew, vl, nfields, lmul, vmask); - break; - case 64: - vector_op_vv_load(vreg_file, emul_, base_addr, rsrc1, rdest, iSew, vl, nfields, lmul, vmask); - break; - default: - std::cout << "Failed to execute VLUX/VLOX for vsew: " << vsew << std::endl; - std::abort(); - } -} - -template -void vector_op_vix_store(std::vector> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc3, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) { - uint32_t vsew = sizeof(DT) * 8; - uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11); - for (uint32_t i = 0; i < vl * nfields; i++) { - if (isMasked(vreg_file, 0, i / nfields, vmask)) - continue; - - uint32_t nfields_strided = strided ? nfields : 1; - Word mem_addr = base_addr + (i / nfields_strided) * stride + (i % nfields_strided) * sizeof(DT); - Word mem_data = getVregData
(vreg_file, rsrc3 + (i % nfields) * emul, i / nfields); - DP(4, "Storing: " << std::hex << mem_data << " at: " << mem_addr << " from vec reg: " << getVreg
(rsrc3 + (i % nfields) * emul, i / nfields) << " i: " << i / nfields); - emul_->dcache_write(&mem_data, mem_addr, vsew / 8); - } -} - -void vector_op_vix_store(std::vector> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc3, uint32_t vsew, uint32_t vl, bool strided, WordI stride, uint32_t nfields, uint32_t lmul, uint32_t vmask) { - switch (vsew) { - case 8: - vector_op_vix_store(vreg_file, emul_, base_addr, rsrc3, vl, strided, stride, nfields, lmul, vmask); - break; - case 16: - vector_op_vix_store(vreg_file, emul_, base_addr, rsrc3, vl, strided, stride, nfields, lmul, vmask); - break; - case 32: - vector_op_vix_store(vreg_file, emul_, base_addr, rsrc3, vl, strided, stride, nfields, lmul, vmask); - break; - case 64: - vector_op_vix_store(vreg_file, emul_, base_addr, rsrc3, vl, strided, stride, nfields, lmul, vmask); - break; - default: - std::cout << "Failed to execute VSE for vsew: " << vsew << std::endl; - std::abort(); - } -} - -template -void vector_op_vv_store(std::vector> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc1, uint32_t rsrc3, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) { - uint32_t vsew = sizeof(DT) * 8; - uint32_t emul = lmul >> 2 ? 1 : 1 << (lmul & 0b11); - for (uint32_t i = 0; i < vl * nfields; i++) { - if (isMasked(vreg_file, 0, i / nfields, vmask)) - continue; - - Word offset = 0; - switch (iSew) { - case 8: - offset = getVregData(vreg_file, rsrc1, i / nfields); - break; - case 16: - offset = getVregData(vreg_file, rsrc1, i / nfields); - break; - case 32: - offset = getVregData(vreg_file, rsrc1, i / nfields); - break; - case 64: - offset = getVregData(vreg_file, rsrc1, i / nfields); - break; - default: - std::cout << "Unsupported iSew: " << iSew << std::endl; - std::abort(); - } - - Word mem_addr = base_addr + offset + (i % nfields) * sizeof(DT); - Word mem_data = getVregData
(vreg_file, rsrc3 + (i % nfields) * emul, i / nfields); - DP(4, "VSUX/VSOX - Storing: " << std::hex << mem_data << " at: " << mem_addr << " with offset: " << std::dec << offset << " from vec reg: " << getVreg
(rsrc3 + (i % nfields) * emul, i / nfields) << " i: " << i / nfields); - emul_->dcache_write(&mem_data, mem_addr, vsew / 8); - } -} - -void vector_op_vv_store(std::vector> &vreg_file, vortex::Emulator *emul_, WordI base_addr, uint32_t rsrc1, uint32_t rsrc3, uint32_t vsew, uint32_t iSew, uint32_t vl, uint32_t nfields, uint32_t lmul, uint32_t vmask) { - switch (vsew) { - case 8: - vector_op_vv_store(vreg_file, emul_, base_addr, rsrc1, rsrc3, iSew, vl, nfields, lmul, vmask); - break; - case 16: - vector_op_vv_store(vreg_file, emul_, base_addr, rsrc1, rsrc3, iSew, vl, nfields, lmul, vmask); - break; - case 32: - vector_op_vv_store(vreg_file, emul_, base_addr, rsrc1, rsrc3, iSew, vl, nfields, lmul, vmask); - break; - case 64: - vector_op_vv_store(vreg_file, emul_, base_addr, rsrc1, rsrc3, iSew, vl, nfields, lmul, vmask); - break; - default: - std::cout << "Failed to execute VSUX/VSOX for vsew: " << vsew << std::endl; - std::abort(); - } -} - -template