diff --git a/README.md b/README.md index 540b0e8b625233..0608fc3ba1e1e5 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,7 @@ Any feature not listed below but present in the specification should be consider - (Done) `12.12. Vector Single-Width Integer Multiply-Add Instructions` - (Done) `12.13. Vector Widening Integer Multiply-Add Instructions` - (Done) `12.14. Vector Integer Merge and Move Instructions` + - (Done) `13.3. Vector Single-Width Fractional Multiply with Rounding and Saturation` - (WIP) Clang intrinsics related to the `XTHeadVector` extension: - (WIP) `6. Configuration-Setting and Utility` - (Done) `6.1. Set vl and vtype` diff --git a/llvm/include/llvm/IR/IntrinsicsRISCVXTHeadV.td b/llvm/include/llvm/IR/IntrinsicsRISCVXTHeadV.td index 5b0f9d0d70ef99..c35df326c92827 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCVXTHeadV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCVXTHeadV.td @@ -793,4 +793,8 @@ let TargetPrefix = "riscv" in { let ScalarOperand = 1; let VLOperand = 2; } + + // 13.3. Vector Single-Width Fractional Multiply with Rounding and Saturation Instructions + defm th_vsmul : XVBinaryABX; + } // TargetPrefix = "riscv" diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHeadVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHeadVPseudos.td index daa8e3222ee563..536612bb643a2e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHeadVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHeadVPseudos.td @@ -2089,6 +2089,21 @@ multiclass XVPseudoVWMAC_VX { } } +multiclass XVPseudoVSMUL_VV_VX { + foreach m = MxListXTHeadV in { + defvar mx = m.MX; + defvar WriteVSIMulV_MX = !cast<SchedWrite>("WriteVSMulV_" # mx); + defvar WriteVSIMulX_MX = !cast<SchedWrite>("WriteVSMulX_" # mx); + defvar ReadVSIMulV_MX = !cast<SchedRead>("ReadVSMulV_" # mx); + defvar ReadVSIMulX_MX = !cast<SchedRead>("ReadVSMulX_" # mx); + + defm "" : XVPseudoBinaryV_VV<m>, + Sched<[WriteVSIMulV_MX, ReadVSIMulV_MX, ReadVSIMulV_MX, ReadVMask]>; + defm "" : XVPseudoBinaryV_VX<m>, + Sched<[WriteVSIMulX_MX, ReadVSIMulV_MX, ReadVSIMulX_MX, ReadVMask]>; + } +} + //===----------------------------------------------------------------------===// // Helpers to define the intrinsic patterns for the XTHeadVector extension. //===----------------------------------------------------------------------===// @@ -3020,4 +3035,18 @@ let Predicates = [HasVendorXTHeadV] in { } } // Predicates = [HasVendorXTHeadV] -include "RISCVInstrInfoXTHeadVVLPatterns.td" +//===----------------------------------------------------------------------===// +// 13.3. 
Vector Single-Width Fractional Multiply with Rounding and +// Saturation Instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasVendorXTHeadV] in { + defm PseudoTH_VSMUL : XVPseudoVSMUL_VV_VX; +} // Predicates = [HasVendorXTHeadV] + +let Predicates = [HasVendorXTHeadV] in { + defm : XVPatBinaryV_VV_VX<"int_riscv_th_vsmul", "PseudoTH_VSMUL", AllIntegerXVectors>; + // defm : XVPatBinaryV_VV_VX<"int_riscv_th_vsmul", "PseudoTH_VSMUL", AllIntegerXVectors, isSEWAware=1>; +} // Predicates = [HasVendorXTHeadV] + +include "RISCVInstrInfoXTHeadVVLPatterns.td" \ No newline at end of file diff --git a/llvm/test/CodeGen/RISCV/rvv0p71/vsmul.ll b/llvm/test/CodeGen/RISCV/rvv0p71/vsmul.ll new file mode 100644 index 00000000000000..be7ea0afefd13b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv0p71/vsmul.ll @@ -0,0 +1,2440 @@ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xtheadvector \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xtheadvector \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64 + +declare @llvm.riscv.th.vsmul.nxv8i8.nxv8i8( + , + , + , + iXLen); + +define @intrinsic_vsmul_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vv_nxv8i8_nxv8i8_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e8, m1, d1 +; CHECK-NEXT: th.vsmul.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv8i8.nxv8i8( + undef, + %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv8i8.nxv8i8( + , + , + , + , + iXLen); + +define @intrinsic_vsmul_mask_vv_nxv8i8_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv8i8_nxv8i8_nxv8i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e8, m1, d1 +; CHECK-NEXT: th.vsmul.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv8i8.nxv8i8( + %0, + %1, + %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv8i8.i8( + , + , + i8, + iXLen); + +define @intrinsic_vsmul_vx_nxv8i8_nxv8i8_i8( %0, i8 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vx_nxv8i8_nxv8i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: th.vsetvli zero, a1, e8, m1, d1 +; CHECK-NEXT: 
th.vsmul.vx v8, v8, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv8i8.i8( + undef, + %0, + i8 %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv8i8.i8( + , + , + i8, + , + iXLen); + +define @intrinsic_vsmul_mask_vx_nxv8i8_nxv8i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv8i8_nxv8i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: th.vsetvli zero, a1, e8, m1, d1 +; CHECK-NEXT: th.vsmul.vx v8, v9, a0, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv8i8.i8( + %0, + %1, + i8 %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv16i8.nxv16i8( + , + , + , + iXLen); + +define @intrinsic_vsmul_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vv_nxv16i8_nxv16i8_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e8, m2, d1 +; CHECK-NEXT: th.vsmul.vv v8, v8, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv16i8.nxv16i8( + undef, + %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv16i8.nxv16i8( + , + , + , + , + iXLen); + +define @intrinsic_vsmul_mask_vv_nxv16i8_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv16i8_nxv16i8_nxv16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e8, m2, d1 +; CHECK-NEXT: th.vsmul.vv v8, v10, v12, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv16i8.nxv16i8( + %0, + %1, + %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv16i8.i8( + , + , + i8, + iXLen); + +define @intrinsic_vsmul_vx_nxv16i8_nxv16i8_i8( %0, i8 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vx_nxv16i8_nxv16i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: th.vsetvli 
zero, a1, e8, m2, d1 +; CHECK-NEXT: th.vsmul.vx v8, v8, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv16i8.i8( + undef, + %0, + i8 %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv16i8.i8( + , + , + i8, + , + iXLen); + +define @intrinsic_vsmul_mask_vx_nxv16i8_nxv16i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv16i8_nxv16i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: th.vsetvli zero, a1, e8, m2, d1 +; CHECK-NEXT: th.vsmul.vx v8, v10, a0, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv16i8.i8( + %0, + %1, + i8 %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv32i8.nxv32i8( + , + , + , + iXLen); + +define @intrinsic_vsmul_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vv_nxv32i8_nxv32i8_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e8, m4, d1 +; CHECK-NEXT: th.vsmul.vv v8, v8, v12 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv32i8.nxv32i8( + undef, + %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv32i8.nxv32i8( + , + , + , + , + iXLen); + +define @intrinsic_vsmul_mask_vv_nxv32i8_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv32i8_nxv32i8_nxv32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e8, m4, d1 +; CHECK-NEXT: th.vsmul.vv v8, v12, v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv32i8.nxv32i8( + %0, + %1, + %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv32i8.i8( + , + , + i8, + iXLen); + +define @intrinsic_vsmul_vx_nxv32i8_nxv32i8_i8( %0, i8 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vx_nxv32i8_nxv32i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: 
th.vsetvl zero, a2, a3 +; CHECK-NEXT: th.vsetvli zero, a1, e8, m4, d1 +; CHECK-NEXT: th.vsmul.vx v8, v8, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv32i8.i8( + undef, + %0, + i8 %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv32i8.i8( + , + , + i8, + , + iXLen); + +define @intrinsic_vsmul_mask_vx_nxv32i8_nxv32i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv32i8_nxv32i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: th.vsetvli zero, a1, e8, m4, d1 +; CHECK-NEXT: th.vsmul.vx v8, v12, a0, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv32i8.i8( + %0, + %1, + i8 %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv64i8.nxv64i8( + , + , + , + iXLen); + +define @intrinsic_vsmul_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vv_nxv64i8_nxv64i8_nxv64i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e8, m8, d1 +; CHECK-NEXT: th.vsmul.vv v8, v8, v16 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv64i8.nxv64i8( + undef, + %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv64i8.nxv64i8( + , + , + , + , + iXLen); + +define @intrinsic_vsmul_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv64i8_nxv64i8_nxv64i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m8, d1 +; CHECK-NEXT: th.vle.v v24, (a0) +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a0, a2 +; CHECK-NEXT: th.vsetvli zero, a1, e8, m8, d1 +; CHECK-NEXT: th.vsmul.vv v8, v16, v24, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv64i8.nxv64i8( + %0, + %1, + %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv64i8.i8( + , + , + i8, + iXLen); + +define @intrinsic_vsmul_vx_nxv64i8_nxv64i8_i8( %0, i8 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vx_nxv64i8_nxv64i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: 
csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: th.vsetvli zero, a1, e8, m8, d1 +; CHECK-NEXT: th.vsmul.vx v8, v8, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv64i8.i8( + undef, + %0, + i8 %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv64i8.i8( + , + , + i8, + , + iXLen); + +define @intrinsic_vsmul_mask_vx_nxv64i8_nxv64i8_i8( %0, %1, i8 %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv64i8_nxv64i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: th.vsetvli zero, a1, e8, m8, d1 +; CHECK-NEXT: th.vsmul.vx v8, v16, a0, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv64i8.i8( + %0, + %1, + i8 %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv4i16.nxv4i16( + , + , + , + iXLen); + +define @intrinsic_vsmul_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vv_nxv4i16_nxv4i16_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e16, m1, d1 +; CHECK-NEXT: th.vsmul.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv4i16.nxv4i16( + undef, + %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv4i16.nxv4i16( + , + , + , + , + iXLen); + +define @intrinsic_vsmul_mask_vv_nxv4i16_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv4i16_nxv4i16_nxv4i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e16, m1, d1 +; CHECK-NEXT: th.vsmul.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv4i16.nxv4i16( + %0, + %1, + %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv4i16.i16( + , + , + i16, + iXLen); + +define @intrinsic_vsmul_vx_nxv4i16_nxv4i16_i16( %0, i16 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vx_nxv4i16_nxv4i16_i16: +; CHECK: # %bb.0: # 
%entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: th.vsetvli zero, a1, e16, m1, d1 +; CHECK-NEXT: th.vsmul.vx v8, v8, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv4i16.i16( + undef, + %0, + i16 %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv4i16.i16( + , + , + i16, + , + iXLen); + +define @intrinsic_vsmul_mask_vx_nxv4i16_nxv4i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv4i16_nxv4i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: th.vsetvli zero, a1, e16, m1, d1 +; CHECK-NEXT: th.vsmul.vx v8, v9, a0, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv4i16.i16( + %0, + %1, + i16 %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv8i16.nxv8i16( + , + , + , + iXLen); + +define @intrinsic_vsmul_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vv_nxv8i16_nxv8i16_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e16, m2, d1 +; CHECK-NEXT: th.vsmul.vv v8, v8, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv8i16.nxv8i16( + undef, + %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv8i16.nxv8i16( + , + , + , + , + iXLen); + +define @intrinsic_vsmul_mask_vv_nxv8i16_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv8i16_nxv8i16_nxv8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e16, m2, d1 +; CHECK-NEXT: th.vsmul.vv v8, v10, v12, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv8i16.nxv8i16( + %0, + %1, + %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv8i16.i16( + , + , + i16, + iXLen); + +define @intrinsic_vsmul_vx_nxv8i16_nxv8i16_i16( %0, i16 %1, iXLen %2) nounwind { +; CHECK-LABEL: 
intrinsic_vsmul_vx_nxv8i16_nxv8i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: th.vsetvli zero, a1, e16, m2, d1 +; CHECK-NEXT: th.vsmul.vx v8, v8, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv8i16.i16( + undef, + %0, + i16 %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv8i16.i16( + , + , + i16, + , + iXLen); + +define @intrinsic_vsmul_mask_vx_nxv8i16_nxv8i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv8i16_nxv8i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: th.vsetvli zero, a1, e16, m2, d1 +; CHECK-NEXT: th.vsmul.vx v8, v10, a0, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv8i16.i16( + %0, + %1, + i16 %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv16i16.nxv16i16( + , + , + , + iXLen); + +define @intrinsic_vsmul_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vv_nxv16i16_nxv16i16_nxv16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e16, m4, d1 +; CHECK-NEXT: th.vsmul.vv v8, v8, v12 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv16i16.nxv16i16( + undef, + %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv16i16.nxv16i16( + , + , + , + , + iXLen); + +define @intrinsic_vsmul_mask_vv_nxv16i16_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv16i16_nxv16i16_nxv16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e16, m4, d1 +; CHECK-NEXT: th.vsmul.vv v8, v12, v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv16i16.nxv16i16( + %0, + %1, + %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv16i16.i16( + , + , + i16, + iXLen); + +define 
@intrinsic_vsmul_vx_nxv16i16_nxv16i16_i16( %0, i16 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vx_nxv16i16_nxv16i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: th.vsetvli zero, a1, e16, m4, d1 +; CHECK-NEXT: th.vsmul.vx v8, v8, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv16i16.i16( + undef, + %0, + i16 %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv16i16.i16( + , + , + i16, + , + iXLen); + +define @intrinsic_vsmul_mask_vx_nxv16i16_nxv16i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv16i16_nxv16i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: th.vsetvli zero, a1, e16, m4, d1 +; CHECK-NEXT: th.vsmul.vx v8, v12, a0, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv16i16.i16( + %0, + %1, + i16 %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv32i16.nxv32i16( + , + , + , + iXLen); + +define @intrinsic_vsmul_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vv_nxv32i16_nxv32i16_nxv32i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e16, m8, d1 +; CHECK-NEXT: th.vsmul.vv v8, v8, v16 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv32i16.nxv32i16( + undef, + %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv32i16.nxv32i16( + , + , + , + , + iXLen); + +define @intrinsic_vsmul_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv32i16_nxv32i16_nxv32i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e16, m8, d1 +; CHECK-NEXT: th.vle.v v24, (a0) +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a0, a2 +; CHECK-NEXT: th.vsetvli zero, a1, e16, m8, d1 +; CHECK-NEXT: th.vsmul.vv v8, v16, v24, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv32i16.nxv32i16( + 
%0, + %1, + %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv32i16.i16( + , + , + i16, + iXLen); + +define @intrinsic_vsmul_vx_nxv32i16_nxv32i16_i16( %0, i16 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vx_nxv32i16_nxv32i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: th.vsetvli zero, a1, e16, m8, d1 +; CHECK-NEXT: th.vsmul.vx v8, v8, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv32i16.i16( + undef, + %0, + i16 %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv32i16.i16( + , + , + i16, + , + iXLen); + +define @intrinsic_vsmul_mask_vx_nxv32i16_nxv32i16_i16( %0, %1, i16 %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv32i16_nxv32i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: th.vsetvli zero, a1, e16, m8, d1 +; CHECK-NEXT: th.vsmul.vx v8, v16, a0, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv32i16.i16( + %0, + %1, + i16 %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv2i32.nxv2i32( + , + , + , + iXLen); + +define @intrinsic_vsmul_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vv_nxv2i32_nxv2i32_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e32, m1, d1 +; CHECK-NEXT: th.vsmul.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv2i32.nxv2i32( + undef, + %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv2i32.nxv2i32( + , + , + , + , + iXLen); + +define @intrinsic_vsmul_mask_vv_nxv2i32_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv2i32_nxv2i32_nxv2i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e32, m1, d1 +; CHECK-NEXT: th.vsmul.vv v8, v9, v10, v0.t +; 
CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv2i32.nxv2i32( + %0, + %1, + %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv2i32.i32( + , + , + i32, + iXLen); + +define @intrinsic_vsmul_vx_nxv2i32_nxv2i32_i32( %0, i32 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vx_nxv2i32_nxv2i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: th.vsetvli zero, a1, e32, m1, d1 +; CHECK-NEXT: th.vsmul.vx v8, v8, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv2i32.i32( + undef, + %0, + i32 %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv2i32.i32( + , + , + i32, + , + iXLen); + +define @intrinsic_vsmul_mask_vx_nxv2i32_nxv2i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv2i32_nxv2i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: th.vsetvli zero, a1, e32, m1, d1 +; CHECK-NEXT: th.vsmul.vx v8, v9, a0, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv2i32.i32( + %0, + %1, + i32 %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv4i32.nxv4i32( + , + , + , + iXLen); + +define @intrinsic_vsmul_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vv_nxv4i32_nxv4i32_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e32, m2, d1 +; CHECK-NEXT: th.vsmul.vv v8, v8, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv4i32.nxv4i32( + undef, + %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv4i32.nxv4i32( + , + , + , + , + iXLen); + +define @intrinsic_vsmul_mask_vv_nxv4i32_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv4i32_nxv4i32_nxv4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, 
e32, m2, d1 +; CHECK-NEXT: th.vsmul.vv v8, v10, v12, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv4i32.nxv4i32( + %0, + %1, + %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv4i32.i32( + , + , + i32, + iXLen); + +define @intrinsic_vsmul_vx_nxv4i32_nxv4i32_i32( %0, i32 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vx_nxv4i32_nxv4i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: th.vsetvli zero, a1, e32, m2, d1 +; CHECK-NEXT: th.vsmul.vx v8, v8, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv4i32.i32( + undef, + %0, + i32 %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv4i32.i32( + , + , + i32, + , + iXLen); + +define @intrinsic_vsmul_mask_vx_nxv4i32_nxv4i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv4i32_nxv4i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: th.vsetvli zero, a1, e32, m2, d1 +; CHECK-NEXT: th.vsmul.vx v8, v10, a0, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv4i32.i32( + %0, + %1, + i32 %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv8i32.nxv8i32( + , + , + , + iXLen); + +define @intrinsic_vsmul_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vv_nxv8i32_nxv8i32_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e32, m4, d1 +; CHECK-NEXT: th.vsmul.vv v8, v8, v12 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv8i32.nxv8i32( + undef, + %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv8i32.nxv8i32( + , + , + , + , + iXLen); + +define @intrinsic_vsmul_mask_vv_nxv8i32_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv8i32_nxv8i32_nxv8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; 
CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e32, m4, d1 +; CHECK-NEXT: th.vsmul.vv v8, v12, v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv8i32.nxv8i32( + %0, + %1, + %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv8i32.i32( + , + , + i32, + iXLen); + +define @intrinsic_vsmul_vx_nxv8i32_nxv8i32_i32( %0, i32 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vx_nxv8i32_nxv8i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: th.vsetvli zero, a1, e32, m4, d1 +; CHECK-NEXT: th.vsmul.vx v8, v8, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv8i32.i32( + undef, + %0, + i32 %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv8i32.i32( + , + , + i32, + , + iXLen); + +define @intrinsic_vsmul_mask_vx_nxv8i32_nxv8i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv8i32_nxv8i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: th.vsetvli zero, a1, e32, m4, d1 +; CHECK-NEXT: th.vsmul.vx v8, v12, a0, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv8i32.i32( + %0, + %1, + i32 %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv16i32.nxv16i32( + , + , + , + iXLen); + +define @intrinsic_vsmul_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vv_nxv16i32_nxv16i32_nxv16i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e32, m8, d1 +; CHECK-NEXT: th.vsmul.vv v8, v8, v16 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv16i32.nxv16i32( + undef, + %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv16i32.nxv16i32( + , + , + , + , + iXLen); + +define @intrinsic_vsmul_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv16i32_nxv16i32_nxv16i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e32, m8, d1 +; CHECK-NEXT: th.vle.v v24, (a0) +; CHECK-NEXT: th.vsetvl zero, 
a2, a3 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a0, a2 +; CHECK-NEXT: th.vsetvli zero, a1, e32, m8, d1 +; CHECK-NEXT: th.vsmul.vv v8, v16, v24, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv16i32.nxv16i32( + %0, + %1, + %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv16i32.i32( + , + , + i32, + iXLen); + +define @intrinsic_vsmul_vx_nxv16i32_nxv16i32_i32( %0, i32 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vx_nxv16i32_nxv16i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: th.vsetvli zero, a1, e32, m8, d1 +; CHECK-NEXT: th.vsmul.vx v8, v8, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv16i32.i32( + undef, + %0, + i32 %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv16i32.i32( + , + , + i32, + , + iXLen); + +define @intrinsic_vsmul_mask_vx_nxv16i32_nxv16i32_i32( %0, %1, i32 %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv16i32_nxv16i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: th.vsetvli zero, a1, e32, m8, d1 +; CHECK-NEXT: th.vsmul.vx v8, v16, a0, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv16i32.i32( + %0, + %1, + i32 %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv1i64.nxv1i64( + , + , + , + iXLen); + +define @intrinsic_vsmul_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vv_nxv1i64_nxv1i64_nxv1i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e64, m1, d1 +; CHECK-NEXT: th.vsmul.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv1i64.nxv1i64( + undef, + %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv1i64.nxv1i64( + , + , + , + , + iXLen); + +define @intrinsic_vsmul_mask_vv_nxv1i64_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv1i64_nxv1i64_nxv1i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; 
CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e64, m1, d1 +; CHECK-NEXT: th.vsmul.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv1i64.nxv1i64( + %0, + %1, + %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv1i64.i64( + , + , + i64, + iXLen); + +define @intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: csrr a3, vl +; RV32-NEXT: csrr a4, vtype +; RV32-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV32-NEXT: th.vsetvl zero, a3, a4 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: th.vsetvli zero, a2, e64, m1, d1 +; RV32-NEXT: th.vlse.v v9, (a0), zero +; RV32-NEXT: th.vsmul.vv v8, v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsmul_vx_nxv1i64_nxv1i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: csrr a2, vl +; RV64-NEXT: csrr a3, vtype +; RV64-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV64-NEXT: th.vsetvl zero, a2, a3 +; RV64-NEXT: th.vsetvli zero, a1, e64, m1, d1 +; RV64-NEXT: th.vsmul.vx v8, v8, a0 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv1i64.i64( + undef, + %0, + i64 %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv1i64.i64( + , + , + i64, + , + iXLen); + +define @intrinsic_vsmul_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vsmul_mask_vx_nxv1i64_nxv1i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: csrr a3, vl +; RV32-NEXT: csrr a4, vtype +; RV32-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV32-NEXT: th.vsetvl zero, a3, a4 +; RV32-NEXT: csrr a3, vl +; RV32-NEXT: csrr a4, vtype +; RV32-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV32-NEXT: th.vsetvl zero, a3, a4 +; RV32-NEXT: csrr a3, vl +; RV32-NEXT: csrr a4, vtype +; RV32-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV32-NEXT: th.vsetvl zero, a3, a4 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: th.vsetvli zero, a2, e64, m1, d1 +; RV32-NEXT: th.vlse.v v10, (a0), zero +; RV32-NEXT: csrr a0, vl +; RV32-NEXT: csrr a1, vtype +; RV32-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV32-NEXT: th.vsetvl zero, a0, a1 +; RV32-NEXT: th.vsmul.vv v8, v9, v10, v0.t +; RV32-NEXT: csrr a0, vl +; RV32-NEXT: csrr a1, vtype +; RV32-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV32-NEXT: th.vsetvl zero, a0, a1 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsmul_mask_vx_nxv1i64_nxv1i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: csrr a2, vl +; RV64-NEXT: csrr a3, vtype +; RV64-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV64-NEXT: th.vsetvl zero, a2, a3 +; RV64-NEXT: csrr a2, vl +; RV64-NEXT: csrr a3, vtype +; RV64-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV64-NEXT: th.vsetvl zero, a2, a3 +; RV64-NEXT: csrr a2, vl +; RV64-NEXT: csrr a3, vtype +; RV64-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV64-NEXT: th.vsetvl zero, a2, a3 +; RV64-NEXT: csrr a2, vl +; RV64-NEXT: csrr a3, vtype +; RV64-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV64-NEXT: th.vsetvl zero, a2, a3 +; RV64-NEXT: th.vsetvli zero, a1, e64, m1, d1 +; RV64-NEXT: th.vsmul.vx v8, v9, a0, v0.t +; 
RV64-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv1i64.i64( + %0, + %1, + i64 %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv2i64.nxv2i64( + , + , + , + iXLen); + +define @intrinsic_vsmul_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vv_nxv2i64_nxv2i64_nxv2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e64, m2, d1 +; CHECK-NEXT: th.vsmul.vv v8, v8, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv2i64.nxv2i64( + undef, + %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv2i64.nxv2i64( + , + , + , + , + iXLen); + +define @intrinsic_vsmul_mask_vv_nxv2i64_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv2i64_nxv2i64_nxv2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e64, m2, d1 +; CHECK-NEXT: th.vsmul.vv v8, v10, v12, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv2i64.nxv2i64( + %0, + %1, + %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv2i64.i64( + , + , + i64, + iXLen); + +define @intrinsic_vsmul_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vsmul_vx_nxv2i64_nxv2i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: csrr a3, vl +; RV32-NEXT: csrr a4, vtype +; RV32-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV32-NEXT: th.vsetvl zero, a3, a4 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: th.vsetvli zero, a2, e64, m2, d1 +; RV32-NEXT: th.vlse.v v10, (a0), zero +; RV32-NEXT: th.vsmul.vv v8, v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsmul_vx_nxv2i64_nxv2i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: csrr a2, vl +; RV64-NEXT: csrr a3, vtype +; RV64-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV64-NEXT: th.vsetvl zero, a2, a3 +; RV64-NEXT: th.vsetvli zero, a1, e64, m2, d1 +; RV64-NEXT: th.vsmul.vx v8, v8, a0 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv2i64.i64( + undef, + %0, + i64 %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv2i64.i64( + , + , + i64, + , + iXLen); + +define @intrinsic_vsmul_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vsmul_mask_vx_nxv2i64_nxv2i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: csrr a3, vl +; RV32-NEXT: csrr a4, vtype +; RV32-NEXT: 
th.vsetvli zero, zero, e8, m1, d1 +; RV32-NEXT: th.vsetvl zero, a3, a4 +; RV32-NEXT: csrr a3, vl +; RV32-NEXT: csrr a4, vtype +; RV32-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV32-NEXT: th.vsetvl zero, a3, a4 +; RV32-NEXT: csrr a3, vl +; RV32-NEXT: csrr a4, vtype +; RV32-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV32-NEXT: th.vsetvl zero, a3, a4 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: th.vsetvli zero, a2, e64, m2, d1 +; RV32-NEXT: th.vlse.v v12, (a0), zero +; RV32-NEXT: csrr a0, vl +; RV32-NEXT: csrr a1, vtype +; RV32-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV32-NEXT: th.vsetvl zero, a0, a1 +; RV32-NEXT: th.vsmul.vv v8, v10, v12, v0.t +; RV32-NEXT: csrr a0, vl +; RV32-NEXT: csrr a1, vtype +; RV32-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV32-NEXT: th.vsetvl zero, a0, a1 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsmul_mask_vx_nxv2i64_nxv2i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: csrr a2, vl +; RV64-NEXT: csrr a3, vtype +; RV64-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV64-NEXT: th.vsetvl zero, a2, a3 +; RV64-NEXT: csrr a2, vl +; RV64-NEXT: csrr a3, vtype +; RV64-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV64-NEXT: th.vsetvl zero, a2, a3 +; RV64-NEXT: csrr a2, vl +; RV64-NEXT: csrr a3, vtype +; RV64-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV64-NEXT: th.vsetvl zero, a2, a3 +; RV64-NEXT: csrr a2, vl +; RV64-NEXT: csrr a3, vtype +; RV64-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV64-NEXT: th.vsetvl zero, a2, a3 +; RV64-NEXT: th.vsetvli zero, a1, e64, m2, d1 +; RV64-NEXT: th.vsmul.vx v8, v10, a0, v0.t +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv2i64.i64( + %0, + %1, + i64 %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv4i64.nxv4i64( + , + , + , + iXLen); + +define @intrinsic_vsmul_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vsmul_vv_nxv4i64_nxv4i64_nxv4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e64, m4, d1 +; CHECK-NEXT: th.vsmul.vv v8, v8, v12 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv4i64.nxv4i64( + undef, + %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv4i64.nxv4i64( + , + , + , + , + iXLen); + +define @intrinsic_vsmul_mask_vv_nxv4i64_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv4i64_nxv4i64_nxv4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e64, m4, d1 +; 
CHECK-NEXT: th.vsmul.vv v8, v12, v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv4i64.nxv4i64( + %0, + %1, + %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv4i64.i64( + , + , + i64, + iXLen); + +define @intrinsic_vsmul_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vsmul_vx_nxv4i64_nxv4i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: csrr a3, vl +; RV32-NEXT: csrr a4, vtype +; RV32-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV32-NEXT: th.vsetvl zero, a3, a4 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: th.vsetvli zero, a2, e64, m4, d1 +; RV32-NEXT: th.vlse.v v12, (a0), zero +; RV32-NEXT: th.vsmul.vv v8, v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsmul_vx_nxv4i64_nxv4i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: csrr a2, vl +; RV64-NEXT: csrr a3, vtype +; RV64-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV64-NEXT: th.vsetvl zero, a2, a3 +; RV64-NEXT: th.vsetvli zero, a1, e64, m4, d1 +; RV64-NEXT: th.vsmul.vx v8, v8, a0 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv4i64.i64( + undef, + %0, + i64 %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv4i64.i64( + , + , + i64, + , + iXLen); + +define @intrinsic_vsmul_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vsmul_mask_vx_nxv4i64_nxv4i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: csrr a3, vl +; RV32-NEXT: csrr a4, vtype +; RV32-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV32-NEXT: th.vsetvl zero, a3, a4 +; RV32-NEXT: csrr a3, vl +; RV32-NEXT: csrr a4, vtype +; RV32-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV32-NEXT: th.vsetvl zero, a3, a4 +; RV32-NEXT: csrr a3, vl +; RV32-NEXT: csrr a4, vtype +; RV32-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV32-NEXT: th.vsetvl zero, a3, a4 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: th.vsetvli zero, a2, e64, m4, d1 +; RV32-NEXT: th.vlse.v v16, (a0), zero +; RV32-NEXT: csrr a0, vl +; RV32-NEXT: csrr a1, vtype +; RV32-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV32-NEXT: th.vsetvl zero, a0, a1 +; RV32-NEXT: th.vsmul.vv v8, v12, v16, v0.t +; RV32-NEXT: csrr a0, vl +; RV32-NEXT: csrr a1, vtype +; RV32-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV32-NEXT: th.vsetvl zero, a0, a1 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsmul_mask_vx_nxv4i64_nxv4i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: csrr a2, vl +; RV64-NEXT: csrr a3, vtype +; RV64-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV64-NEXT: th.vsetvl zero, a2, a3 +; RV64-NEXT: csrr a2, vl +; RV64-NEXT: csrr a3, vtype +; RV64-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV64-NEXT: th.vsetvl zero, a2, a3 +; RV64-NEXT: csrr a2, vl +; RV64-NEXT: csrr a3, vtype +; RV64-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV64-NEXT: th.vsetvl zero, a2, a3 +; RV64-NEXT: csrr a2, vl +; RV64-NEXT: csrr a3, vtype +; RV64-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV64-NEXT: th.vsetvl zero, a2, a3 +; RV64-NEXT: th.vsetvli zero, a1, e64, m4, d1 +; RV64-NEXT: th.vsmul.vx v8, v12, a0, v0.t +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv4i64.i64( + %0, + %1, + i64 %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv8i64.nxv8i64( + , + , + , + iXLen); + +define @intrinsic_vsmul_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, iXLen %2) nounwind 
{ +; CHECK-LABEL: intrinsic_vsmul_vv_nxv8i64_nxv8i64_nxv8i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a1, a2 +; CHECK-NEXT: th.vsetvli zero, a0, e64, m8, d1 +; CHECK-NEXT: th.vsmul.vv v8, v8, v16 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv8i64.nxv8i64( + undef, + %0, + %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv8i64.nxv8i64( + , + , + , + , + iXLen); + +define @intrinsic_vsmul_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv8i64_nxv8i64_nxv8i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a2, vl +; CHECK-NEXT: csrr a3, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e64, m8, d1 +; CHECK-NEXT: th.vle.v v24, (a0) +; CHECK-NEXT: th.vsetvl zero, a2, a3 +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: csrr a2, vtype +; CHECK-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; CHECK-NEXT: th.vsetvl zero, a0, a2 +; CHECK-NEXT: th.vsetvli zero, a1, e64, m8, d1 +; CHECK-NEXT: th.vsmul.vv v8, v16, v24, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv8i64.nxv8i64( + %0, + %1, + %2, + %3, + iXLen %4) + + ret %a +} + +declare @llvm.riscv.th.vsmul.nxv8i64.i64( + , + , + i64, + iXLen); + +define @intrinsic_vsmul_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vsmul_vx_nxv8i64_nxv8i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: csrr a3, vl +; RV32-NEXT: csrr a4, vtype +; RV32-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV32-NEXT: th.vsetvl zero, a3, a4 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: th.vsetvli zero, a2, e64, m8, d1 +; RV32-NEXT: th.vlse.v v16, (a0), zero +; RV32-NEXT: th.vsmul.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsmul_vx_nxv8i64_nxv8i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: csrr a2, vl +; RV64-NEXT: csrr a3, vtype +; RV64-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV64-NEXT: th.vsetvl zero, a2, a3 +; RV64-NEXT: th.vsetvli zero, a1, e64, m8, d1 +; RV64-NEXT: th.vsmul.vx v8, v8, a0 +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.nxv8i64.i64( + undef, + %0, + i64 %1, + iXLen %2) + + ret %a +} + +declare @llvm.riscv.th.vsmul.mask.nxv8i64.i64( + , + , + i64, + , + iXLen); + +define @intrinsic_vsmul_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vsmul_mask_vx_nxv8i64_nxv8i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: csrr a3, vl +; RV32-NEXT: csrr a4, vtype +; RV32-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV32-NEXT: th.vsetvl zero, a3, a4 +; RV32-NEXT: csrr a3, vl +; RV32-NEXT: csrr a4, vtype +; RV32-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV32-NEXT: th.vsetvl zero, a3, a4 +; RV32-NEXT: csrr a3, vl +; RV32-NEXT: csrr 
a4, vtype +; RV32-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV32-NEXT: th.vsetvl zero, a3, a4 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: th.vsetvli zero, a2, e64, m8, d1 +; RV32-NEXT: th.vlse.v v24, (a0), zero +; RV32-NEXT: csrr a0, vl +; RV32-NEXT: csrr a1, vtype +; RV32-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV32-NEXT: th.vsetvl zero, a0, a1 +; RV32-NEXT: th.vsmul.vv v8, v16, v24, v0.t +; RV32-NEXT: csrr a0, vl +; RV32-NEXT: csrr a1, vtype +; RV32-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV32-NEXT: th.vsetvl zero, a0, a1 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vsmul_mask_vx_nxv8i64_nxv8i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: csrr a2, vl +; RV64-NEXT: csrr a3, vtype +; RV64-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV64-NEXT: th.vsetvl zero, a2, a3 +; RV64-NEXT: csrr a2, vl +; RV64-NEXT: csrr a3, vtype +; RV64-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV64-NEXT: th.vsetvl zero, a2, a3 +; RV64-NEXT: csrr a2, vl +; RV64-NEXT: csrr a3, vtype +; RV64-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV64-NEXT: th.vsetvl zero, a2, a3 +; RV64-NEXT: csrr a2, vl +; RV64-NEXT: csrr a3, vtype +; RV64-NEXT: th.vsetvli zero, zero, e8, m1, d1 +; RV64-NEXT: th.vsetvl zero, a2, a3 +; RV64-NEXT: th.vsetvli zero, a1, e64, m8, d1 +; RV64-NEXT: th.vsmul.vx v8, v16, a0, v0.t +; RV64-NEXT: ret +entry: + %a = call @llvm.riscv.th.vsmul.mask.nxv8i64.i64( + %0, + %1, + i64 %2, + %3, + iXLen %4) + + ret %a +} \ No newline at end of file
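
For reference, `th.vsmul` is a signed fractional multiply with rounding and saturation. Below is a minimal scalar sketch of one SEW=8 lane, assuming the XTHeadVector (RVV 0.7.1) semantics match the standard RVV fixed-point definition, result = clip(roundoff_signed(vs2[i] * vs1[i], SEW-1)), with `vxrm` set to round-to-nearest-up; the helper name `vsmul_e8_rnu` and the arithmetic-right-shift behaviour of the host compiler are assumptions for illustration, not part of this patch.

#include <stdint.h>

/* Sketch of one e8 lane of th.vsmul under round-to-nearest-up (rnu),
 * assuming the RVV definition res = clip(roundoff_signed(a * b, SEW-1)).
 * Relies on arithmetic right shift of negative values (typical, but
 * implementation-defined in C). */
static int8_t vsmul_e8_rnu(int8_t a, int8_t b) {
    int16_t prod    = (int16_t)((int16_t)a * (int16_t)b);            /* full 2*SEW-bit product      */
    int16_t rounded = (int16_t)((prod >> 7) + ((prod >> 6) & 1));    /* shift by SEW-1, rnu rounding */
    if (rounded > INT8_MAX)                                          /* clip to the destination range */
        rounded = INT8_MAX;
    return (int8_t)rounded;
}

At SEW=8 only the (-128) * (-128) product can exceed the destination range after the shift, so the clip reduces to a single upper-bound check; larger SEW lanes follow the same pattern with wider intermediate types.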