From bf878ba82c20c38592fadbdc989e1373ab4a6188 Mon Sep 17 00:00:00 2001
From: Thurston Dang
Date: Wed, 22 Jan 2025 18:22:14 +0000
Subject: [PATCH 1/4] [msan] Add avx512-intrinsics-upgrade.ll test case

This is forked from llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll.
It tests intrinsics that LLVM "auto-upgrades"; for example,
@llvm.x86.avx512.mask.store is converted into @llvm.masked.store (which
has the interesting side effect that MemorySanitizer can already handle
it via its existing handleMaskedStore).
---
 .../avx512-intrinsics-upgrade.ll | 19969 ++++++++++++++++
 1 file changed, 19969 insertions(+)
 create mode 100644 llvm/test/Instrumentation/MemorySanitizer/avx512-intrinsics-upgrade.ll

diff --git a/llvm/test/Instrumentation/MemorySanitizer/avx512-intrinsics-upgrade.ll b/llvm/test/Instrumentation/MemorySanitizer/avx512-intrinsics-upgrade.ll
new file mode 100644
index 00000000000000..f74858cb0ed516
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/avx512-intrinsics-upgrade.ll
@@ -0,0 +1,19969 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -S -mtriple=x86_64-linux-gnu -mattr=+avx512f -passes=msan 2>&1 | FileCheck %s
+;
+; Forked from llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
+
+declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone
+
+define i16 @unpckbw_test(i16 %a0, i16 %a1) #0 {
+;
+; CHECK-LABEL: @unpckbw_test(
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[A0:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[A1:%.*]] to <16 x i1>
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i1> [[TMP3]], <16 x i1> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i1> [[TMP4]], <16 x i1> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i1> [[TMP5]], <16 x i1> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i1> [[TMP6]], <16 x i1> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <8 x i1> [[_MSPROP1]], <8 x i1> [[_MSPROP]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i1> [[TMP8]], <8 x i1> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i1> [[_MSPROP2]] to i16
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i1> [[TMP9]] to i16
+; CHECK-NEXT: store i16 [[TMP10]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i16 [[TMP11]]
+;
+ %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
+ ret i16 %res
+}
+
+define { <16 x i32>, <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_pbroadcastd_gpr_512(i32 %x0, <16 x i32> %x1, i16 %mask) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_pbroadcastd_gpr_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[TMP1]], i64 0
+; CHECK-NEXT: 
[[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[X0:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP5:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[TMP1]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <16 x i32> poison, i32 [[X0]], i64 0 +; CHECK-NEXT: [[_MSPROP7:%.*]] = shufflevector <16 x i32> [[_MSPROP6]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT1]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[_MSPROP7]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = xor <16 x i32> [[DOTSPLAT2]], [[X1:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[_MSPROP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i32> [[TMP9]], <16 x i32> [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[DOTSPLAT2]], <16 x i32> [[X1]] +; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[TMP1]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <16 x i32> poison, i32 [[X0]], i64 0 +; CHECK-NEXT: [[_MSPROP9:%.*]] = shufflevector <16 x i32> [[_MSPROP8]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT3]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[_MSPROP9]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[DOTSPLAT4]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[_MSPROP9]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT10:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[DOTSPLAT4]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } { <16 x i32> splat (i32 -1), <16 x i32> splat (i32 -1), <16 x i32> splat (i32 -1) }, <16 x i32> [[_MSPROP5]], 0 +; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> [[DOTSPLAT]], 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP18]], <16 x i32> [[_MSPROP_SELECT]], 1 +; CHECK-NEXT: [[RES4:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[RES3]], <16 x i32> [[TMP10]], 1 +; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP19]], <16 x i32> [[_MSPROP_SELECT10]], 2 +; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[RES4]], <16 x i32> [[TMP17]], 2 +; CHECK-NEXT: store { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP20]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { <16 x i32>, <16 x i32>, <16 x i32> } [[RES5]] +; + %res0 = call <16 x i32> 
@llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 -1) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 %mask) + %res2 = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> zeroinitializer, i16 %mask) + %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0 + %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res3, <16 x i32> %res1, 1 + %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res4, <16 x i32> %res2, 2 + ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5 +} +declare <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32, <16 x i32>, i16) + + +define { <8 x i64>, <8 x i64>, <8 x i64> } @test_int_x86_avx512_mask_pbroadcastq_gpr_512(i64 %x0, <8 x i64> %x1, i8 %mask) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_pbroadcastq_gpr_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[TMP1]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[X0:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP5:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[TMP1]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[X0]], i64 0 +; CHECK-NEXT: [[_MSPROP7:%.*]] = shufflevector <8 x i64> [[_MSPROP6]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[_MSPROP7]], <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i64> [[DOTSPLAT2]], [[X1:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[_MSPROP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i64> [[TMP9]], <8 x i64> [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[DOTSPLAT2]], <8 x i64> [[X1]] +; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[TMP1]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <8 x i64> poison, i64 [[X0]], i64 0 +; CHECK-NEXT: [[_MSPROP9:%.*]] = shufflevector <8 x i64> [[_MSPROP8]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT3]], <8 x i64> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[_MSPROP9]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = xor <8 x i64> [[DOTSPLAT4]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = 
or <8 x i64> [[TMP14]], [[_MSPROP9]] +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT10:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[DOTSPLAT4]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } { <8 x i64> splat (i64 -1), <8 x i64> splat (i64 -1), <8 x i64> splat (i64 -1) }, <8 x i64> [[_MSPROP5]], 0 +; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> [[DOTSPLAT]], 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[TMP18]], <8 x i64> [[_MSPROP_SELECT]], 1 +; CHECK-NEXT: [[RES4:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[RES3]], <8 x i64> [[TMP10]], 1 +; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[TMP19]], <8 x i64> [[_MSPROP_SELECT10]], 2 +; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[RES4]], <8 x i64> [[TMP17]], 2 +; CHECK-NEXT: store { <8 x i64>, <8 x i64>, <8 x i64> } [[TMP20]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { <8 x i64>, <8 x i64>, <8 x i64> } [[RES5]] +; + %res0 = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1,i8 -1) + %res1 = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1,i8 %mask) + %res2 = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> zeroinitializer,i8 %mask) + %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0 + %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res3, <8 x i64> %res1, 1 + %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res4, <8 x i64> %res2, 2 + ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5 +} +declare <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64, <8 x i64>, i8) + + +declare <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float>, <16 x float>, i16) nounwind readonly + +define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0, <16 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_vbroadcast_ss_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP2]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> undef, i16 -1) + ret <16 x float> %res +} + +define <16 x float> @test_x86_mask_vbroadcast_ss_ps_512(<4 x float> %a0, <16 x float> %a1, i16 %mask ) #0 { +; +; CHECK-LABEL: @test_x86_mask_vbroadcast_ss_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = 
shufflevector <4 x float> [[A0:%.*]], <4 x float> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[TMP4]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x float> [[A1:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[_MSPROP]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP12]], <16 x i32> [[TMP7]] +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP6]], <16 x float> [[TMP4]], <16 x float> [[A1]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP13]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> %a1, i16 %mask) + ret <16 x float> %res +} + +define <16 x float> @test_x86_maskz_vbroadcast_ss_ps_512(<4 x float> %a0, i16 %mask ) #0 { +; +; CHECK-LABEL: @test_x86_maskz_vbroadcast_ss_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[_MSPROP]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x float> [[TMP3]] to <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i32> [[TMP10]], <16 x i32> [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP5]], <16 x float> [[TMP3]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP11]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> zeroinitializer, i16 %mask) + ret <16 x float> %res +} + +declare <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double>, <8 x double>, i8) nounwind readonly + +define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0, <8 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_vbroadcast_sd_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[A0:%.*]], <2 x double> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP2]] +; + %res = 
call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> undef, i8 -1) + ret <8 x double> %res +} + +define <8 x double> @test_x86_mask_vbroadcast_sd_pd_512(<2 x double> %a0, <8 x double> %a1, i8 %mask ) #0 { +; +; CHECK-LABEL: @test_x86_mask_vbroadcast_sd_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[A0:%.*]], <2 x double> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x double> [[TMP4]] to <8 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x double> [[A1:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[_MSPROP]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP12]], <8 x i64> [[TMP7]] +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP6]], <8 x double> [[TMP4]], <8 x double> [[A1]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP13]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> %a1, i8 %mask) + ret <8 x double> %res +} + +define <8 x double> @test_x86_maskz_vbroadcast_sd_pd_512(<2 x double> %a0, i8 %mask ) #0 { +; +; CHECK-LABEL: @test_x86_maskz_vbroadcast_sd_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[A0:%.*]], <2 x double> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x double> [[TMP3]] to <8 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i64> [[TMP10]], <8 x i64> [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP5]], <8 x double> [[TMP3]], <8 x double> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP11]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x 
double> zeroinitializer, i8 %mask) + ret <8 x double> %res +} + +declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>, <16 x i32>, i16) + +define <16 x i32>@test_int_x86_avx512_pbroadcastd_512(<4 x i32> %x0, <16 x i32> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pbroadcastd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X0:%.*]], <4 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP2]] +; + %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 -1) + ret <16 x i32> %res +} + +define <16 x i32>@test_int_x86_avx512_mask_pbroadcastd_512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_pbroadcastd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[X0:%.*]], <4 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP4]], [[X1:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP4]], <16 x i32> [[X1]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP11]] +; + %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask) + ret <16 x i32> %res +} + +define <16 x i32>@test_int_x86_avx512_maskz_pbroadcastd_512(<4 x i32> %x0, i16 %mask) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_maskz_pbroadcastd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[X0:%.*]], <4 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[_MSPROP]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <16 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <16 
x i32> [[TMP7]], [[_MSPROP]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i32> [[TMP9]], <16 x i32> [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP3]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP10]] +; + %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask) + ret <16 x i32> %res +} + +declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>, <8 x i64>, i8) + +define <8 x i64>@test_int_x86_avx512_pbroadcastq_512(<2 x i64> %x0, <8 x i64> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pbroadcastq_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[X0:%.*]], <2 x i64> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP2]] +; + %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 -1) + ret <8 x i64> %res +} + +define <8 x i64>@test_int_x86_avx512_mask_pbroadcastq_512(<2 x i64> %x0, <8 x i64> %x1, i8 %mask) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_pbroadcastq_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[X0:%.*]], <2 x i64> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP4]], [[X1:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP4]], <8 x i64> [[X1]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP11]] +; + %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 %mask) + ret <8 x i64> %res +} + +define <8 x i64>@test_int_x86_avx512_maskz_pbroadcastq_512(<2 x i64> %x0, i8 %mask) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_maskz_pbroadcastq_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> splat (i64 -1), <8 x i32> 
zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[X0:%.*]], <2 x i64> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i64> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i64> [[TMP9]], <8 x i64> [[TMP6]]
+; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP3]], <8 x i64> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP10]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> zeroinitializer,i8 %mask)
+ ret <8 x i64> %res
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float>, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_movsldup_512(<16 x float> %x0, <16 x float> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_movsldup_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[X0:%.*]], <16 x float> [[X0]], <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
+; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP2]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
+ ret <16 x float> %res
+}
+
+define <16 x float>@test_int_x86_avx512_mask_movsldup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_movsldup_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[X0:%.*]], <16 x float> [[X0]], <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[TMP4]] to <16 x i32>
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x float> [[X1:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP12]], <16 x i32> [[TMP7]]
+; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP6]], <16 x float> [[TMP4]], <16 x float> [[X1]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP13]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
+ ret <16 x float> %res
+}
+
+define <16 x float>@test_int_x86_avx512_maskz_movsldup_512(<16 x float> %x0, i16 %x2) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_movsldup_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x float> [[X0:%.*]], <16 x float> [[X0]], <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[_MSPROP]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x float> [[TMP3]] to <16 x i32>
+; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i32> [[TMP10]], <16 x i32> [[TMP6]]
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP5]], <16 x float> [[TMP3]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP11]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
+ ret <16 x float> %res
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float>, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_movshdup_512(<16 x float> %x0, <16 x float> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_movshdup_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[X0:%.*]], <16 x float> [[X0]], <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
+; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP2]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
+ ret <16 x float> %res
+}
+
+define <16 x float>@test_int_x86_avx512_mask_movshdup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_movshdup_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[X0:%.*]], <16 x float> [[X0]], <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[TMP4]] to <16 x i32>
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x float> [[X1:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP12]], <16 x i32> [[TMP7]]
+; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP6]], <16 x float> [[TMP4]], <16 x float> [[X1]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP13]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
+ ret <16 x float> %res
+}
+
+define <16 x float>@test_int_x86_avx512_maskz_movshdup_512(<16 x float> %x0, i16 %x2) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_movshdup_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x float> [[X0:%.*]], <16 x float> [[X0]], <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[_MSPROP]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x float> [[TMP3]] to <16 x i32>
+; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i32> [[TMP10]], <16 x i32> [[TMP6]]
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP5]], <16 x float> [[TMP3]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP11]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
+ ret <16 x float> %res
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double>, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_movddup_512(<8 x double> %x0, <8 x double> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_movddup_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[X0:%.*]], <8 x double> [[X0]], <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP2]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 -1)
+ ret <8 x double> %res
+}
+
+define <8 x double>@test_int_x86_avx512_mask_movddup_512(<8 x double> %x0, <8 x double> %x1, i8 %x2) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_movddup_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[X0:%.*]], <8 x double> [[X0]], <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x double> [[TMP4]] to <8 x i64>
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x double> [[X1:%.*]] to <8 x i64>
+; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP12]], <8 x i64> [[TMP7]]
+; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP6]], <8 x double> [[TMP4]], <8 x double> [[X1]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP13]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 %x2)
+ ret <8 x double> %res
+}
+
+define <8 x double>@test_int_x86_avx512_maskz_movddup_512(<8 x double> %x0, i8 %x2) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_movddup_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[X0:%.*]], <8 x double> [[X0]], <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x double> [[TMP3]] to <8 x i64>
+; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i64> [[TMP10]], <8 x i64> [[TMP6]]
+; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP5]], <8 x double> [[TMP3]], <8 x double> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP11]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> zeroinitializer, i8 %x2)
+ ret <8 x double> %res
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double>, i32, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_perm_df_512(<8 x double> %x0, <8 x double> %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_perm_df_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i32> <i32 3, i32 0, i32 0, i32 0, i32 7, i32 4, i32 4, i32 4>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[X0:%.*]], <8 x double> [[X0]], <8 x i32> <i32 3, i32 0, i32 0, i32 0, i32 7, i32 4, i32 4, i32 4>
+; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP2]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> %x2, i8 -1)
+ ret <8 x double> %res
+}
+
+define <8 x double>@test_int_x86_avx512_mask_perm_df_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_perm_df_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i32> <i32 3, i32 0, i32 0, i32 0, i32 7, i32 4, i32 4, i32 4>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[X0:%.*]], <8 x double> [[X0]], <8 x i32> <i32 3, i32 0, i32 0, i32 0, i32 7, i32 4, i32 4, i32 4>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x double> [[TMP4]] to <8 x i64>
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x double> [[X2:%.*]] to <8 x i64>
+; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP12]], <8 x i64> [[TMP7]]
+; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP6]], <8 x double> [[TMP4]], <8 x double> [[X2]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP13]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> %x2, i8 %x3)
+ ret <8 x double> %res
+}
+
+define <8 x double>@test_int_x86_avx512_maskz_perm_df_512(<8 x double> %x0, i8 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_perm_df_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i32> <i32 3, i32 0, i32 0, i32 0, i32 7, i32 4, i32 4, i32 4>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[X0:%.*]], <8 x double> [[X0]], <8 x i32> <i32 3, i32 0, i32 0, i32 0, i32 7, i32 4, i32 4, i32 4>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x double> [[TMP3]] to <8 x i64>
+; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i64> [[TMP10]], <8 x i64> [[TMP6]]
+; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP5]], <8 x double> [[TMP3]], <8 x double> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP11]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> zeroinitializer, i8 %x3)
+ ret <8 x double> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64>, i32, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_perm_di_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_perm_di_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i32> <i32 3, i32 0, i32 0, i32 0, i32 7, i32 4, i32 4, i32 4>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i64> [[X0:%.*]], <8 x i64> [[X0]], <8 x i32> <i32 3, i32 0, i32 0, i32 0, i32 7, i32 4, i32 4, i32 4>
+; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP2]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_mask_perm_di_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_perm_di_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i32> <i32 3, i32 0, i32 0, i32 0, i32 7, i32 4, i32 4, i32 4>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[X0:%.*]], <8 x i64> [[X0]], <8 x i32> <i32 3, i32 0, i32 0, i32 0, i32 7, i32 4, i32 4, i32 4>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP4]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]]
+; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP4]], <8 x i64> [[X2]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP11]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_maskz_perm_di_512(<8 x i64> %x0, i32 %x1, i8 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_perm_di_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i32> <i32 3, i32 0, i32 0, i32 0, i32 7, i32 4, i32 4, i32 4>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i64> [[X0:%.*]], <8 x i64> [[X0]], <8 x i32> <i32 3, i32 0, i32 0, i32 0, i32 7, i32 4, i32 4, i32 4>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i64> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], 
[[_MSPROP]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i64> [[TMP9]], <8 x i64> [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP3]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP10]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3) + ret <8 x i64> %res +} + +define void @test_store1(<16 x float> %data, ptr %ptr, ptr %ptr2, i16 %mask) #0 { +; +; CHECK-LABEL: @test_store1( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.masked.store.v16i32.p0(<16 x i32> [[TMP2]], ptr [[TMP9]], i32 1, <16 x i1> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i1> [[TMP5]] to i16 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1:![0-9]+]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: call void @llvm.masked.store.v16f32.p0(<16 x float> [[DATA:%.*]], ptr [[PTR]], i32 1, <16 x i1> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[PTR2:%.*]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = xor i64 [[TMP15]], 87960930222080 +; CHECK-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP16]] to ptr +; CHECK-NEXT: store <16 x i32> [[TMP2]], ptr [[TMP17]], align 1 +; CHECK-NEXT: store <16 x float> [[DATA]], ptr [[PTR2]], align 1 +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.storeu.ps.512(ptr %ptr, <16 x float> %data, i16 %mask) + call void @llvm.x86.avx512.mask.storeu.ps.512(ptr %ptr2, <16 x float> %data, i16 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.storeu.ps.512(ptr, <16 x float>, i16 ) + +define void @test_store2(<8 x double> %data, ptr %ptr, ptr %ptr2, i8 %mask) #0 { +; +; CHECK-LABEL: @test_store2( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; 
CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.masked.store.v8i64.p0(<8 x i64> [[TMP2]], ptr [[TMP9]], i32 1, <8 x i1> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i1> [[TMP5]] to i8 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[DATA:%.*]], ptr [[PTR]], i32 1, <8 x i1> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[PTR2:%.*]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = xor i64 [[TMP15]], 87960930222080 +; CHECK-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP16]] to ptr +; CHECK-NEXT: store <8 x i64> [[TMP2]], ptr [[TMP17]], align 1 +; CHECK-NEXT: store <8 x double> [[DATA]], ptr [[PTR2]], align 1 +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.storeu.pd.512(ptr %ptr, <8 x double> %data, i8 %mask) + call void @llvm.x86.avx512.mask.storeu.pd.512(ptr %ptr2, <8 x double> %data, i8 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.storeu.pd.512(ptr, <8 x double>, i8) + +define void @test_mask_store_aligned_ps(<16 x float> %data, ptr %ptr, ptr %ptr2, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_store_aligned_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.masked.store.v16i32.p0(<16 x i32> [[TMP2]], ptr [[TMP9]], i32 64, <16 x i1> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i1> [[TMP5]] to i16 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; 
CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: call void @llvm.masked.store.v16f32.p0(<16 x float> [[DATA:%.*]], ptr [[PTR]], i32 64, <16 x i1> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[PTR2:%.*]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = xor i64 [[TMP15]], 87960930222080 +; CHECK-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP16]] to ptr +; CHECK-NEXT: store <16 x i32> [[TMP2]], ptr [[TMP17]], align 64 +; CHECK-NEXT: store <16 x float> [[DATA]], ptr [[PTR2]], align 64 +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.store.ps.512(ptr %ptr, <16 x float> %data, i16 %mask) + call void @llvm.x86.avx512.mask.store.ps.512(ptr %ptr2, <16 x float> %data, i16 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.store.ps.512(ptr, <16 x float>, i16 ) + +define void @test_mask_store_aligned_pd(<8 x double> %data, ptr %ptr, ptr %ptr2, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_store_aligned_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.masked.store.v8i64.p0(<8 x i64> [[TMP2]], ptr [[TMP9]], i32 64, <8 x i1> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i1> [[TMP5]] to i8 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[DATA:%.*]], ptr [[PTR]], i32 64, <8 x i1> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[PTR2:%.*]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = xor i64 [[TMP15]], 87960930222080 +; CHECK-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP16]] to ptr +; CHECK-NEXT: store <8 x i64> [[TMP2]], ptr [[TMP17]], align 64 +; CHECK-NEXT: store <8 x double> [[DATA]], ptr [[PTR2]], align 64 +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.store.pd.512(ptr %ptr, <8 x double> %data, i8 %mask) + call void @llvm.x86.avx512.mask.store.pd.512(ptr %ptr2, <8 x double> %data, i8 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.store.pd.512(ptr, 
<8 x double>, i8) + +define void@test_int_x86_avx512_mask_storeu_q_512(ptr %ptr1, ptr %ptr2, <8 x i64> %x1, i8 %x2) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_storeu_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR1:%.*]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.masked.store.v8i64.p0(<8 x i64> [[TMP2]], ptr [[TMP9]], i32 1, <8 x i1> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i1> [[TMP5]] to i8 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: call void @llvm.masked.store.v8i64.p0(<8 x i64> [[X1:%.*]], ptr [[PTR1]], i32 1, <8 x i1> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[PTR2:%.*]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = xor i64 [[TMP15]], 87960930222080 +; CHECK-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP16]] to ptr +; CHECK-NEXT: store <8 x i64> [[TMP2]], ptr [[TMP17]], align 1 +; CHECK-NEXT: store <8 x i64> [[X1]], ptr [[PTR2]], align 1 +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.storeu.q.512(ptr %ptr1, <8 x i64> %x1, i8 %x2) + call void @llvm.x86.avx512.mask.storeu.q.512(ptr %ptr2, <8 x i64> %x1, i8 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.storeu.q.512(ptr, <8 x i64>, i8) + +define void@test_int_x86_avx512_mask_storeu_d_512(ptr %ptr1, ptr %ptr2, <16 x i32> %x1, i16 %x2) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_storeu_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR1:%.*]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call 
void @llvm.masked.store.v16i32.p0(<16 x i32> [[TMP2]], ptr [[TMP9]], i32 1, <16 x i1> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i1> [[TMP5]] to i16 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: call void @llvm.masked.store.v16i32.p0(<16 x i32> [[X1:%.*]], ptr [[PTR1]], i32 1, <16 x i1> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[PTR2:%.*]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = xor i64 [[TMP15]], 87960930222080 +; CHECK-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP16]] to ptr +; CHECK-NEXT: store <16 x i32> [[TMP2]], ptr [[TMP17]], align 1 +; CHECK-NEXT: store <16 x i32> [[X1]], ptr [[PTR2]], align 1 +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.storeu.d.512(ptr %ptr1, <16 x i32> %x1, i16 %x2) + call void @llvm.x86.avx512.mask.storeu.d.512(ptr %ptr2, <16 x i32> %x1, i16 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.storeu.d.512(ptr, <16 x i32>, i16) + +define void@test_int_x86_avx512_mask_store_q_512(ptr %ptr1, ptr %ptr2, <8 x i64> %x1, i8 %x2) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_store_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR1:%.*]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.masked.store.v8i64.p0(<8 x i64> [[TMP2]], ptr [[TMP9]], i32 64, <8 x i1> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i1> [[TMP5]] to i8 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: call void @llvm.masked.store.v8i64.p0(<8 x i64> [[X1:%.*]], ptr [[PTR1]], i32 64, <8 x i1> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[PTR2:%.*]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = xor 
i64 [[TMP15]], 87960930222080 +; CHECK-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP16]] to ptr +; CHECK-NEXT: store <8 x i64> [[TMP2]], ptr [[TMP17]], align 64 +; CHECK-NEXT: store <8 x i64> [[X1]], ptr [[PTR2]], align 64 +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.store.q.512(ptr %ptr1, <8 x i64> %x1, i8 %x2) + call void @llvm.x86.avx512.mask.store.q.512(ptr %ptr2, <8 x i64> %x1, i8 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.store.q.512(ptr, <8 x i64>, i8) + +define void@test_int_x86_avx512_mask_store_d_512(ptr %ptr1, ptr %ptr2, <16 x i32> %x1, i16 %x2) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_store_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR1:%.*]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: call void @llvm.masked.store.v16i32.p0(<16 x i32> [[TMP2]], ptr [[TMP9]], i32 64, <16 x i1> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i1> [[TMP5]] to i16 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: call void @llvm.masked.store.v16i32.p0(<16 x i32> [[X1:%.*]], ptr [[PTR1]], i32 64, <16 x i1> [[TMP6]]) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[PTR2:%.*]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = xor i64 [[TMP15]], 87960930222080 +; CHECK-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP16]] to ptr +; CHECK-NEXT: store <16 x i32> [[TMP2]], ptr [[TMP17]], align 64 +; CHECK-NEXT: store <16 x i32> [[X1]], ptr [[PTR2]], align 64 +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.store.d.512(ptr %ptr1, <16 x i32> %x1, i16 %x2) + call void @llvm.x86.avx512.mask.store.d.512(ptr %ptr2, <16 x i32> %x1, i16 -1) + ret void +} + +declare void @llvm.x86.avx512.mask.store.d.512(ptr, <16 x i32>, i16) + +define <16 x float> @test_mask_load_aligned_ps(<16 x float> %data, ptr %ptr, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_load_aligned_ps( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: 
br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x float>, ptr [[PTR:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080 +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr +; CHECK-NEXT: [[_MSMASKEDLD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[TMP13]], i32 64, <16 x i1> [[TMP10]], <16 x i32> [[_MSLD]]) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i1> [[TMP9]] to i16 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i16 [[TMP14]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 15: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[TMP17:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0(ptr [[PTR]], i32 64, <16 x i1> [[TMP10]], <16 x float> [[TMP5]]) +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP21:%.*]] = xor i64 [[TMP20]], 87960930222080 +; CHECK-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP21]] to ptr +; CHECK-NEXT: [[_MSMASKEDLD1:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[TMP22]], i32 64, <16 x i1> [[TMP19]], <16 x i32> zeroinitializer) +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <16 x i1> [[TMP18]] to i16 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP23]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP24:%.*]], label [[TMP25:%.*]], !prof [[PROF1]] +; CHECK: 24: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 25: +; CHECK-NEXT: [[TMP26:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0(ptr [[PTR]], i32 64, <16 x i1> [[TMP19]], <16 x float> zeroinitializer) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[_MSMASKEDLD1]], [[_MSMASKEDLD]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <16 x float> [[TMP26]], [[TMP17]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES4]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(ptr %ptr, <16 x float> zeroinitializer, i16 -1) + %res1 = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(ptr %ptr, <16 x float> %res, i16 %mask) + %res2 = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(ptr %ptr, <16 x float> zeroinitializer, i16 %mask) + %res4 = fadd <16 x float> %res2, %res1 + ret <16 x float> %res4 +} + +declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(ptr, <16 x float>, i16) + +define <16 x float> @test_mask_load_unaligned_ps(<16 x float> %data, ptr %ptr, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_load_unaligned_ps( +; CHECK-NEXT: 
[[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x float>, ptr [[PTR:%.*]], align 1 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080 +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr +; CHECK-NEXT: [[_MSMASKEDLD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[TMP13]], i32 1, <16 x i1> [[TMP10]], <16 x i32> [[_MSLD]]) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i1> [[TMP9]] to i16 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i16 [[TMP14]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 15: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[TMP17:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0(ptr [[PTR]], i32 1, <16 x i1> [[TMP10]], <16 x float> [[TMP5]]) +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP21:%.*]] = xor i64 [[TMP20]], 87960930222080 +; CHECK-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP21]] to ptr +; CHECK-NEXT: [[_MSMASKEDLD1:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[TMP22]], i32 1, <16 x i1> [[TMP19]], <16 x i32> zeroinitializer) +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <16 x i1> [[TMP18]] to i16 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP23]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP24:%.*]], label [[TMP25:%.*]], !prof [[PROF1]] +; CHECK: 24: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 25: +; CHECK-NEXT: [[TMP26:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0(ptr [[PTR]], i32 1, <16 x i1> [[TMP19]], <16 x float> zeroinitializer) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[_MSMASKEDLD1]], [[_MSMASKEDLD]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <16 x float> [[TMP26]], [[TMP17]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES4]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(ptr %ptr, <16 x float> zeroinitializer, i16 -1) + %res1 = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(ptr %ptr, <16 x float> %res, i16 %mask) + %res2 = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(ptr 
%ptr, <16 x float> zeroinitializer, i16 %mask) + %res4 = fadd <16 x float> %res2, %res1 + ret <16 x float> %res4 +} + +declare <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(ptr, <16 x float>, i16) + +define <8 x double> @test_mask_load_aligned_pd(<8 x double> %data, ptr %ptr, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_load_aligned_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = load <8 x double>, ptr [[PTR:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i64>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080 +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr +; CHECK-NEXT: [[_MSMASKEDLD:%.*]] = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr [[TMP13]], i32 64, <8 x i1> [[TMP10]], <8 x i64> [[_MSLD]]) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i1> [[TMP9]] to i8 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i8 [[TMP14]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 15: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[TMP17:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[PTR]], i32 64, <8 x i1> [[TMP10]], <8 x double> [[TMP5]]) +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP21:%.*]] = xor i64 [[TMP20]], 87960930222080 +; CHECK-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP21]] to ptr +; CHECK-NEXT: [[_MSMASKEDLD1:%.*]] = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr [[TMP22]], i32 64, <8 x i1> [[TMP19]], <8 x i64> zeroinitializer) +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i1> [[TMP18]] to i8 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP23]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP24:%.*]], label [[TMP25:%.*]], !prof [[PROF1]] +; CHECK: 24: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 25: +; CHECK-NEXT: [[TMP26:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[PTR]], i32 64, <8 x i1> [[TMP19]], <8 x double> zeroinitializer) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[_MSMASKEDLD1]], [[_MSMASKEDLD]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <8 x double> [[TMP26]], [[TMP17]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, 
align 8 +; CHECK-NEXT: ret <8 x double> [[RES4]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(ptr %ptr, <8 x double> zeroinitializer, i8 -1) + %res1 = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(ptr %ptr, <8 x double> %res, i8 %mask) + %res2 = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(ptr %ptr, <8 x double> zeroinitializer, i8 %mask) + %res4 = fadd <8 x double> %res2, %res1 + ret <8 x double> %res4 +} + +declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(ptr, <8 x double>, i8) + +define <8 x double> @test_mask_load_unaligned_pd(<8 x double> %data, ptr %ptr, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_load_unaligned_pd( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = load <8 x double>, ptr [[PTR:%.*]], align 1 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i64>, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080 +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr +; CHECK-NEXT: [[_MSMASKEDLD:%.*]] = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr [[TMP13]], i32 1, <8 x i1> [[TMP10]], <8 x i64> [[_MSLD]]) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i1> [[TMP9]] to i8 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i8 [[TMP14]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 15: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[TMP17:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[PTR]], i32 1, <8 x i1> [[TMP10]], <8 x double> [[TMP5]]) +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP21:%.*]] = xor i64 [[TMP20]], 87960930222080 +; CHECK-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP21]] to ptr +; CHECK-NEXT: [[_MSMASKEDLD1:%.*]] = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr [[TMP22]], i32 1, <8 x i1> [[TMP19]], <8 x i64> zeroinitializer) +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i1> [[TMP18]] to i8 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP23]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP24:%.*]], label [[TMP25:%.*]], !prof [[PROF1]] +; CHECK: 24: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 25: +; CHECK-NEXT: [[TMP26:%.*]] = call <8 x 
double> @llvm.masked.load.v8f64.p0(ptr [[PTR]], i32 1, <8 x i1> [[TMP19]], <8 x double> zeroinitializer) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[_MSMASKEDLD1]], [[_MSMASKEDLD]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <8 x double> [[TMP26]], [[TMP17]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[RES4]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(ptr %ptr, <8 x double> zeroinitializer, i8 -1) + %res1 = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(ptr %ptr, <8 x double> %res, i8 %mask) + %res2 = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(ptr %ptr, <8 x double> zeroinitializer, i8 %mask) + %res4 = fadd <8 x double> %res2, %res1 + ret <8 x double> %res4 +} + +declare <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(ptr, <8 x double>, i8) + +declare <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(ptr, <16 x i32>, i16) + +define <16 x i32> @test_mask_load_unaligned_d(ptr %ptr, ptr %ptr2, <16 x i32> %data, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_load_unaligned_d( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = load <16 x i32>, ptr [[PTR:%.*]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[PTR2:%.*]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 87960930222080 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[_MSMASKEDLD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[TMP14]], i32 1, <16 x i1> [[TMP11]], <16 x i32> [[_MSLD]]) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i1> [[TMP10]] to i16 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i16 [[TMP15]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP16:%.*]], label [[TMP17:%.*]], !prof [[PROF1]] +; CHECK: 16: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 17: +; CHECK-NEXT: [[TMP18:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[PTR2]], i32 1, <16 x i1> [[TMP11]], <16 x i32> [[TMP6]]) +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP20:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], 87960930222080 +; CHECK-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr +; CHECK-NEXT: [[_MSMASKEDLD1:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[TMP23]], i32 1, <16 x i1> [[TMP20]], <16 x i32> 
zeroinitializer) +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i1> [[TMP19]] to i16 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP24]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP25:%.*]], label [[TMP26:%.*]], !prof [[PROF1]] +; CHECK: 25: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 26: +; CHECK-NEXT: [[TMP27:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[PTR]], i32 1, <16 x i1> [[TMP20]], <16 x i32> zeroinitializer) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[_MSMASKEDLD1]], [[_MSMASKEDLD]] +; CHECK-NEXT: [[RES4:%.*]] = add <16 x i32> [[TMP27]], [[TMP18]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES4]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(ptr %ptr, <16 x i32> zeroinitializer, i16 -1) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(ptr %ptr2, <16 x i32> %res, i16 %mask) + %res2 = call <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(ptr %ptr, <16 x i32> zeroinitializer, i16 %mask) + %res4 = add <16 x i32> %res2, %res1 + ret <16 x i32> %res4 +} + +declare <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(ptr, <8 x i64>, i8) + +define <8 x i64> @test_mask_load_unaligned_q(ptr %ptr, ptr %ptr2, <8 x i64> %data, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_load_unaligned_q( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, ptr [[PTR:%.*]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i64>, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[PTR2:%.*]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 87960930222080 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[_MSMASKEDLD:%.*]] = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr [[TMP14]], i32 1, <8 x i1> [[TMP11]], <8 x i64> [[_MSLD]]) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i1> [[TMP10]] to i8 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i8 [[TMP15]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP16:%.*]], label [[TMP17:%.*]], !prof [[PROF1]] +; CHECK: 16: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 17: +; CHECK-NEXT: [[TMP18:%.*]] = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr [[PTR2]], i32 1, <8 x i1> [[TMP11]], <8 x i64> [[TMP6]]) +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; 
CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], 87960930222080 +; CHECK-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr +; CHECK-NEXT: [[_MSMASKEDLD1:%.*]] = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr [[TMP23]], i32 1, <8 x i1> [[TMP20]], <8 x i64> zeroinitializer) +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i1> [[TMP19]] to i8 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP24]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP25:%.*]], label [[TMP26:%.*]], !prof [[PROF1]] +; CHECK: 25: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 26: +; CHECK-NEXT: [[TMP27:%.*]] = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr [[PTR]], i32 1, <8 x i1> [[TMP20]], <8 x i64> zeroinitializer) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[_MSMASKEDLD1]], [[_MSMASKEDLD]] +; CHECK-NEXT: [[RES4:%.*]] = add <8 x i64> [[TMP27]], [[TMP18]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES4]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(ptr %ptr, <8 x i64> zeroinitializer, i8 -1) + %res1 = call <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(ptr %ptr2, <8 x i64> %res, i8 %mask) + %res2 = call <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(ptr %ptr, <8 x i64> zeroinitializer, i8 %mask) + %res4 = add <8 x i64> %res2, %res1 + ret <8 x i64> %res4 +} + +declare <16 x i32> @llvm.x86.avx512.mask.load.d.512(ptr, <16 x i32>, i16) + +define <16 x i32> @test_mask_load_aligned_d(<16 x i32> %data, ptr %ptr, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_load_aligned_d( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i32>, ptr [[PTR:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080 +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr +; CHECK-NEXT: [[_MSMASKEDLD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[TMP13]], i32 64, <16 x i1> [[TMP10]], <16 x i32> [[_MSLD]]) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i1> [[TMP9]] to i16 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i16 [[TMP14]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 15: +; CHECK-NEXT: call 
void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[TMP17:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[PTR]], i32 64, <16 x i1> [[TMP10]], <16 x i32> [[TMP5]]) +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP21:%.*]] = xor i64 [[TMP20]], 87960930222080 +; CHECK-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP21]] to ptr +; CHECK-NEXT: [[_MSMASKEDLD1:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[TMP22]], i32 64, <16 x i1> [[TMP19]], <16 x i32> zeroinitializer) +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <16 x i1> [[TMP18]] to i16 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP23]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP24:%.*]], label [[TMP25:%.*]], !prof [[PROF1]] +; CHECK: 24: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 25: +; CHECK-NEXT: [[TMP26:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[PTR]], i32 64, <16 x i1> [[TMP19]], <16 x i32> zeroinitializer) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[_MSMASKEDLD1]], [[_MSMASKEDLD]] +; CHECK-NEXT: [[RES4:%.*]] = add <16 x i32> [[TMP26]], [[TMP17]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES4]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.load.d.512(ptr %ptr, <16 x i32> zeroinitializer, i16 -1) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.load.d.512(ptr %ptr, <16 x i32> %res, i16 %mask) + %res2 = call <16 x i32> @llvm.x86.avx512.mask.load.d.512(ptr %ptr, <16 x i32> zeroinitializer, i16 %mask) + %res4 = add <16 x i32> %res2, %res1 + ret <16 x i32> %res4 +} + +declare <8 x i64> @llvm.x86.avx512.mask.load.q.512(ptr, <8 x i64>, i8) + +define <8 x i64> @test_mask_load_aligned_q(<8 x i64> %data, ptr %ptr, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_load_aligned_q( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, ptr [[PTR:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i64>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080 +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr +; CHECK-NEXT: [[_MSMASKEDLD:%.*]] = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr [[TMP13]], i32 64, <8 x i1> [[TMP10]], <8 x i64> [[_MSLD]]) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; 
CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i1> [[TMP9]] to i8 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i8 [[TMP14]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 15: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[TMP17:%.*]] = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr [[PTR]], i32 64, <8 x i1> [[TMP10]], <8 x i64> [[TMP5]]) +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP21:%.*]] = xor i64 [[TMP20]], 87960930222080 +; CHECK-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP21]] to ptr +; CHECK-NEXT: [[_MSMASKEDLD1:%.*]] = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr [[TMP22]], i32 64, <8 x i1> [[TMP19]], <8 x i64> zeroinitializer) +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i1> [[TMP18]] to i8 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP23]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP24:%.*]], label [[TMP25:%.*]], !prof [[PROF1]] +; CHECK: 24: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 25: +; CHECK-NEXT: [[TMP26:%.*]] = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr [[PTR]], i32 64, <8 x i1> [[TMP19]], <8 x i64> zeroinitializer) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[_MSMASKEDLD1]], [[_MSMASKEDLD]] +; CHECK-NEXT: [[RES4:%.*]] = add <8 x i64> [[TMP26]], [[TMP17]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES4]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.load.q.512(ptr %ptr, <8 x i64> zeroinitializer, i8 -1) + %res1 = call <8 x i64> @llvm.x86.avx512.mask.load.q.512(ptr %ptr, <8 x i64> %res, i8 %mask) + %res2 = call <8 x i64> @llvm.x86.avx512.mask.load.q.512(ptr %ptr, <8 x i64> zeroinitializer, i8 %mask) + %res4 = add <8 x i64> %res2, %res1 + ret <8 x i64> %res4 +} + +declare <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double>, i32, <8 x double>, i8) + +define <8 x double>@test_int_x86_avx512_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_vpermil_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[X0:%.*]], <8 x double> [[X0]], <8 x i32> +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP2]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1) + ret <8 x double> %res +} + +define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_vpermil_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; 
CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[X0:%.*]], <8 x double> [[X0]], <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x double> [[TMP4]] to <8 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x double> [[X2:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[_MSPROP]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP12]], <8 x i64> [[TMP7]] +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP6]], <8 x double> [[TMP4]], <8 x double> [[X2]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP13]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3) + ret <8 x double> %res +} + +define <8 x double>@test_int_x86_avx512_maskz_vpermil_pd_512(<8 x double> %x0, i8 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermil_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[X0:%.*]], <8 x double> [[X0]], <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x double> [[TMP3]] to <8 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i64> [[TMP10]], <8 x i64> [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP5]], <8 x double> [[TMP3]], <8 x double> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP11]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3) + ret <8 x double> %res +} + +declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16 x float>, i16) + +define <16 x float>@test_int_x86_avx512_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_vpermil_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[X0:%.*]], <16 x float> [[X0]], <16 x i32> +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; 
CHECK-NEXT: ret <16 x float> [[TMP2]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1) + ret <16 x float> %res +} + +define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_vpermil_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[X0:%.*]], <16 x float> [[X0]], <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[TMP4]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x float> [[X2:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[_MSPROP]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP12]], <16 x i32> [[TMP7]] +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP6]], <16 x float> [[TMP4]], <16 x float> [[X2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP13]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3) + ret <16 x float> %res +} + +define <16 x float>@test_int_x86_avx512_maskz_vpermil_ps_512(<16 x float> %x0, i16 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermil_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x float> [[X0:%.*]], <16 x float> [[X0]], <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[_MSPROP]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x float> [[TMP3]] to <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i32> [[TMP10]], <16 x i32> [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP5]], <16 x float> [[TMP3]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP11]] +; + %res = call <16 x float> 
@llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3) + ret <16 x float> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32>, i32, <16 x i32>, i16) + +define <16 x i32>@test_int_x86_avx512_pshuf_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pshuf_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[X0:%.*]], <16 x i32> [[X0]], <16 x i32> +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP2]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 -1) + ret <16 x i32> %res +} + +define <16 x i32>@test_int_x86_avx512_mask_pshuf_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[X0:%.*]], <16 x i32> [[X0]], <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP4]], [[X2:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP4]], <16 x i32> [[X2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP11]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3) + ret <16 x i32> %res +} + +define <16 x i32>@test_int_x86_avx512_maskz_pshuf_d_512(<16 x i32> %x0, i32 %x1, i16 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_maskz_pshuf_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[X0:%.*]], <16 x i32> [[X0]], <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[_MSPROP]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <16 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> 
[[TMP7]], [[_MSPROP]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i32> [[TMP9]], <16 x i32> [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP3]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP10]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i32 3, <16 x i32> zeroinitializer, i16 %x3) + ret <16 x i32> %res +} + +define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) #0 { +; CHECK-LABEL: @test_pcmpeq_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = xor <16 x i32> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = xor <16 x i32> [[TMP4]], splat (i32 -1) +; CHECK-NEXT: [[TMP7:%.*]] = and <16 x i32> [[TMP6]], [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <16 x i32> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <16 x i1> [[TMP5]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <16 x i32> [[A]], [[B]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i1> [[_MSPROP_ICMP]] to i16 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i1> [[TMP9]] to i16 +; CHECK-NEXT: store i16 [[TMP10]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i16 [[TMP11]] +; + %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1) + ret i16 %res +} + +define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_pcmpeq_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = xor <16 x i32> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <16 x i32> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <16 x i32> [[TMP5]], splat (i32 -1) +; CHECK-NEXT: [[TMP8:%.*]] = and <16 x i32> [[TMP7]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <16 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <16 x i1> [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <16 x i32> [[A]], [[B]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i1> [[_MSPROP_ICMP]], [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = and <16 x i1> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = and <16 x i1> [[_MSPROP_ICMP]], [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i1> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = and <16 x i1> [[TMP10]], [[TMP12]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i1> [[TMP17]] to i16 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <16 x i1> 
[[TMP18]] to i16 +; CHECK-NEXT: store i16 [[TMP19]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i16 [[TMP20]] +; + %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask) + ret i16 %res +} + +declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16) + +define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) #0 { +; CHECK-LABEL: @test_pcmpeq_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i64> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = xor <8 x i64> [[TMP4]], splat (i64 -1) +; CHECK-NEXT: [[TMP7:%.*]] = and <8 x i64> [[TMP6]], [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <8 x i64> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <8 x i1> [[TMP5]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <8 x i64> [[A]], [[B]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i1> [[_MSPROP_ICMP]] to i8 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i1> [[TMP9]] to i8 +; CHECK-NEXT: store i8 [[TMP10]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i8 [[TMP11]] +; + %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1) + ret i8 %res +} + +define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_pcmpeq_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = xor <8 x i64> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <8 x i64> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i64> [[TMP5]], splat (i64 -1) +; CHECK-NEXT: [[TMP8:%.*]] = and <8 x i64> [[TMP7]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <8 x i64> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <8 x i1> [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <8 x i64> [[A]], [[B]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = and <8 x i1> [[_MSPROP_ICMP]], [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i1> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = and <8 x i1> [[_MSPROP_ICMP]], [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i1> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i1> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i1> [[TMP10]], [[TMP12]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i1> [[TMP17]] to i8 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <8 x i1> [[TMP18]] to i8 +; CHECK-NEXT: store i8 [[TMP19]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i8 [[TMP20]] +; + %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) + ret i8 %res +} + +declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8) + +define i16 
@test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) #0 { +; CHECK-LABEL: @test_pcmpgt_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = xor <16 x i32> [[A:%.*]], splat (i32 -2147483648) +; CHECK-NEXT: [[TMP4:%.*]] = xor <16 x i32> [[TMP1]], splat (i32 -1) +; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i32> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = or <16 x i32> [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = xor <16 x i32> [[B:%.*]], splat (i32 -2147483648) +; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP2]], splat (i32 -1) +; CHECK-NEXT: [[TMP9:%.*]] = and <16 x i32> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP7]], [[TMP2]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt <16 x i32> [[TMP5]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt <16 x i32> [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i1> [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp sgt <16 x i32> [[A]], [[B]] +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i1> [[TMP13]] to i16 +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x i1> [[TMP14]] to i16 +; CHECK-NEXT: store i16 [[TMP15]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i16 [[TMP16]] +; + %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1) + ret i16 %res +} + +define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_pcmpgt_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = xor <16 x i32> [[A:%.*]], splat (i32 -2147483648) +; CHECK-NEXT: [[TMP5:%.*]] = xor <16 x i32> [[TMP1]], splat (i32 -1) +; CHECK-NEXT: [[TMP6:%.*]] = and <16 x i32> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i32> [[TMP4]], [[TMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[B:%.*]], splat (i32 -2147483648) +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP2]], splat (i32 -1) +; CHECK-NEXT: [[TMP10:%.*]] = and <16 x i32> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP8]], [[TMP2]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt <16 x i32> [[TMP6]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt <16 x i32> [[TMP7]], [[TMP10]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i1> [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt <16 x i32> [[A]], [[B]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP18:%.*]] = and <16 x i1> [[TMP14]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = and <16 x i1> [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = and <16 x i1> [[TMP14]], [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = or <16 x i1> [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP22:%.*]] = or <16 x i1> [[TMP21]], [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = and <16 x i1> [[TMP15]], [[TMP17]] +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i1> [[TMP22]] to i16 +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <16 x i1> [[TMP23]] to i16 +; CHECK-NEXT: store i16 [[TMP24]], 
ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i16 [[TMP25]] +; + %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask) + ret i16 %res +} + +declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16) + +define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) #0 { +; CHECK-LABEL: @test_pcmpgt_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i64> [[A:%.*]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP4:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 -1) +; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i64> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i64> [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i64> [[B:%.*]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP2]], splat (i64 -1) +; CHECK-NEXT: [[TMP9:%.*]] = and <8 x i64> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP7]], [[TMP2]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt <8 x i64> [[TMP5]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt <8 x i64> [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i1> [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp sgt <8 x i64> [[A]], [[B]] +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i1> [[TMP13]] to i8 +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i1> [[TMP14]] to i8 +; CHECK-NEXT: store i8 [[TMP15]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i8 [[TMP16]] +; + %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1) + ret i8 %res +} + +define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_pcmpgt_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = xor <8 x i64> [[A:%.*]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP5:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 -1) +; CHECK-NEXT: [[TMP6:%.*]] = and <8 x i64> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = or <8 x i64> [[TMP4]], [[TMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[B:%.*]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP2]], splat (i64 -1) +; CHECK-NEXT: [[TMP10:%.*]] = and <8 x i64> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP8]], [[TMP2]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt <8 x i64> [[TMP6]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt <8 x i64> [[TMP7]], [[TMP10]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <8 x i1> [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt <8 x i64> [[A]], [[B]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i1> [[TMP14]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i1> [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = and <8 x i1> [[TMP14]], [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i1> [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i1> 
[[TMP21]], [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = and <8 x i1> [[TMP15]], [[TMP17]] +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i1> [[TMP22]] to i8 +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <8 x i1> [[TMP23]] to i8 +; CHECK-NEXT: store i8 [[TMP24]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i8 [[TMP25]] +; + %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) + ret i8 %res +} + +declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8) + +declare <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double>, <8 x double>, <8 x double>, i8) + +define <8 x double>@test_int_x86_avx512_unpckh_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_unpckh_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x i32> +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP3]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1) + ret <8 x double> %res +} + +define <8 x double>@test_int_x86_avx512_mask_unpckh_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_unpckh_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x double> [[TMP5]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x double> [[X2:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i64> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[_MSPROP]] +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP13]], <8 x i64> [[TMP8]] +; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP7]], <8 x double> [[TMP5]], <8 x double> [[X2]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP14]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) + ret <8 x double> %res +} + +declare <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float>, 
<16 x float>, <16 x float>, i16) + +define <16 x float>@test_int_x86_avx512_unpckh_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_unpckh_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x float> [[X0:%.*]], <16 x float> [[X1:%.*]], <16 x i32> +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP3]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1) + ret <16 x float> %res +} + +define <16 x float>@test_int_x86_avx512_mask_unpckh_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_unpckh_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x float> [[X0:%.*]], <16 x float> [[X1:%.*]], <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x float> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x float> [[X2:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[_MSPROP]] +; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP13]], <16 x i32> [[TMP8]] +; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP7]], <16 x float> [[TMP5]], <16 x float> [[X2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP14]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) + ret <16 x float> %res +} + +declare <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double>, <8 x double>, <8 x double>, i8) + +define <8 x double>@test_int_x86_avx512_unpckl_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_unpckl_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> 
[[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x i32> +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP3]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1) + ret <8 x double> %res +} + +define <8 x double>@test_int_x86_avx512_mask_unpckl_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_unpckl_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x double> [[TMP5]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x double> [[X2:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i64> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[_MSPROP]] +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP13]], <8 x i64> [[TMP8]] +; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP7]], <8 x double> [[TMP5]], <8 x double> [[X2]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP14]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) + ret <8 x double> %res +} + +declare <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float>, <16 x float>, <16 x float>, i16) + +define <16 x float>@test_int_x86_avx512_unpckl_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_unpckl_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x float> [[X0:%.*]], <16 x float> [[X1:%.*]], <16 x i32> +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP3]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1) + ret <16 x float> %res +} + +define <16 x float>@test_int_x86_avx512_mask_unpckl_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) #0 { +; +; CHECK-LABEL: 
@test_int_x86_avx512_mask_unpckl_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x float> [[X0:%.*]], <16 x float> [[X1:%.*]], <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x float> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x float> [[X2:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[_MSPROP]] +; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP13]], <16 x i32> [[TMP8]] +; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP7]], <16 x float> [[TMP5]], <16 x float> [[X2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP14]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) + ret <16 x float> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) + +define <8 x i64>@test_int_x86_avx512_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_punpcklqd_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i32> +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP3]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) + ret <8 x i64> %res +} + +define <8 x i64>@test_int_x86_avx512_mask_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_punpcklqd_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: 
[[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[X2]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP12]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) + ret <8 x i64> %res +} + +define <8 x i64>@test_int_x86_avx512_maskz_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_maskz_punpcklqd_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP11]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer,i8 %x3) + ret <8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) + +define <8 x i64>@test_int_x86_avx512_punpckhqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_punpckhqd_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i32> +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> 
[[TMP3]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) + ret <8 x i64> %res +} + +define <8 x i64>@test_int_x86_avx512_mask_punpckhqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_punpckhqd_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[X2]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP12]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) + ret <8 x i64> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) + +define <16 x i32>@test_int_x86_avx512_punpckhd_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_punpckhd_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP3]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) + ret <16 x i32> %res +} + +define <16 x i32>@test_int_x86_avx512_mask_punpckhd_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_punpckhd_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr 
@__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[X2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP12]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) + ret <16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) + +define <16 x i32>@test_int_x86_avx512_punpckld_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_punpckld_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP3]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) + ret <16 x i32> %res +} + +define <16 x i32>@test_int_x86_avx512_mask_punpckld_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_punpckld_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP4]] +; CHECK-NEXT: 
[[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[X2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP12]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) + ret <16 x i32> %res +} + +define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) #0 { +; CHECK-LABEL: @test_x86_avx512_pslli_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <16 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP4]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1) + ret <16 x i32> %res +} + +define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_mask_pslli_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP5]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[TMP6]], [[A1:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP12]], <16 x i32> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP6]], <16 x i32> [[A1]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP13]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask) + ret <16 x i32> %res +} + +define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_maskz_pslli_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> 
@llvm.x86.avx512.pslli.d.512(<16 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP12]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask) + ret <16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone + +define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) #0 { +; CHECK-LABEL: @test_x86_avx512_pslli_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <8 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP4]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1) + ret <8 x i64> %res +} + +define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_mask_pslli_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP5]], <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP6]], [[A1:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP12]], <8 x i64> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP6]], <8 x i64> [[A1]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP13]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask) + ret <8 x i64> %res +} + +define <8 x i64> 
@test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_maskz_pslli_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP12]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask) + ret <8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone + +define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) #0 { +; CHECK-LABEL: @test_x86_avx512_psrli_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <16 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP4]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1) + ret <16 x i32> %res +} + +define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_mask_psrli_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP5]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[TMP6]], [[A1:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = 
or <16 x i32> [[TMP10]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP12]], <16 x i32> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP6]], <16 x i32> [[A1]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP13]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask) + ret <16 x i32> %res +} + +define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_maskz_psrli_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP12]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask) + ret <16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone + +define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) #0 { +; CHECK-LABEL: @test_x86_avx512_psrli_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <8 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP4]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1) + ret <8 x i64> %res +} + +define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_mask_psrli_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; 
CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP5]], <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP6]], [[A1:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP12]], <8 x i64> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP6]], <8 x i64> [[A1]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP13]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask) + ret <8 x i64> %res +} + +define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_maskz_psrli_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP12]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask) + ret <8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone + +define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) #0 { +; CHECK-LABEL: @test_x86_avx512_psrai_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <16 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP4]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1) + ret <16 x i32> 
%res +} + +define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_mask_psrai_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP5]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[TMP6]], [[A1:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP12]], <16 x i32> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP6]], <16 x i32> [[A1]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP13]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask) + ret <16 x i32> %res +} + +define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_maskz_psrai_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP12]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask) + ret <16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone + +define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) #0 { +; CHECK-LABEL: @test_x86_avx512_psrai_q( +; CHECK-NEXT: [[TMP1:%.*]] 
= load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <8 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP4]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1) + ret <8 x i64> %res +} + +define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_mask_psrai_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP5]], <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP6]], [[A1:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP12]], <8 x i64> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP6]], <8 x i64> [[A1]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP13]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask) + ret <8 x i64> %res +} + +define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_maskz_psrai_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> 
[[TMP5]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP12]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask) + ret <8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone + +declare void @llvm.x86.avx512.storent.q.512(ptr, <8 x i64>) + +define void@test_storent_q_512(<8 x i64> %data, ptr %ptr) #0 { +; +; CHECK-LABEL: @test_storent_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: store <8 x i64> [[TMP2]], ptr [[TMP7]], align 64 +; CHECK-NEXT: store <8 x i64> [[DATA:%.*]], ptr [[PTR]], align 64, !nontemporal [[META2:![0-9]+]] +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.storent.q.512(ptr %ptr, <8 x i64> %data) + ret void +} + +declare void @llvm.x86.avx512.storent.pd.512(ptr, <8 x double>) + +define void @test_storent_pd_512(<8 x double> %data, ptr %ptr) #0 { +; +; CHECK-LABEL: @test_storent_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: store <8 x i64> [[TMP2]], ptr [[TMP7]], align 64 +; CHECK-NEXT: store <8 x double> [[DATA:%.*]], ptr [[PTR]], align 64, !nontemporal [[META2]] +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.storent.pd.512(ptr %ptr, <8 x double> %data) + ret void +} + +declare void @llvm.x86.avx512.storent.ps.512(ptr, <16 x float>) + +define void @test_storent_ps_512(<16 x float> %data, ptr %ptr) #0 { +; +; CHECK-LABEL: @test_storent_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 
[[TMP6]] to ptr +; CHECK-NEXT: store <16 x i32> [[TMP2]], ptr [[TMP7]], align 64 +; CHECK-NEXT: store <16 x float> [[DATA:%.*]], ptr [[PTR]], align 64, !nontemporal [[META2]] +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.storent.ps.512(ptr %ptr, <16 x float> %data) + ret void +} + +define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) #0 { +; CHECK-LABEL: @test_xor_epi32( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = xor <16 x i32> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP3]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_xor_epi32( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = xor <16 x i32> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP12]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) + ret < 16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) + +define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) #0 { +; CHECK-LABEL: @test_or_epi32( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = xor <16 x i32> [[A:%.*]], splat (i32 -1) +; CHECK-NEXT: [[TMP4:%.*]] = xor <16 x i32> [[B:%.*]], splat (i32 -1) +; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = and <16 x i32> [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] 
= and <16 x i32> [[TMP1]], [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[A]], [[B]] +; CHECK-NEXT: store <16 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP10]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_or_epi32( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = xor <16 x i32> [[A:%.*]], splat (i32 -1) +; CHECK-NEXT: [[TMP6:%.*]] = xor <16 x i32> [[B:%.*]], splat (i32 -1) +; CHECK-NEXT: [[TMP7:%.*]] = and <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP8:%.*]] = and <16 x i32> [[TMP5]], [[TMP2]] +; CHECK-NEXT: [[TMP9:%.*]] = and <16 x i32> [[TMP1]], [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[A]], [[B]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = select <16 x i1> [[TMP14]], <16 x i32> [[TMP11]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP16:%.*]] = xor <16 x i32> [[TMP12]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP11]] +; CHECK-NEXT: [[TMP18:%.*]] = or <16 x i32> [[TMP17]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> [[TMP18]], <16 x i32> [[TMP15]] +; CHECK-NEXT: [[TMP19:%.*]] = select <16 x i1> [[TMP14]], <16 x i32> [[TMP12]], <16 x i32> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP19]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) + ret < 16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) + +define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) #0 { +; CHECK-LABEL: @test_and_epi32( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = and <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = and <16 x i32> [[A:%.*]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i32> [[TMP1]], [[B:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = or <16 x i32> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i32> [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = and <16 x i32> [[A]], [[B]] +; CHECK-NEXT: store <16 x i32> [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 
x i32> [[TMP8]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_and_epi32( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = and <16 x i32> [[A:%.*]], [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = and <16 x i32> [[TMP1]], [[B:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = and <16 x i32> [[A]], [[B]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP9]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP9]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP17]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) + ret < 16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) + +define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) #0 { +; CHECK-LABEL: @test_xor_epi64( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i64> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP3]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_xor_epi64( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr 
inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = xor <8 x i64> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP12]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) + ret < 8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) + +define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) #0 { +; CHECK-LABEL: @test_or_epi64( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i64> [[A:%.*]], splat (i64 -1) +; CHECK-NEXT: [[TMP4:%.*]] = xor <8 x i64> [[B:%.*]], splat (i64 -1) +; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = and <8 x i64> [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = and <8 x i64> [[TMP1]], [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[A]], [[B]] +; CHECK-NEXT: store <8 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP10]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_or_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_or_epi64( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = xor <8 x i64> [[A:%.*]], splat (i64 -1) +; CHECK-NEXT: [[TMP6:%.*]] = xor <8 x i64> [[B:%.*]], splat (i64 -1) +; CHECK-NEXT: [[TMP7:%.*]] = and <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP8:%.*]] = and <8 x i64> [[TMP5]], [[TMP2]] +; CHECK-NEXT: [[TMP9:%.*]] = and <8 x i64> [[TMP1]], [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> 
[[A]], [[B]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = select <8 x i1> [[TMP14]], <8 x i64> [[TMP11]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i64> [[TMP12]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], [[TMP11]] +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP13]], <8 x i64> [[TMP18]], <8 x i64> [[TMP15]] +; CHECK-NEXT: [[TMP19:%.*]] = select <8 x i1> [[TMP14]], <8 x i64> [[TMP12]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP19]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) + ret < 8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) + +define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) #0 { +; CHECK-LABEL: @test_and_epi64( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = and <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = and <8 x i64> [[A:%.*]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i64> [[TMP1]], [[B:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i64> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = or <8 x i64> [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = and <8 x i64> [[A]], [[B]] +; CHECK-NEXT: store <8 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP8]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_and_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_and_epi64( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = and <8 x i64> [[A:%.*]], [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = and <8 x i64> [[TMP1]], [[B:%.*]] +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = and <8 x i64> [[A]], [[B]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP9]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <8 x i64> [[TMP10]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP9]] +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> 
[[TMP16]], <8 x i64> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP10]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP17]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) + ret < 8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) + +define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) #0 { +; CHECK-LABEL: @test_mask_add_epi32_rr( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i32> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP3]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_add_epi32_rrk( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i32> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP12]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_add_epi32_rrkz( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void 
@llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i32> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[_MSPROP]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP11]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, ptr %ptr_b) #0 { +; +; CHECK-LABEL: @test_mask_add_epi32_rm( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP8:%.*]] = add <16 x i32> [[A:%.*]], [[B]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP8]] +; + %b = load <16 x i32>, ptr %ptr_b + %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, ptr %ptr_b, <16 x i32> %passThru, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_add_epi32_rmk( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr 
[[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP10:%.*]] = add <16 x i32> [[A:%.*]], [[B]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[_MSPROP]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP17]] +; + %b = load <16 x i32>, ptr %ptr_b + %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, ptr %ptr_b, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_add_epi32_rmkz( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP9:%.*]] = add <16 x i32> [[A:%.*]], [[B]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[_MSPROP]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[_MSPROP]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP9]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP16]] +; + %b = load <16 x i32>, ptr %ptr_b + %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) + ret < 16 x i32> %res +} + +define <16 x 
i32> @test_mask_add_epi32_rmb(<16 x i32> %a, ptr %ptr_b) #0 { +; +; CHECK-LABEL: @test_mask_add_epi32_rmb( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP7]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i32> [[TMP2]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP8:%.*]] = add <16 x i32> [[A:%.*]], [[B]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP8]] +; + %q = load i32, ptr %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, ptr %ptr_b, <16 x i32> %passThru, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_add_epi32_rmbk( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> 
[[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i32> [[TMP2]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP10:%.*]] = add <16 x i32> [[A:%.*]], [[B]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[_MSPROP2]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP17]] +; + %q = load i32, ptr %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, ptr %ptr_b, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_add_epi32_rmbkz( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i32> [[TMP2]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP9:%.*]] = add <16 x i32> [[A:%.*]], [[B]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[_MSPROP2]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = 
select <16 x i1> [[TMP11]], <16 x i32> [[TMP9]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP16]] +; + %q = load i32, ptr %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) + ret < 16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) + +define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) #0 { +; CHECK-LABEL: @test_mask_sub_epi32_rr( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = sub <16 x i32> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP3]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_sub_epi32_rrk( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = sub <16 x i32> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP12]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_sub_epi32_rrkz( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load 
i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = sub <16 x i32> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[_MSPROP]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP11]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, ptr %ptr_b) #0 { +; +; CHECK-LABEL: @test_mask_sub_epi32_rm( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP8:%.*]] = sub <16 x i32> [[A:%.*]], [[B]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP8]] +; + %b = load <16 x i32>, ptr %ptr_b + %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, ptr %ptr_b, <16 x i32> %passThru, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_sub_epi32_rmk( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 
6: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP10:%.*]] = sub <16 x i32> [[A:%.*]], [[B]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[_MSPROP]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP17]] +; + %b = load <16 x i32>, ptr %ptr_b + %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, ptr %ptr_b, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_sub_epi32_rmkz( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP9:%.*]] = sub <16 x i32> [[A:%.*]], [[B]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[_MSPROP]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[_MSPROP]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP9]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP16]] +; + %b = load <16 x i32>, ptr %ptr_b + %res = call <16 x i32> 
@llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, ptr %ptr_b) #0 { +; +; CHECK-LABEL: @test_mask_sub_epi32_rmb( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP7]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i32> [[TMP2]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP8:%.*]] = sub <16 x i32> [[A:%.*]], [[B]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP8]] +; + %q = load i32, ptr %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, ptr %ptr_b, <16 x i32> %passThru, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_sub_epi32_rmbk( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = 
shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i32> [[TMP2]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP10:%.*]] = sub <16 x i32> [[A:%.*]], [[B]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[_MSPROP2]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP17]] +; + %q = load i32, ptr %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, ptr %ptr_b, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_sub_epi32_rmbkz( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i32> [[TMP2]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP9:%.*]] = sub <16 x i32> [[A:%.*]], [[B]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[_MSPROP2]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer 
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP9]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP16]] +; + %q = load i32, ptr %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) + ret < 16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) + +define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) #0 { +; CHECK-LABEL: @test_mask_add_epi64_rr( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i64> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP3]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_add_epi64_rrk( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = add <8 x i64> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP12]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_add_epi64_rrkz( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr 
@__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = add <8 x i64> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP11]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, ptr %ptr_b) #0 { +; +; CHECK-LABEL: @test_mask_add_epi64_rm( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <8 x i64>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i64>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP8:%.*]] = add <8 x i64> [[A:%.*]], [[B]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP8]] +; + %b = load <8 x i64>, ptr %ptr_b + %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, ptr %ptr_b, <8 x i64> %passThru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_add_epi64_rmk( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 
+; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <8 x i64>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i64>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i64> [[A:%.*]], [[B]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <8 x i64> [[TMP10]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[_MSPROP]] +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP10]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP17]] +; + %b = load <8 x i64>, ptr %ptr_b + %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, ptr %ptr_b, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_add_epi64_rmkz( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <8 x i64>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i64>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP9:%.*]] = add <8 x i64> [[A:%.*]], [[B]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[_MSPROP]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP15]], <8 x i64> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP9]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP16]] +; + %b = load <8 x i64>, ptr %ptr_b + %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 
x i64> %b, <8 x i64> zeroinitializer, i8 %mask) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, ptr %ptr_b) #0 { +; +; CHECK-LABEL: @test_mask_add_epi64_rmb( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[Q:%.*]] = load i64, ptr [[PTR_B:%.*]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i64> undef, i64 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <8 x i64> [[VECINIT_I]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i64> [[TMP2]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP8:%.*]] = add <8 x i64> [[A:%.*]], [[B]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP8]] +; + %q = load i64, ptr %ptr_b + %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 + %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer + %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, ptr %ptr_b, <8 x i64> %passThru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_add_epi64_rmbk( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[Q:%.*]] = load i64, ptr [[PTR_B:%.*]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP9]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i64> undef, i64 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; 
CHECK-NEXT: [[B:%.*]] = shufflevector <8 x i64> [[VECINIT_I]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i64> [[TMP2]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i64> [[A:%.*]], [[B]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[_MSPROP2]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <8 x i64> [[TMP10]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP10]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP17]] +; + %q = load i64, ptr %ptr_b + %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 + %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer + %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, ptr %ptr_b, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_add_epi64_rmbkz( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[Q:%.*]] = load i64, ptr [[PTR_B:%.*]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i64> undef, i64 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <8 x i64> [[VECINIT_I]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i64> [[TMP2]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP9:%.*]] = add <8 x i64> [[A:%.*]], [[B]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[_MSPROP2]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP15]], <8 x i64> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <8 x 
i1> [[TMP11]], <8 x i64> [[TMP9]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP16]] +; + %q = load i64, ptr %ptr_b + %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 + %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer + %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) + ret < 8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) + +define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) #0 { +; CHECK-LABEL: @test_mask_sub_epi64_rr( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = sub <8 x i64> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP3]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_sub_epi64_rrk( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = sub <8 x i64> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP12]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_sub_epi64_rrkz( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to 
ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i64> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP11]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, ptr %ptr_b) #0 { +; +; CHECK-LABEL: @test_mask_sub_epi64_rm( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <8 x i64>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i64>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP8:%.*]] = sub <8 x i64> [[A:%.*]], [[B]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP8]] +; + %b = load <8 x i64>, ptr %ptr_b + %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, ptr %ptr_b, <8 x i64> %passThru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_sub_epi64_rmk( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <8 x i64>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i64>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP10:%.*]] = sub <8 x i64> [[A:%.*]], [[B]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <8 x i64> [[TMP10]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[_MSPROP]] +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP10]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP17]] +; + %b = load <8 x i64>, ptr %ptr_b + %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, ptr %ptr_b, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_sub_epi64_rmkz( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <8 x i64>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i64>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP9:%.*]] = sub <8 x i64> [[A:%.*]], [[B]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[_MSPROP]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP15]], <8 x i64> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP9]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP16]] +; + %b = load <8 x i64>, ptr %ptr_b + %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, ptr %ptr_b) #0 { +; +; CHECK-LABEL: 
@test_mask_sub_epi64_rmb( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[Q:%.*]] = load i64, ptr [[PTR_B:%.*]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i64> undef, i64 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <8 x i64> [[VECINIT_I]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i64> [[TMP2]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP8:%.*]] = sub <8 x i64> [[A:%.*]], [[B]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP8]] +; + %q = load i64, ptr %ptr_b + %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 + %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer + %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, ptr %ptr_b, <8 x i64> %passThru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_sub_epi64_rmbk( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[Q:%.*]] = load i64, ptr [[PTR_B:%.*]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP9]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i64> undef, i64 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <8 x i64> [[VECINIT_I]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i64> [[TMP2]], [[_MSPROP1]] 
+; CHECK-NEXT: [[TMP10:%.*]] = sub <8 x i64> [[A:%.*]], [[B]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[_MSPROP2]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <8 x i64> [[TMP10]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP10]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP17]] +; + %q = load i64, ptr %ptr_b + %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 + %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer + %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, ptr %ptr_b, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_sub_epi64_rmbkz( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[Q:%.*]] = load i64, ptr [[PTR_B:%.*]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i64> undef, i64 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <8 x i64> [[VECINIT_I]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i64> [[TMP2]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP9:%.*]] = sub <8 x i64> [[A:%.*]], [[B]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[_MSPROP2]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP15]], <8 x i64> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP9]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> 
[[TMP16]] +; + %q = load i64, ptr %ptr_b + %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 + %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer + %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask) + ret < 8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) + +define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) #0 { +; CHECK-LABEL: @test_mask_mullo_epi32_rr_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = mul <16 x i32> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP3]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) + ret <16 x i32> %res +} + +define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_mullo_epi32_rrk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = mul <16 x i32> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP12]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_mullo_epi32_rrkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: 
[[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i32> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[_MSPROP]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP11]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, ptr %ptr_b) #0 { +; +; CHECK-LABEL: @test_mask_mullo_epi32_rm_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP8:%.*]] = mul <16 x i32> [[A:%.*]], [[B]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP8]] +; + %b = load <16 x i32>, ptr %ptr_b + %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, ptr %ptr_b, <16 x i32> %passThru, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_mullo_epi32_rmk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to 
i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP10:%.*]] = mul <16 x i32> [[A:%.*]], [[B]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[_MSPROP]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP17]] +; + %b = load <16 x i32>, ptr %ptr_b + %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, ptr %ptr_b, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_mullo_epi32_rmkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[_MSLD]] +; CHECK-NEXT: [[TMP9:%.*]] = mul <16 x i32> [[A:%.*]], [[B]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[_MSPROP]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[_MSPROP]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP9]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP16]] +; + %b = load <16 x i32>, ptr %ptr_b + %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask) + ret < 16 x i32> %res +} + +define <16 x 
i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) #0 { +; +; CHECK-LABEL: @test_mask_mullo_epi32_rmb_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP7]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i32> [[TMP2]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP8:%.*]] = mul <16 x i32> [[A:%.*]], [[B]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP8]] +; + %q = load i32, ptr %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <16 x i32> %passThru, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_mullo_epi32_rmbk_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = 
shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i32> [[TMP2]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP10:%.*]] = mul <16 x i32> [[A:%.*]], [[B]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[_MSPROP2]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP17]] +; + %q = load i32, ptr %ptr_b + %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 + %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer + %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) + ret < 16 x i32> %res +} + +define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_mullo_epi32_rmbkz_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> undef, i32 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer +; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i32> [[TMP2]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP9:%.*]] = mul <16 x i32> [[A:%.*]], [[B]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[_MSPROP2]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP15]], <16 x i32> 
[[TMP12]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP9]], <16 x i32> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP16]]
+;
+ %q = load i32, ptr %ptr_b
+ %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
+ ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+
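+; llvm.x86.avx512.mask.shuf.f32x4 is auto-upgraded to a plain shufflevector
+; plus (for the masked form) a select. With the immediate 22 (0b00010110),
+; the result is 128-bit lanes 2 and 1 of %x0 followed by lanes 1 and 0 of
+; %x1, i.e. elements <8..11, 4..7, 20..23, 16..19>; the same mask is applied
+; to the shadow values in the CHECK lines below.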
+declare <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float>, <16 x float>, i32, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_shuf_f32x4(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x float> [[X0:%.*]], <16 x float> [[X1:%.*]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP3]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
+ ret <16 x float> %res
+}
+
+define <16 x float>@test_int_x86_avx512_mask_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_shuf_f32x4(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x float> [[X0:%.*]], <16 x float> [[X1:%.*]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X4:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x float> [[TMP5]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x float> [[X3:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[TMP9]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP13]], <16 x i32> [[TMP8]]
+; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP7]], <16 x float> [[TMP5]], <16 x float> [[X3]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP14]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
+ ret <16 x float> %res
+}
+
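+; shuf.f64x2 uses the same lane selection as shuf.f32x4 above, but each
+; 128-bit lane holds two doubles, so imm 22 yields the element mask
+; <4, 5, 2, 3, 10, 11, 8, 9> (elements of the second operand are offset by 8).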
+declare <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double>, <8 x double>, i32, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_shuf_f64x2(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> <i32 4, i32 5, i32 2, i32 3, i32 10, i32 11, i32 8, i32 9>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x i32> <i32 4, i32 5, i32 2, i32 3, i32 10, i32 11, i32 8, i32 9>
+; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP3]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
+ ret <8 x double> %res
+}
+
+define <8 x double>@test_int_x86_avx512_mask_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_shuf_f64x2(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> <i32 4, i32 5, i32 2, i32 3, i32 10, i32 11, i32 8, i32 9>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x i32> <i32 4, i32 5, i32 2, i32 3, i32 10, i32 11, i32 8, i32 9>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X4:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x double> [[TMP5]] to <8 x i64>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x double> [[X3:%.*]] to <8 x i64>
+; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i64> [[TMP9]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP13]], <8 x i64> [[TMP8]]
+; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP7]], <8 x double> [[TMP5]], <8 x double> [[X3]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP14]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
+ ret <8 x double> %res
+}
+
+define <8 x double>@test_int_x86_avx512_maskz_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, i8 %x4) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_shuf_f64x2(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> <i32 4, i32 5, i32 2, i32 3, i32 10, i32 11, i32 8, i32 9>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x i32> <i32 4, i32 5, i32 2, i32 3, i32 10, i32 11, i32 8, i32 9>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X4:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x double> [[TMP4]] to <8 x i64>
+; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP11]], <8 x i64> [[TMP7]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP6]], <8 x double> [[TMP4]], <8 x double> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP12]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
+ ret <8 x double> %res
+}
+
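+; The integer variants shuf.i32x4/shuf.i64x2 upgrade to the same
+; shufflevector masks as their f32x4/f64x2 counterparts; only the element
+; type differs, so the shadow propagation is identical.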
+declare <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_shuf_i32x4(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_shuf_i32x4(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP3]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1)
+ ret <16 x i32> %res
+}
+
+define <16 x i32>@test_int_x86_avx512_mask_shuf_i32x4(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_shuf_i32x4(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X4:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[X3:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[X3]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP12]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
+ ret <16 x i32> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_shuf_i64x2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_shuf_i64x2(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> <i32 4, i32 5, i32 2, i32 3, i32 10, i32 11, i32 8, i32 9>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i32> <i32 4, i32 5, i32 2, i32 3, i32 10, i32 11, i32 8, i32 9>
+; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP3]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_mask_shuf_i64x2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_shuf_i64x2(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> <i32 4, i32 5, i32 2, i32 3, i32 10, i32 11, i32 8, i32 9>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i32> <i32 4, i32 5, i32 2, i32 3, i32 10, i32 11, i32 8, i32 9>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X4:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[X3:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[X3]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP12]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
+ ret <8 x i64> %res
+}
+
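+; shuf.pd.512 upgrades to a per-pair VSHUFPD-style shuffle: within each
+; 128-bit lane j, imm bit 2j selects the even result element from %x0 and
+; bit 2j+1 selects the odd result element from %x1. For imm 22 that gives
+; the element mask <0, 9, 3, 10, 5, 12, 6, 14>.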
x double> %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_shuf_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x i32> +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP3]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1) + ret <8 x double> %res +} + +define <8 x double>@test_int_x86_avx512_mask_shuf_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_shuf_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X4:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x double> [[TMP5]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x double> [[X3:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i64> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[_MSPROP]] +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP13]], <8 x i64> [[TMP8]] +; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP7]], <8 x double> [[TMP5]], <8 x double> [[X3]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP14]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4) + ret <8 x double> %res +} + +define <8 x double>@test_int_x86_avx512_maskz_shuf_pd_512(<8 x double> %x0, <8 x double> %x1, i8 %x4) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_maskz_shuf_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x i32> +; 
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X4:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x double> [[TMP4]] to <8 x i64>
+; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP11]], <8 x i64> [[TMP7]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP6]], <8 x double> [[TMP4]], <8 x double> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP12]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
+ ret <8 x double> %res
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_shuf_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_shuf_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> <i32 2, i32 1, i32 17, i32 16, i32 6, i32 5, i32 21, i32 20, i32 10, i32 9, i32 25, i32 24, i32 14, i32 13, i32 29, i32 28>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x float> [[X0:%.*]], <16 x float> [[X1:%.*]], <16 x i32> <i32 2, i32 1, i32 17, i32 16, i32 6, i32 5, i32 21, i32 20, i32 10, i32 9, i32 25, i32 24, i32 14, i32 13, i32 29, i32 28>
+; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP3]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
+ ret <16 x float> %res
+}
+
+define <16 x float>@test_int_x86_avx512_mask_shuf_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_shuf_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> <i32 2, i32 1, i32 17, i32 16, i32 6, i32 5, i32 21, i32 20, i32 10, i32 9, i32 25, i32 24, i32 14, i32 13, i32 29, i32 28>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x float> [[X0:%.*]], <16 x float> [[X1:%.*]], <16 x i32> <i32 2, i32 1, i32 17, i32 16, i32 6, i32 5, i32 21, i32 20, i32 10, i32 9, i32 25, i32 24, i32 14, i32 13, i32 29, i32 28>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X4:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x float> [[TMP5]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x float> [[X3:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[TMP9]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP13]], <16 x i32> [[TMP8]]
+; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP7]], <16 x float> [[TMP5]], <16 x float> [[X3]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP14]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
+ ret <16 x float> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_pmaxs_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.smax.v16i32(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP3]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
+ ret <16 x i32> %res
+}
+
+define <16 x i32>@test_int_x86_avx512_mask_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.smax.v16i32(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[X2]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP12]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
+ ret <16 x i32> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_pmaxs_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.smax.v8i64(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP3]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_mask_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.smax.v8i64(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[X2]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP12]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+ ret <8 x i64> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_pmaxu_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.umax.v16i32(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP3]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
+ ret <16 x i32> %res
+}
+
+define <16 x i32>@test_int_x86_avx512_mask_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.umax.v16i32(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[X2]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP12]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
+ ret <16 x i32> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_pmaxu_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.umax.v8i64(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP3]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_mask_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.umax.v8i64(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[X2]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP12]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+ ret <8 x i64> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_pmins_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.smin.v16i32(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP3]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
+ ret <16 x i32> %res
+}
+
+define <16 x i32>@test_int_x86_avx512_mask_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.smin.v16i32(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[X2]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP12]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
+ ret <16 x i32> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_pmins_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.smin.v8i64(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP3]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_mask_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.smin.v8i64(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[X2]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP12]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+ ret <8 x i64> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_pminu_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP3]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
+ ret <16 x i32> %res
+}
+
+define <16 x i32>@test_int_x86_avx512_mask_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[X2]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP12]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
+ ret <16 x i32> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_pminu_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.umin.v8i64(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP3]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.umin.v8i64(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[X2]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP12]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+ ret <8 x i64> %res
+}
+
+define <4 x float> @test_mm_mask_move_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) #0 {
+;
+; CHECK-LABEL: @test_mm_mask_move_ss(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[TMP0]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[__U:%.*]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = and i8 [[TMP0]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = or i8 [[TMP4]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i8 [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = and i8 [[__U]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = xor i8 [[TMP9]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = or i8 [[TMP8]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP11]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = xor i8 [[TMP11]], -1
+; CHECK-NEXT: [[TMP14:%.*]] = and i8 [[TMP13]], [[TMP10]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0
+; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 [[TMP12]], [[TMP15]]
+; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i8 [[TMP9]], 0
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[__B:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[__W:%.*]], i64 0
+; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP16]], i32 [[_MSPROP]], i32 [[_MSPROP1]]
+; CHECK-NEXT: [[TMP20:%.*]] = bitcast float [[TMP17]] to i32
+; CHECK-NEXT: [[TMP21:%.*]] = bitcast float [[TMP18]] to i32
+; CHECK-NEXT: [[TMP22:%.*]] = xor i32 [[TMP20]], [[TMP21]]
+; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP22]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP23]], [[_MSPROP1]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP_ICMP]], i32 [[TMP24]], i32 [[TMP19]]
+; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP16]], float [[TMP17]], float [[TMP18]]
+; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x float> [[__A:%.*]], float [[TMP25]], i64 0
+; CHECK-NEXT: store <4 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[TMP26]]
+;
+entry:
+ %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U)
+ ret <4 x float> %res
+}
+
+
+define <4 x float> @test_mm_maskz_move_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) #0 {
+;
+; CHECK-LABEL: @test_mm_maskz_move_ss(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = and i8 [[TMP0]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[__U:%.*]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[TMP0]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = or i8 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = or i8 [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = and i8 [[__U]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = xor i8 [[TMP8]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = or i8 [[TMP7]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP10]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = xor i8 [[TMP10]], -1
+; CHECK-NEXT: [[TMP13:%.*]] = and i8 [[TMP12]], [[TMP9]]
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i8 [[TMP13]], 0
+; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 [[TMP11]], [[TMP14]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i8 [[TMP8]], 0
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[__B:%.*]], i64 0
+; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], i32 [[_MSPROP]], i32 0
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast float [[TMP16]] to i32
+; CHECK-NEXT: [[TMP19:%.*]] = xor i32 [[TMP18]], 0
+; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP19]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP21:%.*]] = or i32 [[TMP20]], 0
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP_ICMP]], i32 [[TMP21]], i32 [[TMP17]]
+; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP15]], float [[TMP16]], float 0.000000e+00
+; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x float> [[__A:%.*]], float [[TMP22]], i64 0
+; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[TMP23]]
+;
+entry:
+ %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U)
+ ret <4 x float> %res
+}
+
+define <2 x double> @test_mm_mask_move_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) #0 {
+;
+; CHECK-LABEL: @test_mm_mask_move_sd(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[TMP0]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[__U:%.*]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = and i8 [[TMP0]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = or i8 [[TMP4]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i8 [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = and i8 [[__U]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = xor i8 [[TMP9]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = or i8 [[TMP8]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP11]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = xor i8 [[TMP11]], -1
+; CHECK-NEXT: [[TMP14:%.*]] = and i8 [[TMP13]], [[TMP10]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0
+; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 [[TMP12]], [[TMP15]]
+; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i8 [[TMP9]], 0
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x double> [[__B:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[__W:%.*]], i64 0
+; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP16]], i64 [[_MSPROP]], i64 [[_MSPROP1]]
+; CHECK-NEXT: [[TMP20:%.*]] = bitcast double [[TMP17]] to i64
+; CHECK-NEXT: [[TMP21:%.*]] = bitcast double [[TMP18]] to i64
+; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP20]], [[TMP21]]
+; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP22]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP23]], [[_MSPROP1]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP_ICMP]], i64 [[TMP24]], i64 [[TMP19]]
+; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP16]], double [[TMP17]], double [[TMP18]]
+; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP26:%.*]] = insertelement <2 x double> [[__A:%.*]], double [[TMP25]], i64 0
+; CHECK-NEXT: store <2 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[TMP26]]
+;
+entry:
+ %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_mm_maskz_move_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) #0 {
+;
+; CHECK-LABEL: @test_mm_maskz_move_sd(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = and i8 [[TMP0]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[__U:%.*]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[TMP0]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = or i8 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = or i8 [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = and i8 [[__U]], 1
+; CHECK-NEXT: [[TMP9:%.*]] = xor i8 [[TMP8]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = or i8 [[TMP7]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP10]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = xor i8 [[TMP10]], -1
+; CHECK-NEXT: [[TMP13:%.*]] = and i8 [[TMP12]], [[TMP9]]
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i8 [[TMP13]], 0
+; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and i1 [[TMP11]], [[TMP14]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i8 [[TMP8]], 0
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[__B:%.*]], i64 0
+; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], i64 [[_MSPROP]], i64 0
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast double [[TMP16]] to i64
+; CHECK-NEXT: [[TMP19:%.*]] = xor i64 [[TMP18]], 0
+; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP19]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP20]], 0
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP_ICMP]], i64 [[TMP21]], i64 [[TMP17]]
+; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP15]], double [[TMP16]], double 0.000000e+00
+; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x double> [[__A:%.*]], double [[TMP22]], i64 0
+; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[TMP23]]
+;
+entry:
+ %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U)
+ ret <2 x double> %res
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float>, <4 x float>, <4 x float>, i8)
+declare <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double>, <2 x double>, <2 x double>, i8)
+
+declare <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_pmovzxb_d_512(<16 x i8> %x0, <16 x i32> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_pmovzxb_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> splat (i8 -1), <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[X0:%.*]], <16 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <16 x i8> [[_MSPROP]] to <16 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i32>
+; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP3]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 -1)
+ ret <16 x i32> %res
+}
+
+define <16 x i32>@test_int_x86_avx512_mask_pmovzxb_d_512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovzxb_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> splat (i8 -1), <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i8> [[X0:%.*]], <16 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <16 x i8> [[_MSPROP]] to <16 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[TMP4]] to <16 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[X1:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[X1]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP12]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2)
+ ret <16 x i32> %res
+}
+
+define <16 x i32>@test_int_x86_avx512_maskz_pmovzxb_d_512(<16 x i8> %x0, i16 %x2) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovzxb_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> splat (i8 -1), <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i8> [[X0:%.*]], <16 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <16 x i8> [[_MSPROP]] to <16 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[TMP3]] to <16 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]]
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP11]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> zeroinitializer, i16 %x2)
+ ret <16 x i32> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_pmovzxb_q_512(<16 x i8> %x0, <8 x i64> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_pmovzxb_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> splat (i8 -1), <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[X0:%.*]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <8 x i8> [[_MSPROP]] to <8 x i64>
+; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i64>
+; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP3]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_mask_pmovzxb_q_512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovzxb_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> splat (i8 -1), <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i8> [[X0:%.*]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <8 x i8> [[_MSPROP]] to <8 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = zext <8 x i8> [[TMP4]] to <8 x i64>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP1]], <8 x i64> [[TMP3]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[X1:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[X1]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP12]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_maskz_pmovzxb_q_512(<16 x i8> %x0, i8 %x2) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovzxb_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> splat (i8 -1), <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i8> [[X0:%.*]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <8 x i8> [[_MSPROP]] to <8 x i64>
+; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[TMP3]] to <8 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP1]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]]
+; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP11]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> zeroinitializer, i8 %x2)
+ ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_pmovzxd_q_512(<8 x i32> %x0, <8 x i64> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_pmovzxd_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> splat (i32 -1), <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[X0:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <8 x i32> [[_MSPROP]] to <8 x i64>
+; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i32> [[TMP2]] to <8 x i64>
+; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP3]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_mask_pmovzxd_q_512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovzxd_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> splat (i32 -1), <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[X0:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <8 x i32> [[_MSPROP]] to <8 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = zext <8 x i32> [[TMP4]] to <8 x i64>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP1]], <8 x i64> [[TMP3]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[X1:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[X1]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP12]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_maskz_pmovzxd_q_512(<8 x i32> %x0, i8 %x2) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovzxd_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> splat (i32 -1), <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[X0:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <8 x i32> [[_MSPROP]] to <8 x i64>
+; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i32> [[TMP3]] to <8 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP1]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]]
+; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP11]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> zeroinitializer, i8 %x2)
+ ret <8 x i64> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_pmovzxw_d_512(<16 x i16> %x0, <16 x i32> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_pmovzxw_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> splat (i16 -1), <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[X0:%.*]], <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <16 x i16> [[_MSPROP]] to <16 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i16> [[TMP2]] to <16 x i32>
+; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP3]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 -1)
+ ret <16 x i32> %res
+}
+
+define <16 x i32>@test_int_x86_avx512_mask_pmovzxw_d_512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovzxw_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> splat (i16 -1), <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[X0:%.*]], <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <16 x i16> [[_MSPROP]] to <16 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = zext <16 x i16> [[TMP4]] to <16 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[X1:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[X1]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP12]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2)
+ ret <16 x i32> %res
+}
+
+define <16 x i32>@test_int_x86_avx512_maskz_pmovzxw_d_512(<16 x i16> %x0, i16 %x2) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovzxw_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> splat (i16 -1), <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[X0:%.*]], <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <16 x i16> [[_MSPROP]] to <16 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = zext <16 x i16> [[TMP3]] to <16 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP4]], zeroinitializer
zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP11]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> zeroinitializer, i16 %x2) + ret <16 x i32> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16>, <8 x i64>, i8) + +define <8 x i64>@test_int_x86_avx512_pmovzxw_q_512(<8 x i16> %x0, <8 x i64> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmovzxw_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> splat (i16 -1), <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[X0:%.*]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <8 x i16> [[_MSPROP]] to <8 x i64> +; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i64> +; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP3]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 -1) + ret <8 x i64> %res +} + +define <8 x i64>@test_int_x86_avx512_mask_pmovzxw_q_512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovzxw_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> splat (i16 -1), <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[X0:%.*]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <8 x i16> [[_MSPROP]] to <8 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = zext <8 x i16> [[TMP4]] to <8 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP1]], <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[X1:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[X1]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP12]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2) + ret <8 x i64> %res +} + +define <8 x i64>@test_int_x86_avx512_maskz_pmovzxw_q_512(<8 x i16> %x0, i8 %x2) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovzxw_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 +; 
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> splat (i16 -1), <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[X0:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <8 x i16> [[_MSPROP]] to <8 x i64>
+; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i16> [[TMP3]] to <8 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP1]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]]
+; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP11]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> zeroinitializer, i8 %x2)
+ ret <8 x i64> %res
+}
+
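+; The mask.pmovsx* intrinsics below are auto-upgraded into a plain
+; shufflevector (selecting the low source elements), a sext, and, for the
+; masked forms, a select on the bitcast mask, so MSan needs no
+; intrinsic-specific handling: it propagates shadow through the same
+; shuffle and sext, and merges in the mask's shadow via the xor/or/select
+; pattern checked below.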
+declare <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_pmovsxb_d_512(<16 x i8> %x0, <16 x i32> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_pmovsxb_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> splat (i8 -1), <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[X0:%.*]], <16 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <16 x i8> [[_MSPROP]] to <16 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[TMP2]] to <16 x i32>
+; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP3]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 -1)
+ ret <16 x i32> %res
+}
+
+define <16 x i32>@test_int_x86_avx512_mask_pmovsxb_d_512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovsxb_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> splat (i8 -1), <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i8> [[X0:%.*]], <16 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <16 x i8> [[_MSPROP]] to <16 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = sext <16 x i8> [[TMP4]] to <16 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[X1:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[X1]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP12]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2)
+ ret <16 x i32> %res
+}
+
+define <16 x i32>@test_int_x86_avx512_maskz_pmovsxb_d_512(<16 x i8> %x0, i16 %x2) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovsxb_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> splat (i8 -1), <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i8> [[X0:%.*]], <16 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <16 x i8> [[_MSPROP]] to <16 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i8> [[TMP3]] to <16 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]]
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP11]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %x0, <16 x i32> zeroinitializer, i16 %x2)
+ ret <16 x i32> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_pmovsxb_q_512(<16 x i8> %x0, <8 x i64> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_pmovsxb_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> splat (i8 -1), <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[X0:%.*]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <8 x i8> [[_MSPROP]] to <8 x i64>
+; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i64>
+; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP3]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_mask_pmovsxb_q_512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovsxb_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> splat (i8 -1), <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i8> [[X0:%.*]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <8 x i8> [[_MSPROP]] to <8 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i8> [[TMP4]] to <8 x i64>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP1]], <8 x i64> [[TMP3]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[X1:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[X1]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP12]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_maskz_pmovsxb_q_512(<16 x i8> %x0, i8 %x2) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovsxb_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> splat (i8 -1), <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i8> [[X0:%.*]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <8 x i8> [[_MSPROP]] to <8 x i64>
+; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i8> [[TMP3]] to <8 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP1]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]]
+; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP11]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8> %x0, <8 x i64> zeroinitializer, i8 %x2)
+ ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_pmovsxd_q_512(<8 x i32> %x0, <8 x i64> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_pmovsxd_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> splat (i32 -1), <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[X0:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <8 x i32> [[_MSPROP]] to <8 x i64>
+; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i32> [[TMP2]] to <8 x i64>
+; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP3]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_mask_pmovsxd_q_512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovsxd_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> splat (i32 -1), <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[X0:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <8 x i32> [[_MSPROP]] to <8 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i32> [[TMP4]] to <8 x i64>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP1]], <8 x i64> [[TMP3]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[X1:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[X1]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP12]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_maskz_pmovsxd_q_512(<8 x i32> %x0, i8 %x2) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovsxd_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> splat (i32 -1), <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[X0:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <8 x i32> [[_MSPROP]] to <8 x i64>
+; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i32> [[TMP3]] to <8 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP1]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]]
+; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP11]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32> %x0, <8 x i64> zeroinitializer, i8 %x2)
+ ret <8 x i64> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_pmovsxw_d_512(<16 x i16> %x0, <16 x i32> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_pmovsxw_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> splat (i16 -1), <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[X0:%.*]], <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <16 x i16> [[_MSPROP]] to <16 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = sext <16 x i16> [[TMP2]] to <16 x i32>
+; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP3]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 -1)
+ ret <16 x i32> %res
+}
+
+define <16 x i32>@test_int_x86_avx512_mask_pmovsxw_d_512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovsxw_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> splat (i16 -1), <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[X0:%.*]], <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <16 x i16> [[_MSPROP]] to <16 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = sext <16 x i16> [[TMP4]] to <16 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[X1:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[X1]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP12]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2)
+ ret <16 x i32> %res
+}
+
+define <16 x i32>@test_int_x86_avx512_maskz_pmovsxw_d_512(<16 x i16> %x0, i16 %x2) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovsxw_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> splat (i16 -1), <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[X0:%.*]], <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <16 x i16> [[_MSPROP]] to <16 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i16> [[TMP3]] to <16 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]]
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP11]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16> %x0, <16 x i32> zeroinitializer, i16 %x2)
+ ret <16 x i32> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_pmovsxw_q_512(<8 x i16> %x0, <8 x i64> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_pmovsxw_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> splat (i16 -1), <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[X0:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <8 x i16> [[_MSPROP]] to <8 x i64>
+; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[TMP2]] to <8 x i64>
+; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP3]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_mask_pmovsxw_q_512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovsxw_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> splat (i16 -1), <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[X0:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <8 x i16> [[_MSPROP]] to <8 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i16> [[TMP4]] to <8 x i64>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP1]], <8 x i64> [[TMP3]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[X1:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[X1]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP12]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_maskz_pmovsxw_q_512(<8 x i16> %x0, i8 %x2) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovsxw_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> splat (i16 -1), <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[X0:%.*]], <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <8 x i16> [[_MSPROP]] to <8 x i64>
+; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i16> [[TMP3]] to <8 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP1]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]]
+; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP11]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16> %x0, <8 x i64> zeroinitializer, i8 %x2)
+ ret <8 x i64> %res
+}
+
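+; The prolv/prorv intrinsics (rotate by per-element variable amounts) are
+; auto-upgraded to the generic funnel-shift intrinsics @llvm.fshl/@llvm.fshr.
+; MSan rotates the shadow by the same concrete amounts and then ORs in
+; sext(icmp ne shadow(amount), 0), so any lane whose rotate amount is
+; uninitialized becomes fully poisoned, as the checks below verify.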
+declare <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32>, <16 x i32>)
+
+define <16 x i32>@test_int_x86_avx512_prolv_d_512(<16 x i32> %x0, <16 x i32> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_prolv_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = or <16 x i32> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[X0:%.*]], <16 x i32> [[X0]], <16 x i32> [[X1]])
+; CHECK-NEXT: store <16 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP7]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1)
+ ret <16 x i32> %1
+}
+
+define <16 x i32>@test_int_x86_avx512_mask_prolv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_prolv_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32>
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[X0:%.*]], <16 x i32> [[X0]], <16 x i32> [[X1]])
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP8]], <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP9]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[TMP8]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP9]], <16 x i32> [[X2]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP16]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1)
+ %2 = bitcast i16 %x3 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
+ ret <16 x i32> %3
+}
+
+define <16 x i32>@test_int_x86_avx512_maskz_prolv_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_prolv_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i32> [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[X0:%.*]], <16 x i32> [[X0]], <16 x i32> [[X1]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = xor <16 x i32> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP7]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP14]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP15:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP8]], <16 x i32> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP15]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1)
+ %2 = bitcast i16 %x3 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
+}
+
+declare <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64>, <8 x i64>)
+
+define <8 x i64>@test_int_x86_avx512_prolv_q_512(<8 x i64> %x0, <8 x i64> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_prolv_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i64> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[X0:%.*]], <8 x i64> [[X0]], <8 x i64> [[X1]])
+; CHECK-NEXT: store <8 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP7]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1)
+ ret <8 x i64> %1
+}
+
+define <8 x i64>@test_int_x86_avx512_mask_prolv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_prolv_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[X0:%.*]], <8 x i64> [[X0]], <8 x i64> [[X1]])
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP8]], <8 x i64> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP9]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[TMP8]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP15]], <8 x i64> [[TMP12]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP9]], <8 x i64> [[X2]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP16]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2
+ ret <8 x i64> %3
+}
+
+define <8 x i64>@test_int_x86_avx512_maskz_prolv_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_prolv_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i64>
+; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP7:%.*]] = or <8 x i64> [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[X0:%.*]], <8 x i64> [[X0]], <8 x i64> [[X1]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP7]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = xor <8 x i64> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP7]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP14]], <8 x i64> [[TMP11]]
+; CHECK-NEXT: [[TMP15:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP8]], <8 x i64> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP15]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
+ ret <8 x i64> %3
+}
+
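+; prorv is the rotate-right counterpart: the auto-upgrade emits @llvm.fshr
+; instead of @llvm.fshl, and the shadow is handled identically.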
+declare <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32>, <16 x i32>)
+
+define <16 x i32>@test_int_x86_avx512_prorv_d_512(<16 x i32> %x0, <16 x i32> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_prorv_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = or <16 x i32> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[X0:%.*]], <16 x i32> [[X0]], <16 x i32> [[X1]])
+; CHECK-NEXT: store <16 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP7]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1)
+ ret <16 x i32> %1
+}
+
+define <16 x i32>@test_int_x86_avx512_mask_prorv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_prorv_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32>
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[X0:%.*]], <16 x i32> [[X0]], <16 x i32> [[X1]])
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP8]], <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP9]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[TMP8]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP9]], <16 x i32> [[X2]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP16]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1)
+ %2 = bitcast i16 %x3 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
+ ret <16 x i32> %3
+}
+
+define <16 x i32>@test_int_x86_avx512_maskz_prorv_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_prorv_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i32> [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[X0:%.*]], <16 x i32> [[X0]], <16 x i32> [[X1]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = xor <16 x i32> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP7]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP14]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP15:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP8]], <16 x i32> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP15]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1)
+ %2 = bitcast i16 %x3 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
+}
+
+declare <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64>, <8 x i64>)
+
+define <8 x i64>@test_int_x86_avx512_prorv_q_512(<8 x i64> %x0, <8 x i64> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_prorv_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i64> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[X0:%.*]], <8 x i64> [[X0]], <8 x i64> [[X1]])
+; CHECK-NEXT: store <8 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP7]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1)
+ ret <8 x i64> %1
+}
+
+define <8 x i64>@test_int_x86_avx512_mask_prorv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_prorv_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[X0:%.*]], <8 x i64> [[X0]], <8 x i64> [[X1]])
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP8]], <8 x i64> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP9]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[TMP8]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP15]], <8 x i64> [[TMP12]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP9]], <8 x i64> [[X2]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP16]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2
+ ret <8 x i64> %3
+}
+
+define <8 x i64>@test_int_x86_avx512_maskz_prorv_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_prorv_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i64>
+; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP7:%.*]] = or <8 x i64> [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[X0:%.*]], <8 x i64> [[X0]], <8 x i64> [[X1]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP7]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = xor <8 x i64> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP7]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP14]], <8 x i64> [[TMP11]]
+; CHECK-NEXT: [[TMP15:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP8]], <8 x i64> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP15]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
+ ret <8 x i64> %3
+}
+
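+; prol/pror take an immediate rotate amount. The auto-upgrade again emits
+; @llvm.fshl/@llvm.fshr, with the amount splatted into a constant vector; a
+; constant amount carries no shadow, hence the "or ... zeroinitializer" in
+; the checks below.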
+declare <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32>, i32)
+
+define { <16 x i32>, <16 x i32>, <16 x i32> } @test_int_x86_avx512_prol_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_prol_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> splat (i32 3))
+; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[X0:%.*]], <16 x i32> [[X0]], <16 x i32> splat (i32 3))
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP5]], <16 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[TMP6]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP5]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP12]], <16 x i32> [[TMP9]]
+; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP6]], <16 x i32> [[X2]]
+; CHECK-NEXT: [[TMP14:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> splat (i32 4))
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[X0]], <16 x i32> [[X0]], <16 x i32> splat (i32 4))
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i16 [[X3]] to <16 x i1>
+; CHECK-NEXT: [[TMP19:%.*]] = select <16 x i1> [[TMP18]], <16 x i32> [[TMP15]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP20:%.*]] = xor <16 x i32> [[TMP16]], zeroinitializer
+; CHECK-NEXT: [[TMP21:%.*]] = or <16 x i32> [[TMP20]], [[TMP15]]
+; CHECK-NEXT: [[TMP22:%.*]] = or <16 x i32> [[TMP21]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <16 x i1> [[TMP17]], <16 x i32> [[TMP22]], <16 x i32> [[TMP19]]
+; CHECK-NEXT: [[TMP23:%.*]] = select <16 x i1> [[TMP18]], <16 x i32> [[TMP16]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP24:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> splat (i32 5))
+; CHECK-NEXT: [[TMP25:%.*]] = or <16 x i32> [[TMP24]], zeroinitializer
+; CHECK-NEXT: [[TMP26:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[X0]], <16 x i32> [[X0]], <16 x i32> splat (i32 5))
+; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } { <16 x i32> splat (i32 -1), <16 x i32> splat (i32 -1), <16 x i32> splat (i32 -1) }, <16 x i32> [[_MSPROP_SELECT]], 0
+; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> [[TMP13]], 0
+; CHECK-NEXT: [[TMP28:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP27]], <16 x i32> [[_MSPROP_SELECT1]], 1
+; CHECK-NEXT: [[RES4:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[RES3]], <16 x i32> [[TMP23]], 1
+; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP28]], <16 x i32> [[TMP25]], 2
+; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[RES4]], <16 x i32> [[TMP26]], 2
+; CHECK-NEXT: store { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP29]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret { <16 x i32>, <16 x i32>, <16 x i32> } [[RES5]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %x0, i32 3)
+ %2 = bitcast i16 %x3 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
+ %4 = call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %x0, i32 4)
+ %5 = bitcast i16 %x3 to <16 x i1>
+ %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
+ %7 = call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %x0, i32 5)
+ %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0
+ %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res3, <16 x i32> %6, 1
+ %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res4, <16 x i32> %7, 2
+ ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5
+}
+
+declare <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64>, i32)
+
+define { <8 x i64>, <8 x i64>, <8 x i64> } @test_int_x86_avx512_prol_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_prol_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> splat (i64 3))
+; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i64> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[X0:%.*]], <8 x i64> [[X0]], <8 x i64> splat (i64 3))
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP5]], <8 x i64> [[TMP3]]
+; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP6]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP5]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP12]], <8 x i64> [[TMP9]]
+; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP6]], <8 x i64> [[X2]]
+; CHECK-NEXT: [[TMP14:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> splat (i64 4))
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[X0]], <8 x i64> [[X0]], <8 x i64> splat (i64 4))
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[X3]] to <8 x i1>
+; CHECK-NEXT: [[TMP19:%.*]] = select <8 x i1> [[TMP18]], <8 x i64> [[TMP15]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP20:%.*]] = xor <8 x i64> [[TMP16]], zeroinitializer
+; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i64> [[TMP20]], [[TMP15]]
+; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i64> [[TMP21]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <8 x i1> [[TMP17]], <8 x i64> [[TMP22]], <8 x i64> [[TMP19]]
+; CHECK-NEXT: [[TMP23:%.*]] = select <8 x i1> [[TMP18]], <8 x i64> [[TMP16]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP24:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> splat (i64 5))
+; CHECK-NEXT: [[TMP25:%.*]] = or <8 x i64> [[TMP24]], zeroinitializer
+; CHECK-NEXT: [[TMP26:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[X0]], <8 x i64> [[X0]], <8 x i64> splat (i64 5))
+; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } { <8 x i64> splat (i64 -1), <8 x i64> splat (i64 -1), <8 x i64> splat (i64 -1) }, <8 x i64> [[_MSPROP_SELECT]], 0
+; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> [[TMP13]], 0
+; CHECK-NEXT: [[TMP28:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[TMP27]], <8 x i64> [[_MSPROP_SELECT1]], 1
+; CHECK-NEXT: [[RES4:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[RES3]], <8 x i64> [[TMP23]], 1
+; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[TMP28]], <8 x i64> [[TMP25]], 2
+; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[RES4]], <8 x i64> [[TMP26]], 2
+; CHECK-NEXT: store { <8 x i64>, <8 x i64>, <8 x i64> } [[TMP29]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret { <8 x i64>, <8 x i64>, <8 x i64> } [[RES5]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64> %x0, i32 3)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2
+ %4 = call <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64> %x0, i32 4)
+ %5 = bitcast i8 %x3 to <8 x i1>
+ %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
+ %7 = call <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64> %x0, i32 5)
+ %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %3, 0
+ %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res3, <8 x i64> %6, 1
+ %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res4, <8 x i64> %7, 2
+ ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5
+}
+
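+; pror is the immediate rotate-right variant, upgraded to @llvm.fshr with a
+; splat amount and instrumented the same way.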
+declare <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32>, i32)
+
+define { <16 x i32>, <16 x i32>, <16 x i32> } @test_int_x86_avx512_pror_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_pror_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> splat (i32 3))
+; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[X0:%.*]], <16 x i32> [[X0]], <16 x i32> splat (i32 3))
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP5]], <16 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[TMP6]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP5]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP12]], <16 x i32> [[TMP9]]
+; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP6]], <16 x i32> [[X2]]
+; CHECK-NEXT: [[TMP14:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> splat (i32 4))
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[X0]], <16 x i32> [[X0]], <16 x i32> splat (i32 4))
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i16 [[X3]] to <16 x i1>
+; CHECK-NEXT: [[TMP19:%.*]] = select <16 x i1> [[TMP18]], <16 x i32> [[TMP15]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP20:%.*]] = xor <16 x i32> [[TMP16]], zeroinitializer
+; CHECK-NEXT: [[TMP21:%.*]] = or <16 x i32> [[TMP20]], [[TMP15]]
+; CHECK-NEXT: [[TMP22:%.*]] = or <16 x i32> [[TMP21]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <16 x i1> [[TMP17]], <16 x i32> [[TMP22]], <16 x i32> [[TMP19]]
+; CHECK-NEXT: [[TMP23:%.*]] = select <16 x i1> [[TMP18]], <16 x i32> [[TMP16]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP24:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> splat (i32 5))
+; CHECK-NEXT: [[TMP25:%.*]] = or <16 x i32> [[TMP24]], zeroinitializer
+; CHECK-NEXT: [[TMP26:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[X0]], <16 x i32> [[X0]], <16 x i32> splat (i32 5))
+; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } { <16 x i32> splat (i32 -1), <16 x i32> splat (i32 -1), <16 x i32> splat (i32 -1) }, <16 x i32> [[_MSPROP_SELECT]], 0
+; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> [[TMP13]], 0
+; CHECK-NEXT: [[TMP28:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP27]], <16 x i32> [[_MSPROP_SELECT1]], 1
+; CHECK-NEXT: [[RES4:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[RES3]], <16 x i32> [[TMP23]], 1
+; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP28]], <16 x i32> [[TMP25]], 2
+; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[RES4]], <16 x i32> [[TMP26]], 2
+; CHECK-NEXT: store { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP29]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret { <16 x i32>, <16 x i32>, <16 x i32> } [[RES5]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %x0, i32 3)
+ %2 = bitcast i16 %x3 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
+ %4 = call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %x0, i32 4)
+ %5 = bitcast i16 %x3 to <16 x i1>
+ %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
+ %7 = call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %x0, i32 5)
+ %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0
+ %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res3, <16 x i32> %6, 1
+ %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res4, <16 x i32> %7, 2
+ ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5
+}
+
+declare <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64>, i32)
+
+define { <8 x i64>, <8 x i64>, <8 x i64> } @test_int_x86_avx512_pror_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_pror_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> splat (i64 3))
+; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i64> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[X0:%.*]], <8 x i64> [[X0]], <8 x i64> splat (i64 3))
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP5]], <8 x i64> [[TMP3]]
+; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP6]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP5]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP12]], <8 x i64> [[TMP9]]
+; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP6]], <8 x i64> [[X2]]
+; CHECK-NEXT: [[TMP14:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> splat (i64 4))
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[X0]], <8 x i64> [[X0]], <8 x i64> splat (i64 4))
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[X3]] to <8 x i1>
+; CHECK-NEXT: [[TMP19:%.*]] = select <8 x i1> [[TMP18]], <8 x i64> [[TMP15]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP20:%.*]] = xor <8 x i64> [[TMP16]], zeroinitializer
+; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i64> [[TMP20]], [[TMP15]]
+; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i64> [[TMP21]], zeroinitializer
[[TMP21]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <8 x i1> [[TMP17]], <8 x i64> [[TMP22]], <8 x i64> [[TMP19]] +; CHECK-NEXT: [[TMP23:%.*]] = select <8 x i1> [[TMP18]], <8 x i64> [[TMP16]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> splat (i64 5)) +; CHECK-NEXT: [[TMP25:%.*]] = or <8 x i64> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[X0]], <8 x i64> [[X0]], <8 x i64> splat (i64 5)) +; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } { <8 x i64> splat (i64 -1), <8 x i64> splat (i64 -1), <8 x i64> splat (i64 -1) }, <8 x i64> [[_MSPROP_SELECT]], 0 +; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> [[TMP13]], 0 +; CHECK-NEXT: [[TMP28:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[TMP27]], <8 x i64> [[_MSPROP_SELECT1]], 1 +; CHECK-NEXT: [[RES4:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[RES3]], <8 x i64> [[TMP23]], 1 +; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[TMP28]], <8 x i64> [[TMP25]], 2 +; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[RES4]], <8 x i64> [[TMP26]], 2 +; CHECK-NEXT: store { <8 x i64>, <8 x i64>, <8 x i64> } [[TMP29]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { <8 x i64>, <8 x i64>, <8 x i64> } [[RES5]] +; + %1 = call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %x0, i32 3) + %2 = bitcast i8 %x3 to <8 x i1> + %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2 + %4 = call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %x0, i32 4) + %5 = bitcast i8 %x3 to <8 x i1> + %6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer + %7 = call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %x0, i32 5) + %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %3, 0 + %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res3, <8 x i64> %6, 1 + %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res4, <8 x i64> %7, 2 + ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5 +} + +declare <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64>, i32, <8 x i64>, i8) + +define { <8 x i64>, <8 x i64>, <8 x i64> } @test_int_x86_avx512_mask_psrl_qi_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_psrl_qi_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[TMP1]], i32 4) +; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[X0:%.*]], i32 4) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP5]], <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP6]], [[X2:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP3]] +; 
CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP12]], <8 x i64> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP6]], <8 x i64> [[X2]] +; CHECK-NEXT: [[TMP14:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[TMP1]], i32 5) +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[X0]], i32 5) +; CHECK-NEXT: [[TMP17:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[TMP1]], i32 6) +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[X0]], i32 6) +; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8 [[X3]] to <8 x i1> +; CHECK-NEXT: [[TMP22:%.*]] = select <8 x i1> [[TMP21]], <8 x i64> [[TMP18]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = xor <8 x i64> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = or <8 x i64> [[TMP23]], [[TMP18]] +; CHECK-NEXT: [[TMP25:%.*]] = or <8 x i64> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <8 x i1> [[TMP20]], <8 x i64> [[TMP25]], <8 x i64> [[TMP22]] +; CHECK-NEXT: [[TMP26:%.*]] = select <8 x i1> [[TMP21]], <8 x i64> [[TMP19]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } { <8 x i64> splat (i64 -1), <8 x i64> splat (i64 -1), <8 x i64> splat (i64 -1) }, <8 x i64> [[_MSPROP_SELECT]], 0 +; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> [[TMP13]], 0 +; CHECK-NEXT: [[TMP28:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[TMP27]], <8 x i64> [[TMP15]], 1 +; CHECK-NEXT: [[RES4:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[RES3]], <8 x i64> [[TMP16]], 1 +; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[TMP28]], <8 x i64> [[_MSPROP_SELECT1]], 2 +; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[RES4]], <8 x i64> [[TMP26]], 2 +; CHECK-NEXT: store { <8 x i64>, <8 x i64>, <8 x i64> } [[TMP29]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { <8 x i64>, <8 x i64>, <8 x i64> } [[RES5]] +; + %res0 = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 4, <8 x i64> %x2, i8 %x3) + %res1 = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 5, <8 x i64> %x2, i8 -1) + %res2 = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 6, <8 x i64> zeroinitializer, i8 %x3) + %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0 + %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res3, <8 x i64> %res1, 1 + %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res4, <8 x i64> %res2, 2 + ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5 +} + +declare <16 x i32>@llvm.x86.avx512.mask.psrl.di.512(<16 x i32>, i32, <16 x i32>, i16) + +define { <16 x i32>, <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_psrl_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_psrl_di_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls 
to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[TMP1]], i32 4) +; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[X0:%.*]], i32 4) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP5]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[TMP6]], [[X2:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP12]], <16 x i32> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP6]], <16 x i32> [[X2]] +; CHECK-NEXT: [[TMP14:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[TMP1]], i32 5) +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[X0]], i32 5) +; CHECK-NEXT: [[TMP17:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[TMP1]], i32 6) +; CHECK-NEXT: [[TMP18:%.*]] = or <16 x i32> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[X0]], i32 6) +; CHECK-NEXT: [[TMP20:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast i16 [[X3]] to <16 x i1> +; CHECK-NEXT: [[TMP22:%.*]] = select <16 x i1> [[TMP21]], <16 x i32> [[TMP18]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = xor <16 x i32> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = or <16 x i32> [[TMP23]], [[TMP18]] +; CHECK-NEXT: [[TMP25:%.*]] = or <16 x i32> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <16 x i1> [[TMP20]], <16 x i32> [[TMP25]], <16 x i32> [[TMP22]] +; CHECK-NEXT: [[TMP26:%.*]] = select <16 x i1> [[TMP21]], <16 x i32> [[TMP19]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } { <16 x i32> splat (i32 -1), <16 x i32> splat (i32 -1), <16 x i32> splat (i32 -1) }, <16 x i32> [[_MSPROP_SELECT]], 0 +; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> [[TMP13]], 0 +; CHECK-NEXT: [[TMP28:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP27]], <16 x i32> [[TMP15]], 1 +; CHECK-NEXT: [[RES4:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[RES3]], <16 x i32> [[TMP16]], 1 +; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP28]], <16 x i32> [[_MSPROP_SELECT1]], 2 +; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[RES4]], <16 x i32> [[TMP26]], 2 +; CHECK-NEXT: store { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP29]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { <16 x i32>, <16 x i32>, <16 x i32> } [[RES5]] +; + %res0 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 4, <16 x i32> %x2, i16 %x3) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 5, <16 x i32> %x2, i16 -1) + %res2 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 6, <16 x i32> zeroinitializer, i16 %x3) + %res3 = insertvalue { <16 x i32>, <16 x i32>, 
<16 x i32> } poison, <16 x i32> %res0, 0 + %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res3, <16 x i32> %res1, 1 + %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res4, <16 x i32> %res2, 2 + ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5 +} + +declare <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32>, i32, <16 x i32>, i16) + +define { <16 x i32>, <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_psra_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_psra_di_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[TMP1]], i32 3) +; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[X0:%.*]], i32 3) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP5]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[TMP6]], [[X2:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP12]], <16 x i32> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP6]], <16 x i32> [[X2]] +; CHECK-NEXT: [[TMP14:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[TMP1]], i32 4) +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[X0]], i32 4) +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i16 [[X3]] to <16 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = select <16 x i1> [[TMP18]], <16 x i32> [[TMP15]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = xor <16 x i32> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = or <16 x i32> [[TMP20]], [[TMP15]] +; CHECK-NEXT: [[TMP22:%.*]] = or <16 x i32> [[TMP21]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <16 x i1> [[TMP17]], <16 x i32> [[TMP22]], <16 x i32> [[TMP19]] +; CHECK-NEXT: [[TMP23:%.*]] = select <16 x i1> [[TMP18]], <16 x i32> [[TMP16]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[TMP1]], i32 5) +; CHECK-NEXT: [[TMP25:%.*]] = or <16 x i32> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[X0]], i32 5) +; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } { <16 x i32> splat (i32 -1), <16 x i32> splat (i32 -1), <16 x i32> splat (i32 -1) }, <16 x i32> [[_MSPROP_SELECT]], 0 +; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> [[TMP13]], 0 +; CHECK-NEXT: [[TMP28:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP27]], <16 x i32> [[_MSPROP_SELECT1]], 1 +; CHECK-NEXT: 
[[RES4:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[RES3]], <16 x i32> [[TMP23]], 1 +; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP28]], <16 x i32> [[TMP25]], 2 +; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[RES4]], <16 x i32> [[TMP26]], 2 +; CHECK-NEXT: store { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP29]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { <16 x i32>, <16 x i32>, <16 x i32> } [[RES5]] +; + %res0 = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i32 4, <16 x i32> zeroinitializer, i16 %x3) + %res2 = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i32 5, <16 x i32> %x2, i16 -1) + %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0 + %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res3, <16 x i32> %res1, 1 + %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res4, <16 x i32> %res2, 2 + ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5 +} + +declare <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64>, i32, <8 x i64>, i8) + +define { <8 x i64>, <8 x i64>, <8 x i64> } @test_int_x86_avx512_mask_psra_qi_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_psra_qi_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[TMP1]], i32 3) +; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[X0:%.*]], i32 3) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP5]], <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP6]], [[X2:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP12]], <8 x i64> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP6]], <8 x i64> [[X2]] +; CHECK-NEXT: [[TMP14:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[TMP1]], i32 4) +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[X0]], i32 4) +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[X3]] to <8 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = select <8 x i1> [[TMP18]], <8 x i64> [[TMP15]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = xor <8 x i64> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i64> [[TMP20]], [[TMP15]] +; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i64> [[TMP21]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <8 x i1> [[TMP17]], <8 x i64> [[TMP22]], <8 x i64> [[TMP19]] +; CHECK-NEXT: 
[[TMP23:%.*]] = select <8 x i1> [[TMP18]], <8 x i64> [[TMP16]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[TMP1]], i32 5) +; CHECK-NEXT: [[TMP25:%.*]] = or <8 x i64> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[X0]], i32 5) +; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } { <8 x i64> splat (i64 -1), <8 x i64> splat (i64 -1), <8 x i64> splat (i64 -1) }, <8 x i64> [[_MSPROP_SELECT]], 0 +; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> [[TMP13]], 0 +; CHECK-NEXT: [[TMP28:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[TMP27]], <8 x i64> [[_MSPROP_SELECT1]], 1 +; CHECK-NEXT: [[RES4:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[RES3]], <8 x i64> [[TMP23]], 1 +; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[TMP28]], <8 x i64> [[TMP25]], 2 +; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[RES4]], <8 x i64> [[TMP26]], 2 +; CHECK-NEXT: store { <8 x i64>, <8 x i64>, <8 x i64> } [[TMP29]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { <8 x i64>, <8 x i64>, <8 x i64> } [[RES5]] +; + %res0 = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3) + %res1 = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i32 4, <8 x i64> zeroinitializer, i8 %x3) + %res2 = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i32 5, <8 x i64> %x2, i8 -1) + %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0 + %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res3, <8 x i64> %res1, 1 + %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res4, <8 x i64> %res2, 2 + ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5 +} + +declare <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32>, i32, <16 x i32>, i16) + +define { <16 x i32>, <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_psll_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_psll_di_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[TMP1]], i32 3) +; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[X0:%.*]], i32 3) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP5]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[TMP6]], [[X2:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP12]], <16 x i32> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP6]], <16 x i32> [[X2]] +; CHECK-NEXT: [[TMP14:%.*]] = call <16 x 
i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[TMP1]], i32 4) +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[X0]], i32 4) +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i16 [[X3]] to <16 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = select <16 x i1> [[TMP18]], <16 x i32> [[TMP15]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = xor <16 x i32> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = or <16 x i32> [[TMP20]], [[TMP15]] +; CHECK-NEXT: [[TMP22:%.*]] = or <16 x i32> [[TMP21]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <16 x i1> [[TMP17]], <16 x i32> [[TMP22]], <16 x i32> [[TMP19]] +; CHECK-NEXT: [[TMP23:%.*]] = select <16 x i1> [[TMP18]], <16 x i32> [[TMP16]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[TMP1]], i32 5) +; CHECK-NEXT: [[TMP25:%.*]] = or <16 x i32> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[X0]], i32 5) +; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } { <16 x i32> splat (i32 -1), <16 x i32> splat (i32 -1), <16 x i32> splat (i32 -1) }, <16 x i32> [[_MSPROP_SELECT]], 0 +; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> [[TMP13]], 0 +; CHECK-NEXT: [[TMP28:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP27]], <16 x i32> [[_MSPROP_SELECT1]], 1 +; CHECK-NEXT: [[RES4:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[RES3]], <16 x i32> [[TMP23]], 1 +; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP28]], <16 x i32> [[TMP25]], 2 +; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[RES4]], <16 x i32> [[TMP26]], 2 +; CHECK-NEXT: store { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP29]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { <16 x i32>, <16 x i32>, <16 x i32> } [[RES5]] +; + %res0 = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i32 4, <16 x i32> zeroinitializer, i16 %x3) + %res2 = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i32 5, <16 x i32> %x2, i16 -1) + %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0 + %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res3, <16 x i32> %res1, 1 + %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res4, <16 x i32> %res2, 2 + ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5 +} + +declare <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64>, i32, <8 x i64>, i8) + +define { <8 x i64>, <8 x i64>, <8 x i64> } @test_int_x86_avx512_mask_psll_qi_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_psll_qi_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> 
[[TMP1]], i32 3) +; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[X0:%.*]], i32 3) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP5]], <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP6]], [[X2:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP12]], <8 x i64> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP6]], <8 x i64> [[X2]] +; CHECK-NEXT: [[TMP14:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[TMP1]], i32 4) +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[X0]], i32 4) +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[X3]] to <8 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = select <8 x i1> [[TMP18]], <8 x i64> [[TMP15]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = xor <8 x i64> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i64> [[TMP20]], [[TMP15]] +; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i64> [[TMP21]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <8 x i1> [[TMP17]], <8 x i64> [[TMP22]], <8 x i64> [[TMP19]] +; CHECK-NEXT: [[TMP23:%.*]] = select <8 x i1> [[TMP18]], <8 x i64> [[TMP16]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[TMP1]], i32 5) +; CHECK-NEXT: [[TMP25:%.*]] = or <8 x i64> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[X0]], i32 5) +; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } { <8 x i64> splat (i64 -1), <8 x i64> splat (i64 -1), <8 x i64> splat (i64 -1) }, <8 x i64> [[_MSPROP_SELECT]], 0 +; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> [[TMP13]], 0 +; CHECK-NEXT: [[TMP28:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[TMP27]], <8 x i64> [[_MSPROP_SELECT1]], 1 +; CHECK-NEXT: [[RES4:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[RES3]], <8 x i64> [[TMP23]], 1 +; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[TMP28]], <8 x i64> [[TMP25]], 2 +; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[RES4]], <8 x i64> [[TMP26]], 2 +; CHECK-NEXT: store { <8 x i64>, <8 x i64>, <8 x i64> } [[TMP29]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { <8 x i64>, <8 x i64>, <8 x i64> } [[RES5]] +; + %res0 = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3) + %res1 = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i32 4, <8 x i64> zeroinitializer, i8 %x3) + %res2 = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i32 5, <8 x i64> %x2, i8 -1) + %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0 + %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res3, <8 x i64> %res1, 1 + %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res4, <8 x i64> %res2, 2 + ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5 +} 
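+
+; The tests below exercise the 512-bit shift intrinsics with a register
+; (rather than immediate) shift count. As the CHECK lines verify, the
+; instrumentation propagates shadow in two steps: the value operand's shadow
+; is shifted by the same amount, and that is then ORed with a poison term
+; derived from the shift count's shadow. For the xmm-count forms
+; (psll/psrl/psra), the count's shadow is truncated to i64 (only the low
+; 64 bits of the count matter), compared against zero, and the resulting i1
+; is sign-extended across the whole 512-bit result, so any shadow on the
+; count poisons every lane. For the variable-shift form (psllv), the check
+; is per lane: each element of the count's shadow is compared against zero
+; and sign-extended into that element only.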
+ +define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psll_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: store <16 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP10]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) + ret <16 x i32> %res +} + +define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_mask_psll_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = select <16 x i1> [[TMP14]], <16 x i32> [[TMP11]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP16:%.*]] = xor <16 x i32> [[TMP12]], [[A2:%.*]] +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP11]] +; CHECK-NEXT: [[TMP18:%.*]] = or <16 x i32> [[TMP17]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> [[TMP18]], <16 x i32> [[TMP15]] +; CHECK-NEXT: [[TMP19:%.*]] = select <16 x i1> [[TMP14]], <16 x i32> [[TMP12]], <16 x i32> [[A2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP19]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) + ret <16 x i32> %res +} + +define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: 
@test_x86_avx512_maskz_psll_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP10]] +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP17]], <16 x i32> [[TMP14]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> [[TMP11]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP18]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) + ret <16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone + +define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psll_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <8 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: store <8 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP10]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) + ret <8 x i64> %res +} + +define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_mask_psll_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: 
[[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = select <8 x i1> [[TMP14]], <8 x i64> [[TMP11]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i64> [[TMP12]], [[A2:%.*]] +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], [[TMP11]] +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP13]], <8 x i64> [[TMP18]], <8 x i64> [[TMP15]] +; CHECK-NEXT: [[TMP19:%.*]] = select <8 x i1> [[TMP14]], <8 x i64> [[TMP12]], <8 x i64> [[A2]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP19]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) + ret <8 x i64> %res +} + +define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_maskz_psll_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <8 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x i64> [[TMP10]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP10]] +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x 
i64> [[TMP17]], <8 x i64> [[TMP14]] +; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP13]], <8 x i64> [[TMP11]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP18]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) + ret <8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone + +define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psrl_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: store <16 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP10]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) + ret <16 x i32> %res +} + +define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_mask_psrl_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = select <16 x i1> [[TMP14]], <16 x i32> [[TMP11]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP16:%.*]] = xor <16 x i32> [[TMP12]], [[A2:%.*]] +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP11]] +; CHECK-NEXT: [[TMP18:%.*]] = or <16 x i32> [[TMP17]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP13]], <16 x 
i32> [[TMP18]], <16 x i32> [[TMP15]] +; CHECK-NEXT: [[TMP19:%.*]] = select <16 x i1> [[TMP14]], <16 x i32> [[TMP12]], <16 x i32> [[A2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP19]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) + ret <16 x i32> %res +} + +define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_maskz_psrl_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP10]] +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP17]], <16 x i32> [[TMP14]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> [[TMP11]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP18]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) + ret <16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone + +define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psrl_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <8 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[A0:%.*]], <2 x i64> 
[[A1]]) +; CHECK-NEXT: store <8 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP10]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) + ret <8 x i64> %res +} + +define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_mask_psrl_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = select <8 x i1> [[TMP14]], <8 x i64> [[TMP11]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i64> [[TMP12]], [[A2:%.*]] +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], [[TMP11]] +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP13]], <8 x i64> [[TMP18]], <8 x i64> [[TMP15]] +; CHECK-NEXT: [[TMP19:%.*]] = select <8 x i1> [[TMP14]], <8 x i64> [[TMP12]], <8 x i64> [[A2]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP19]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) + ret <8 x i64> %res +} + +define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_maskz_psrl_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <8 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; 
CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x i64> [[TMP10]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP10]] +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP17]], <8 x i64> [[TMP14]] +; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP13]], <8 x i64> [[TMP11]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP18]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) + ret <8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone + +define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psra_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: store <16 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP10]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) + ret <16 x i32> %res +} + +define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_mask_psra_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[A0:%.*]], <4 x i32> 
[[A1]]) +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = select <16 x i1> [[TMP14]], <16 x i32> [[TMP11]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP16:%.*]] = xor <16 x i32> [[TMP12]], [[A2:%.*]] +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP11]] +; CHECK-NEXT: [[TMP18:%.*]] = or <16 x i32> [[TMP17]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> [[TMP18]], <16 x i32> [[TMP15]] +; CHECK-NEXT: [[TMP19:%.*]] = select <16 x i1> [[TMP14]], <16 x i32> [[TMP12]], <16 x i32> [[A2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP19]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) + ret <16 x i32> %res +} + +define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_maskz_psra_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP10]] +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP17]], <16 x i32> [[TMP14]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> [[TMP11]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP18]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) + ret <16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone + +define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psra_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; 
CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <8 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: store <8 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP10]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) + ret <8 x i64> %res +} + +define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_mask_psra_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = select <8 x i1> [[TMP14]], <8 x i64> [[TMP11]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i64> [[TMP12]], [[A2:%.*]] +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], [[TMP11]] +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP13]], <8 x i64> [[TMP18]], <8 x i64> [[TMP15]] +; CHECK-NEXT: [[TMP19:%.*]] = select <8 x i1> [[TMP14]], <8 x i64> [[TMP12]], <8 x i64> [[A2]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP19]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) + ret <8 x i64> %res +} + +define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_maskz_psra_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: 
[[TMP5:%.*]] = trunc i128 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <8 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x i64> [[TMP10]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP10]] +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP17]], <8 x i64> [[TMP14]] +; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP13]], <8 x i64> [[TMP11]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP18]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) + ret <8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone + +define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psllv_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <16 x i32> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]]) +; CHECK-NEXT: store <16 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP7]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) + ret <16 x i32> %res +} + +define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_mask_psllv_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x 
i32> [[TMP1]], <16 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP8]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP9]], [[A2:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[TMP8]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP9]], <16 x i32> [[A2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP16]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) + ret <16 x i32> %res +} + +define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_maskz_psllv_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i32> [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <16 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP7]] +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP14]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP8]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP15]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) + ret <16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone + +define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psllv_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; 
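+; The .psllv/.psrlv/.psrav variable shifts are approximated per lane: any lane
+; whose shift-amount shadow is non-zero is fully poisoned (icmp ne + sext), and
+; the data operand's shadow is passed through the same shift intrinsic before
+; being OR'd with that.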
CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i64> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]]) +; CHECK-NEXT: store <8 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP7]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) + ret <8 x i64> %res +} + +define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_mask_psllv_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP8]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP9]], [[A2:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[TMP8]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP15]], <8 x i64> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP9]], <8 x i64> [[A2]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP16]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) + ret <8 x i64> %res +} + +define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_maskz_psllv_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP7:%.*]] = or <8 x i64> [[TMP6]], [[TMP5]] +; 
CHECK-NEXT: [[TMP8:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP7]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <8 x i64> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP7]] +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP14]], <8 x i64> [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP8]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP15]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) + ret <8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone + + +define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psrav_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <16 x i32> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]]) +; CHECK-NEXT: store <16 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP7]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) + ret <16 x i32> %res +} + +define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_mask_psrav_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x 
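+; The mask_* variants blend the shadows through the mask,
+; select(%mask, op-shadow, passthru-shadow), and additionally poison every lane
+; whose mask bit is itself uninitialized unless the two candidate values are
+; equal and both clean - the xor/or chain feeding [[_MSPROP_SELECT]].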
i1> [[TMP11]], <16 x i32> [[TMP8]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP9]], [[A2:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[TMP8]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP9]], <16 x i32> [[A2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP16]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) + ret <16 x i32> %res +} + +define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_maskz_psrav_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i32> [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <16 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP7]] +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP14]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP8]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP15]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) + ret <16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone + +define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psrav_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i64> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> 
[[A0:%.*]], <8 x i64> [[A1]]) +; CHECK-NEXT: store <8 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP7]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) + ret <8 x i64> %res +} + +define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_mask_psrav_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP8]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP9]], [[A2:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[TMP8]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP15]], <8 x i64> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP9]], <8 x i64> [[A2]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP16]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) + ret <8 x i64> %res +} + +define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_maskz_psrav_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP7:%.*]] = or <8 x i64> [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP7]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <8 x i64> [[TMP8]], 
zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP7]] +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP14]], <8 x i64> [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP8]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP15]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) + ret <8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone + +define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psrlv_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <16 x i32> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]]) +; CHECK-NEXT: store <16 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP7]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1) + ret <16 x i32> %res +} + +define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_mask_psrlv_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP8]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP9]], [[A2:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[TMP8]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select 
<16 x i1> [[TMP11]], <16 x i32> [[TMP9]], <16 x i32> [[A2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP16]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) + ret <16 x i32> %res +} + +define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_maskz_psrlv_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i32> [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <16 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP7]] +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP14]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP8]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP15]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask) + ret <16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone + +define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psrlv_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i64> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]]) +; CHECK-NEXT: store <8 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP7]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1) + ret <8 x i64> %res +} + +define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) #0 { +; +; 
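+; The maskz_* variants use the same blend against a zeroinitializer (fully
+; initialized) pass-through, which is why the xor/or operands degenerate to
+; zeroinitializer.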
CHECK-LABEL: @test_x86_avx512_mask_psrlv_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP8]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP9]], [[A2:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[TMP8]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP15]], <8 x i64> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP9]], <8 x i64> [[A2]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP16]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) + ret <8 x i64> %res +} + +define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) #0 { +; +; CHECK-LABEL: @test_x86_avx512_maskz_psrlv_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP7:%.*]] = or <8 x i64> [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP7]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <8 x i64> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP7]] +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP14]], <8 x i64> [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP8]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> 
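+; test_x86_avx512_psrlv_q_memop (below) also covers the load: the pointer's own
+; shadow is checked first (a branch to __msan_warning_noreturn under !prof),
+; then the loaded value's shadow is read from shadow memory at
+; ptr ^ 87960930222080 (0x500000000000, the x86_64-linux mapping selected by
+; this test's RUN line).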
[[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP15]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask) + ret <8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone + +define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, ptr %ptr) #0 { +; +; CHECK-LABEL: @test_x86_avx512_psrlv_q_memop( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <8 x i64>, ptr [[PTR:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i64>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <8 x i64> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = sext <8 x i1> [[TMP8]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[TMP2]], <8 x i64> [[B]]) +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[B]]) +; CHECK-NEXT: store <8 x i64> [[TMP11]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP12]] +; + %b = load <8 x i64>, ptr %ptr + %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1) + ret <8 x i64> %res +} + +declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8) + +define <8 x double>@test_int_x86_avx512_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_cvt_dq2pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i32> [[TMP1]] to <8 x i64> +; CHECK-NEXT: [[CVT:%.*]] = sitofp <8 x i32> [[X0:%.*]] to <8 x double> +; CHECK-NEXT: store <8 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[CVT]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1) + ret <8 x double> %res +} + +define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_dq2pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i32> [[TMP1]] to <8 x i64> +; CHECK-NEXT: [[CVT:%.*]] = sitofp <8 x i32> [[X0:%.*]] to <8 x double> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: 
[[TMP6:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP4]], <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x double> [[CVT]] to <8 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x double> [[X1:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP12]], <8 x i64> [[TMP7]] +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP6]], <8 x double> [[CVT]], <8 x double> [[X1]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP13]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) + ret <8 x double> %res +} + +declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8) + +define <8 x double>@test_int_x86_avx512_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_cvt_udq2pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i32> [[TMP1]] to <8 x i64> +; CHECK-NEXT: [[CVT:%.*]] = uitofp <8 x i32> [[X0:%.*]] to <8 x double> +; CHECK-NEXT: store <8 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[CVT]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1) + ret <8 x double> %res +} + +define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_udq2pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i32> [[TMP1]] to <8 x i64> +; CHECK-NEXT: [[CVT:%.*]] = uitofp <8 x i32> [[X0:%.*]] to <8 x double> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP4]], <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x double> [[CVT]] to <8 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x double> [[X1:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP12]], <8 x i64> [[TMP7]] +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP6]], <8 x double> [[CVT]], <8 x double> [[X1]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP13]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) + ret <8 x double> %res +} + +define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) #0 { +; CHECK-LABEL: @test_x86_vcvtph2ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = 
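+; The cvtdq2pd/cvtudq2pd tests above show the sitofp/uitofp handling: the
+; <8 x i32> operand shadow is simply zext'd lane-wise to <8 x i64>, with no
+; extra checks.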
load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i16> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> [[A0:%.*]], <16 x float> zeroinitializer, i16 -1, i32 4) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) + ret <16 x float> %res +} + +define <16 x float> @test_x86_vcvtph2ps_512_sae(<16 x i16> %a0) #0 { +; CHECK-LABEL: @test_x86_vcvtph2ps_512_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i16> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> [[A0:%.*]], <16 x float> zeroinitializer, i16 -1, i32 8) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 8) + ret <16 x float> %res +} + +define <16 x float> @test_x86_vcvtph2ps_512_rrk(<16 x i16> %a0,<16 x float> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_x86_vcvtph2ps_512_rrk( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i16> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> [[A0:%.*]], <16 x float> [[A1:%.*]], i16 [[MASK:%.*]], i32 4) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> %a1, i16 %mask, i32 4) + ret <16 x float> %res +} + +define <16 x float> @test_x86_vcvtph2ps_512_sae_rrkz(<16 x i16> %a0, i16 %mask) #0 { +; +; CHECK-LABEL: 
@test_x86_vcvtph2ps_512_sae_rrkz( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i16> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> [[A0:%.*]], <16 x float> zeroinitializer, i16 [[MASK:%.*]], i32 8) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 8) + ret <16 x float> %res +} + +define <16 x float> @test_x86_vcvtph2ps_512_rrkz(<16 x i16> %a0, i16 %mask) #0 { +; +; CHECK-LABEL: @test_x86_vcvtph2ps_512_rrkz( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i16> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> [[A0:%.*]], <16 x float> zeroinitializer, i16 [[MASK:%.*]], i32 4) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4) + ret <16 x float> %res +} + +declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly + +define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) #0 { +; CHECK-LABEL: @test_valign_q( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[PALIGNR:%.*]] = shufflevector <8 x i64> [[B:%.*]], <8 x i64> [[A:%.*]], <8 x i32> +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[PALIGNR]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> zeroinitializer, i8 -1) + ret <8 x i64> %res +} + +define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_valign_q( +; CHECK-NEXT: [[TMP1:%.*]] = 
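+; vcvtph2ps (above) has no shadow-propagation model, so it is instrumented
+; strictly: each operand shadow is collapsed to an i1 (_MSCMP*), the bits are
+; OR'd together (_MSOR*), a set bit calls __msan_warning_noreturn, and the
+; result shadow stored back is all zero.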
load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[PALIGNR:%.*]] = shufflevector <8 x i64> [[B:%.*]], <8 x i64> [[A:%.*]], <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[PALIGNR]], [[SRC:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[PALIGNR]], <8 x i64> [[SRC]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP11]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> %src, i8 %mask) + ret <8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8) + +define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) #0 { +; +; CHECK-LABEL: @test_maskz_valign_d( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[PALIGNR:%.*]] = shufflevector <16 x i32> [[B:%.*]], <16 x i32> [[A:%.*]], <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[_MSPROP]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <16 x i32> [[PALIGNR]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[_MSPROP]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i32> [[TMP9]], <16 x i32> [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[PALIGNR]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP10]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i32 5, <16 x i32> zeroinitializer, i16 %mask) + ret <16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16) + +declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, 
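+; The valign tests above rely on the auto-upgrade to a plain shufflevector
+; ([[PALIGNR]]), so the operand shadows flow through an identical shufflevector
+; ([[_MSPROP]]) instead of a strict check.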
<8 x i64>, <8 x double>, i8) + +define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]]) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP7]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1) + ret <8 x double> %res +} + +define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_vpermilvar_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> zeroinitializer, <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x double> [[TMP9]] to <8 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x double> [[X2:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i64> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP17]], <8 x i64> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP11]], <8 x double> [[TMP9]], <8 x double> [[X2]] +; CHECK-NEXT: 
store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP18]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) + ret <8 x double> %res +} + +define <8 x double>@test_int_x86_avx512_maskz_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermilvar_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP16]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3) + ret <8 x double> %res +} + +declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16) + +define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; 
CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]]) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1) + ret <16 x float> %res +} + +define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_vpermilvar_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[X2:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[X2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) + ret <16 x float> %res +} + + +define <16 x float>@test_int_x86_avx512_maskz_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, i16 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermilvar_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to 
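+; vpermilvar keeps a runtime index operand, so both operand shadows are checked
+; strictly, and the permuted result's own shadow is treated as clean before the
+; usual mask blend.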
i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3) + ret <16 x float> %res +} + +; Test case to make sure we can print shuffle decode comments for constant pool loads. +define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> ) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> zeroinitializer, <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x float> [[TMP7]] to <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[X2:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP15]], <16 x i32> [[TMP10]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP9]], 
<16 x float> [[TMP7]], <16 x float> [[X2]] +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP17]], 0 +; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF1]] +; CHECK: 18: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 19: +; CHECK-NEXT: [[TMP20:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0]], <16 x i32> ) +; CHECK-NEXT: [[TMP21:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i16 [[X3]] to <16 x i1> +; CHECK-NEXT: [[TMP23:%.*]] = select <16 x i1> [[TMP22]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x float> [[TMP20]] to <16 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = xor <16 x i32> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = or <16 x i32> [[TMP25]], zeroinitializer +; CHECK-NEXT: [[TMP27:%.*]] = or <16 x i32> [[TMP26]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <16 x i1> [[TMP21]], <16 x i32> [[TMP27]], <16 x i32> [[TMP23]] +; CHECK-NEXT: [[TMP28:%.*]] = select <16 x i1> [[TMP22]], <16 x float> [[TMP20]], <16 x float> zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP29]], 0 +; CHECK-NEXT: br i1 [[_MSCMP4]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]] +; CHECK: 30: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 31: +; CHECK-NEXT: [[TMP32:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0]], <16 x i32> ) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[_MSPROP_SELECT]], [[_MSPROP_SELECT1]] +; CHECK-NEXT: [[RES3:%.*]] = fadd <16 x float> [[TMP16]], [[TMP28]] +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i32> zeroinitializer, [[_MSPROP]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <16 x float> [[TMP32]], [[RES3]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES4]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> , <16 x float> %x2, i16 %x3) + %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> , <16 x float> zeroinitializer, i16 %x3) + %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> , <16 x float> %x2, i16 -1) + %res3 = fadd <16 x float> %res, %res1 + %res4 = fadd <16 x float> %res2, %res3 + ret <16 x float> %res4 +} + +define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) #0 { +; CHECK-LABEL: @test_mask_mul_epi32_rr( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[B:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i64> [[TMP3]], splat (i64 32) +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = shl <8 x i64> [[TMP4]], splat (i64 32) +; CHECK-NEXT: [[TMP10:%.*]] = ashr <8 x i64> 
[[TMP8]], splat (i64 32) +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = ashr <8 x i64> [[TMP9]], splat (i64 32) +; CHECK-NEXT: [[TMP13:%.*]] = shl <8 x i64> [[TMP5]], splat (i64 32) +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = shl <8 x i64> [[TMP6]], splat (i64 32) +; CHECK-NEXT: [[TMP16:%.*]] = ashr <8 x i64> [[TMP14]], splat (i64 32) +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = ashr <8 x i64> [[TMP15]], splat (i64 32) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP11]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = mul <8 x i64> [[TMP12]], [[TMP18]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP19]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_mul_epi32_rrk( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[B:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = shl <8 x i64> [[TMP5]], splat (i64 32) +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = shl <8 x i64> [[TMP6]], splat (i64 32) +; CHECK-NEXT: [[TMP12:%.*]] = ashr <8 x i64> [[TMP10]], splat (i64 32) +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = ashr <8 x i64> [[TMP11]], splat (i64 32) +; CHECK-NEXT: [[TMP15:%.*]] = shl <8 x i64> [[TMP7]], splat (i64 32) +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = shl <8 x i64> [[TMP8]], splat (i64 32) +; CHECK-NEXT: [[TMP18:%.*]] = ashr <8 x i64> [[TMP16]], splat (i64 32) +; CHECK-NEXT: [[TMP19:%.*]] = or <8 x i64> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = ashr <8 x i64> [[TMP17]], splat (i64 32) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP13]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = mul <8 x i64> [[TMP14]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP25:%.*]] = xor <8 x i64> [[TMP21]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP26:%.*]] = or <8 x i64> [[TMP25]], [[_MSPROP]] +; CHECK-NEXT: [[TMP27:%.*]] = or <8 x i64> [[TMP26]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP22]], <8 x i64> [[TMP27]], <8 x i64> [[TMP24]] +; CHECK-NEXT: [[TMP28:%.*]] = select <8 x i1> [[TMP23]], <8 x 
i64> [[TMP21]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP28]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_mul_epi32_rrkz( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[B:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = shl <8 x i64> [[TMP4]], splat (i64 32) +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = shl <8 x i64> [[TMP5]], splat (i64 32) +; CHECK-NEXT: [[TMP11:%.*]] = ashr <8 x i64> [[TMP9]], splat (i64 32) +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = ashr <8 x i64> [[TMP10]], splat (i64 32) +; CHECK-NEXT: [[TMP14:%.*]] = shl <8 x i64> [[TMP6]], splat (i64 32) +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = shl <8 x i64> [[TMP7]], splat (i64 32) +; CHECK-NEXT: [[TMP17:%.*]] = ashr <8 x i64> [[TMP15]], splat (i64 32) +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = ashr <8 x i64> [[TMP16]], splat (i64 32) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP12]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = mul <8 x i64> [[TMP13]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP23:%.*]] = select <8 x i1> [[TMP22]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = xor <8 x i64> [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = or <8 x i64> [[TMP24]], [[_MSPROP]] +; CHECK-NEXT: [[TMP26:%.*]] = or <8 x i64> [[TMP25]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP21]], <8 x i64> [[TMP26]], <8 x i64> [[TMP23]] +; CHECK-NEXT: [[TMP27:%.*]] = select <8 x i1> [[TMP22]], <8 x i64> [[TMP20]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP27]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, ptr %ptr_b) #0 { +; +; CHECK-LABEL: @test_mask_mul_epi32_rm( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof 
[[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSLD]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = shl <8 x i64> [[TMP8]], splat (i64 32) +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = shl <8 x i64> [[TMP9]], splat (i64 32) +; CHECK-NEXT: [[TMP15:%.*]] = ashr <8 x i64> [[TMP13]], splat (i64 32) +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = ashr <8 x i64> [[TMP14]], splat (i64 32) +; CHECK-NEXT: [[TMP18:%.*]] = shl <8 x i64> [[TMP10]], splat (i64 32) +; CHECK-NEXT: [[TMP19:%.*]] = or <8 x i64> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = shl <8 x i64> [[TMP11]], splat (i64 32) +; CHECK-NEXT: [[TMP21:%.*]] = ashr <8 x i64> [[TMP19]], splat (i64 32) +; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i64> [[TMP21]], zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = ashr <8 x i64> [[TMP20]], splat (i64 32) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP16]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = mul <8 x i64> [[TMP17]], [[TMP23]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP24]] +; + %b = load <16 x i32>, ptr %ptr_b + %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, ptr %ptr_b, <8 x i64> %passThru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_mul_epi32_rmk( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[_MSLD]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x 
i32> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = shl <8 x i64> [[TMP10]], splat (i64 32) +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = shl <8 x i64> [[TMP11]], splat (i64 32) +; CHECK-NEXT: [[TMP17:%.*]] = ashr <8 x i64> [[TMP15]], splat (i64 32) +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = ashr <8 x i64> [[TMP16]], splat (i64 32) +; CHECK-NEXT: [[TMP20:%.*]] = shl <8 x i64> [[TMP12]], splat (i64 32) +; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i64> [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = shl <8 x i64> [[TMP13]], splat (i64 32) +; CHECK-NEXT: [[TMP23:%.*]] = ashr <8 x i64> [[TMP21]], splat (i64 32) +; CHECK-NEXT: [[TMP24:%.*]] = or <8 x i64> [[TMP23]], zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = ashr <8 x i64> [[TMP22]], splat (i64 32) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP18]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = mul <8 x i64> [[TMP19]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP28:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP29:%.*]] = select <8 x i1> [[TMP28]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP30:%.*]] = xor <8 x i64> [[TMP26]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP31:%.*]] = or <8 x i64> [[TMP30]], [[_MSPROP]] +; CHECK-NEXT: [[TMP32:%.*]] = or <8 x i64> [[TMP31]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP27]], <8 x i64> [[TMP32]], <8 x i64> [[TMP29]] +; CHECK-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP28]], <8 x i64> [[TMP26]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP33]] +; + %b = load <16 x i32>, ptr %ptr_b + %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, ptr %ptr_b, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_mul_epi32_rmkz( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSLD]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = shl <8 x i64> [[TMP9]], splat (i64 32) +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = shl <8 x i64> [[TMP10]], 
splat (i64 32) +; CHECK-NEXT: [[TMP16:%.*]] = ashr <8 x i64> [[TMP14]], splat (i64 32) +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = ashr <8 x i64> [[TMP15]], splat (i64 32) +; CHECK-NEXT: [[TMP19:%.*]] = shl <8 x i64> [[TMP11]], splat (i64 32) +; CHECK-NEXT: [[TMP20:%.*]] = or <8 x i64> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = shl <8 x i64> [[TMP12]], splat (i64 32) +; CHECK-NEXT: [[TMP22:%.*]] = ashr <8 x i64> [[TMP20]], splat (i64 32) +; CHECK-NEXT: [[TMP23:%.*]] = or <8 x i64> [[TMP22]], zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = ashr <8 x i64> [[TMP21]], splat (i64 32) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP17]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = mul <8 x i64> [[TMP18]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP27:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP28:%.*]] = select <8 x i1> [[TMP27]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = xor <8 x i64> [[TMP25]], zeroinitializer +; CHECK-NEXT: [[TMP30:%.*]] = or <8 x i64> [[TMP29]], [[_MSPROP]] +; CHECK-NEXT: [[TMP31:%.*]] = or <8 x i64> [[TMP30]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP26]], <8 x i64> [[TMP31]], <8 x i64> [[TMP28]] +; CHECK-NEXT: [[TMP32:%.*]] = select <8 x i1> [[TMP27]], <8 x i64> [[TMP25]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP32]] +; + %b = load <16 x i32>, ptr %ptr_b + %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, ptr %ptr_b) #0 { +; +; CHECK-LABEL: @test_mask_mul_epi32_rmb( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[Q:%.*]] = load i64, ptr [[PTR_B:%.*]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i64> undef, i64 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[B64:%.*]] = shufflevector <8 x i64> [[VECINIT_I]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[_MSPROP1]] to <16 x i32> +; CHECK-NEXT: [[B:%.*]] = bitcast <8 x i64> [[B64]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP8]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[B]] to <8 x i64> +; 
CHECK-NEXT: [[TMP13:%.*]] = shl <8 x i64> [[TMP9]], splat (i64 32) +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = shl <8 x i64> [[TMP10]], splat (i64 32) +; CHECK-NEXT: [[TMP16:%.*]] = ashr <8 x i64> [[TMP14]], splat (i64 32) +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = ashr <8 x i64> [[TMP15]], splat (i64 32) +; CHECK-NEXT: [[TMP19:%.*]] = shl <8 x i64> [[TMP11]], splat (i64 32) +; CHECK-NEXT: [[TMP20:%.*]] = or <8 x i64> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = shl <8 x i64> [[TMP12]], splat (i64 32) +; CHECK-NEXT: [[TMP22:%.*]] = ashr <8 x i64> [[TMP20]], splat (i64 32) +; CHECK-NEXT: [[TMP23:%.*]] = or <8 x i64> [[TMP22]], zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = ashr <8 x i64> [[TMP21]], splat (i64 32) +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i64> [[TMP17]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = mul <8 x i64> [[TMP18]], [[TMP24]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP25]] +; + %q = load i64, ptr %ptr_b + %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 + %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer + %b = bitcast <8 x i64> %b64 to <16 x i32> + %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, ptr %ptr_b, <8 x i64> %passThru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_mul_epi32_rmbk( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[Q:%.*]] = load i64, ptr [[PTR_B:%.*]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP9]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i64> undef, i64 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[B64:%.*]] = shufflevector <8 x i64> [[VECINIT_I]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[_MSPROP1]] to <16 x i32> +; CHECK-NEXT: [[B:%.*]] = bitcast <8 x i64> [[B64]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i32> [[TMP10]] to <8 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i32> [[B]] to 
<8 x i64> +; CHECK-NEXT: [[TMP15:%.*]] = shl <8 x i64> [[TMP11]], splat (i64 32) +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = shl <8 x i64> [[TMP12]], splat (i64 32) +; CHECK-NEXT: [[TMP18:%.*]] = ashr <8 x i64> [[TMP16]], splat (i64 32) +; CHECK-NEXT: [[TMP19:%.*]] = or <8 x i64> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = ashr <8 x i64> [[TMP17]], splat (i64 32) +; CHECK-NEXT: [[TMP21:%.*]] = shl <8 x i64> [[TMP13]], splat (i64 32) +; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i64> [[TMP21]], zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = shl <8 x i64> [[TMP14]], splat (i64 32) +; CHECK-NEXT: [[TMP24:%.*]] = ashr <8 x i64> [[TMP22]], splat (i64 32) +; CHECK-NEXT: [[TMP25:%.*]] = or <8 x i64> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = ashr <8 x i64> [[TMP23]], splat (i64 32) +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i64> [[TMP19]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = mul <8 x i64> [[TMP20]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP29:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP30:%.*]] = select <8 x i1> [[TMP29]], <8 x i64> [[_MSPROP2]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP31:%.*]] = xor <8 x i64> [[TMP27]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP32:%.*]] = or <8 x i64> [[TMP31]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP33:%.*]] = or <8 x i64> [[TMP32]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP28]], <8 x i64> [[TMP33]], <8 x i64> [[TMP30]] +; CHECK-NEXT: [[TMP34:%.*]] = select <8 x i1> [[TMP29]], <8 x i64> [[TMP27]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP34]] +; + %q = load i64, ptr %ptr_b + %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 + %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer + %b = bitcast <8 x i64> %b64 to <16 x i32> + %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_mul_epi32_rmbk_buildvector(<16 x i32> %a, ptr %ptr_b, <8 x i64> %passThru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_mul_epi32_rmbk_buildvector( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[Q:%.*]] = load i64, ptr [[PTR_B:%.*]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP9]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement 
<8 x i64> undef, i64 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i64> [[_MSPROP]], i64 [[_MSLD]], i32 1 +; CHECK-NEXT: [[VECINIT_I1:%.*]] = insertelement <8 x i64> [[VECINIT_I]], i64 [[Q]], i32 1 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i64> [[_MSPROP1]], i64 [[_MSLD]], i32 2 +; CHECK-NEXT: [[VECINIT_I2:%.*]] = insertelement <8 x i64> [[VECINIT_I1]], i64 [[Q]], i32 2 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i64> [[_MSPROP2]], i64 [[_MSLD]], i32 3 +; CHECK-NEXT: [[VECINIT_I3:%.*]] = insertelement <8 x i64> [[VECINIT_I2]], i64 [[Q]], i32 3 +; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <8 x i64> [[_MSPROP3]], i64 [[_MSLD]], i32 4 +; CHECK-NEXT: [[VECINIT_I4:%.*]] = insertelement <8 x i64> [[VECINIT_I3]], i64 [[Q]], i32 4 +; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <8 x i64> [[_MSPROP4]], i64 [[_MSLD]], i32 5 +; CHECK-NEXT: [[VECINIT_I5:%.*]] = insertelement <8 x i64> [[VECINIT_I4]], i64 [[Q]], i32 5 +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <8 x i64> [[_MSPROP5]], i64 [[_MSLD]], i32 6 +; CHECK-NEXT: [[VECINIT_I6:%.*]] = insertelement <8 x i64> [[VECINIT_I5]], i64 [[Q]], i32 6 +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <8 x i64> [[_MSPROP6]], i64 [[_MSLD]], i32 7 +; CHECK-NEXT: [[B64:%.*]] = insertelement <8 x i64> [[VECINIT_I6]], i64 [[Q]], i32 7 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[_MSPROP7]] to <16 x i32> +; CHECK-NEXT: [[B:%.*]] = bitcast <8 x i64> [[B64]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i32> [[TMP10]] to <8 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i32> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP15:%.*]] = shl <8 x i64> [[TMP11]], splat (i64 32) +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = shl <8 x i64> [[TMP12]], splat (i64 32) +; CHECK-NEXT: [[TMP18:%.*]] = ashr <8 x i64> [[TMP16]], splat (i64 32) +; CHECK-NEXT: [[TMP19:%.*]] = or <8 x i64> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = ashr <8 x i64> [[TMP17]], splat (i64 32) +; CHECK-NEXT: [[TMP21:%.*]] = shl <8 x i64> [[TMP13]], splat (i64 32) +; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i64> [[TMP21]], zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = shl <8 x i64> [[TMP14]], splat (i64 32) +; CHECK-NEXT: [[TMP24:%.*]] = ashr <8 x i64> [[TMP22]], splat (i64 32) +; CHECK-NEXT: [[TMP25:%.*]] = or <8 x i64> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = ashr <8 x i64> [[TMP23]], splat (i64 32) +; CHECK-NEXT: [[_MSPROP8:%.*]] = or <8 x i64> [[TMP19]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = mul <8 x i64> [[TMP20]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP29:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP30:%.*]] = select <8 x i1> [[TMP29]], <8 x i64> [[_MSPROP8]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP31:%.*]] = xor <8 x i64> [[TMP27]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP32:%.*]] = or <8 x i64> [[TMP31]], [[_MSPROP8]] +; CHECK-NEXT: [[TMP33:%.*]] = or <8 x i64> [[TMP32]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP28]], <8 x i64> [[TMP33]], <8 x i64> [[TMP30]] +; CHECK-NEXT: [[TMP34:%.*]] = select <8 x i1> [[TMP29]], <8 x i64> [[TMP27]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP34]] +; + %q = load i64, ptr %ptr_b + 
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 + %vecinit.i1 = insertelement <8 x i64> %vecinit.i, i64 %q, i32 1 + %vecinit.i2 = insertelement <8 x i64> %vecinit.i1, i64 %q, i32 2 + %vecinit.i3 = insertelement <8 x i64> %vecinit.i2, i64 %q, i32 3 + %vecinit.i4 = insertelement <8 x i64> %vecinit.i3, i64 %q, i32 4 + %vecinit.i5 = insertelement <8 x i64> %vecinit.i4, i64 %q, i32 5 + %vecinit.i6 = insertelement <8 x i64> %vecinit.i5, i64 %q, i32 6 + %b64 = insertelement <8 x i64> %vecinit.i6, i64 %q, i32 7 + %b = bitcast <8 x i64> %b64 to <16 x i32> + %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) + ret <8 x i64> %res + } + + define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, ptr %ptr_b, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_mul_epi32_rmbkz( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[Q:%.*]] = load i64, ptr [[PTR_B:%.*]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i64> undef, i64 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[B64:%.*]] = shufflevector <8 x i64> [[VECINIT_I]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i64> [[_MSPROP1]] to <16 x i32> +; CHECK-NEXT: [[B:%.*]] = bitcast <8 x i64> [[B64]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP9]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i32> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = shl <8 x i64> [[TMP10]], splat (i64 32) +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = shl <8 x i64> [[TMP11]], splat (i64 32) +; CHECK-NEXT: [[TMP17:%.*]] = ashr <8 x i64> [[TMP15]], splat (i64 32) +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = ashr <8 x i64> [[TMP16]], splat (i64 32) +; CHECK-NEXT: [[TMP20:%.*]] = shl <8 x i64> [[TMP12]], splat (i64 32) +; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i64> [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = shl <8 x i64> [[TMP13]], splat (i64 32) +; CHECK-NEXT: [[TMP23:%.*]] = ashr <8 x i64> [[TMP21]], splat (i64 32) +; CHECK-NEXT: [[TMP24:%.*]] = or <8 x i64> [[TMP23]], zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = ashr <8 x i64> [[TMP22]], splat (i64 32) +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i64> 
[[TMP18]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = mul <8 x i64> [[TMP19]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP28:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP29:%.*]] = select <8 x i1> [[TMP28]], <8 x i64> [[_MSPROP2]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP30:%.*]] = xor <8 x i64> [[TMP26]], zeroinitializer +; CHECK-NEXT: [[TMP31:%.*]] = or <8 x i64> [[TMP30]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP32:%.*]] = or <8 x i64> [[TMP31]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP27]], <8 x i64> [[TMP32]], <8 x i64> [[TMP29]] +; CHECK-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP28]], <8 x i64> [[TMP26]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP33]] +; + %q = load i64, ptr %ptr_b + %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 + %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer + %b = bitcast <8 x i64> %b64 to <16 x i32> + %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask) + ret <8 x i64> %res + } + + define <8 x i64> @test_mask_mul_epi32_rmbkz_buildvector(<16 x i32> %a, ptr %ptr_b, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_mul_epi32_rmbkz_buildvector( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[Q:%.*]] = load i64, ptr [[PTR_B:%.*]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i64> undef, i64 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i64> [[_MSPROP]], i64 [[_MSLD]], i32 1 +; CHECK-NEXT: [[VECINIT_I1:%.*]] = insertelement <8 x i64> [[VECINIT_I]], i64 [[Q]], i32 1 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i64> [[_MSPROP1]], i64 [[_MSLD]], i32 2 +; CHECK-NEXT: [[VECINIT_I2:%.*]] = insertelement <8 x i64> [[VECINIT_I1]], i64 [[Q]], i32 2 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i64> [[_MSPROP2]], i64 [[_MSLD]], i32 3 +; CHECK-NEXT: [[VECINIT_I3:%.*]] = insertelement <8 x i64> [[VECINIT_I2]], i64 [[Q]], i32 3 +; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <8 x i64> [[_MSPROP3]], i64 [[_MSLD]], i32 4 +; CHECK-NEXT: [[VECINIT_I4:%.*]] = insertelement <8 x i64> [[VECINIT_I3]], i64 [[Q]], i32 4 +; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <8 x i64> [[_MSPROP4]], i64 [[_MSLD]], i32 5 +; CHECK-NEXT: [[VECINIT_I5:%.*]] = insertelement <8 x i64> [[VECINIT_I4]], i64 [[Q]], i32 5 +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <8 x i64> [[_MSPROP5]], i64 [[_MSLD]], 
i32 6 +; CHECK-NEXT: [[VECINIT_I6:%.*]] = insertelement <8 x i64> [[VECINIT_I5]], i64 [[Q]], i32 6 +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <8 x i64> [[_MSPROP6]], i64 [[_MSLD]], i32 7 +; CHECK-NEXT: [[B64:%.*]] = insertelement <8 x i64> [[VECINIT_I6]], i64 [[Q]], i32 7 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i64> [[_MSPROP7]] to <16 x i32> +; CHECK-NEXT: [[B:%.*]] = bitcast <8 x i64> [[B64]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP9]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i32> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = shl <8 x i64> [[TMP10]], splat (i64 32) +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = shl <8 x i64> [[TMP11]], splat (i64 32) +; CHECK-NEXT: [[TMP17:%.*]] = ashr <8 x i64> [[TMP15]], splat (i64 32) +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = ashr <8 x i64> [[TMP16]], splat (i64 32) +; CHECK-NEXT: [[TMP20:%.*]] = shl <8 x i64> [[TMP12]], splat (i64 32) +; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i64> [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = shl <8 x i64> [[TMP13]], splat (i64 32) +; CHECK-NEXT: [[TMP23:%.*]] = ashr <8 x i64> [[TMP21]], splat (i64 32) +; CHECK-NEXT: [[TMP24:%.*]] = or <8 x i64> [[TMP23]], zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = ashr <8 x i64> [[TMP22]], splat (i64 32) +; CHECK-NEXT: [[_MSPROP8:%.*]] = or <8 x i64> [[TMP18]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = mul <8 x i64> [[TMP19]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP28:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP29:%.*]] = select <8 x i1> [[TMP28]], <8 x i64> [[_MSPROP8]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP30:%.*]] = xor <8 x i64> [[TMP26]], zeroinitializer +; CHECK-NEXT: [[TMP31:%.*]] = or <8 x i64> [[TMP30]], [[_MSPROP8]] +; CHECK-NEXT: [[TMP32:%.*]] = or <8 x i64> [[TMP31]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP27]], <8 x i64> [[TMP32]], <8 x i64> [[TMP29]] +; CHECK-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP28]], <8 x i64> [[TMP26]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP33]] +; + %q = load i64, ptr %ptr_b + %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 + %vecinit.i1 = insertelement <8 x i64> %vecinit.i, i64 %q, i32 1 + %vecinit.i2 = insertelement <8 x i64> %vecinit.i1, i64 %q, i32 2 + %vecinit.i3 = insertelement <8 x i64> %vecinit.i2, i64 %q, i32 3 + %vecinit.i4 = insertelement <8 x i64> %vecinit.i3, i64 %q, i32 4 + %vecinit.i5 = insertelement <8 x i64> %vecinit.i4, i64 %q, i32 5 + %vecinit.i6 = insertelement <8 x i64> %vecinit.i5, i64 %q, i32 6 + %b64 = insertelement <8 x i64> %vecinit.i6, i64 %q, i32 7 + %b = bitcast <8 x i64> %b64 to <16 x i32> + %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask) + ret <8 x i64> %res + } + + declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8) + + define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) #0 { +; CHECK-LABEL: @test_mask_mul_epu32_rr( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] 
= load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[B:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = and <8 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = and <8 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = and <8 x i64> [[TMP3]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = and <8 x i64> [[TMP4]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP13:%.*]] = and <8 x i64> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i64> [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = and <8 x i64> [[TMP5]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i64> [[TMP6]], splat (i64 4294967295) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP11]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = mul <8 x i64> [[TMP12]], [[TMP18]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP19]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_mul_epu32_rrk( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[B:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = and <8 x i64> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = and <8 x i64> [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = and <8 x i64> [[TMP5]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i64> [[TMP6]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP15:%.*]] = and <8 x i64> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i64> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = and <8 x i64> [[TMP7]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = or <8 x i64> [[TMP18]], [[TMP17]] +; CHECK-NEXT: [[TMP20:%.*]] = and <8 x i64> [[TMP8]], splat (i64 4294967295) +; CHECK-NEXT: [[_MSPROP:%.*]] = 
or <8 x i64> [[TMP13]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = mul <8 x i64> [[TMP14]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP23:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP24:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP25:%.*]] = xor <8 x i64> [[TMP21]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP26:%.*]] = or <8 x i64> [[TMP25]], [[_MSPROP]] +; CHECK-NEXT: [[TMP27:%.*]] = or <8 x i64> [[TMP26]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP22]], <8 x i64> [[TMP27]], <8 x i64> [[TMP24]] +; CHECK-NEXT: [[TMP28:%.*]] = select <8 x i1> [[TMP23]], <8 x i64> [[TMP21]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP28]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_mul_epu32_rrkz( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[B:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = and <8 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = and <8 x i64> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = and <8 x i64> [[TMP4]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP10]] +; CHECK-NEXT: [[TMP13:%.*]] = and <8 x i64> [[TMP5]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i64> [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = and <8 x i64> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i64> [[TMP6]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i64> [[TMP7]], splat (i64 4294967295) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP12]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = mul <8 x i64> [[TMP13]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP23:%.*]] = select <8 x i1> [[TMP22]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = xor <8 x i64> [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = or <8 x i64> [[TMP24]], [[_MSPROP]] +; CHECK-NEXT: [[TMP26:%.*]] = or <8 x i64> [[TMP25]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP21]], <8 x i64> [[TMP26]], <8 x i64> [[TMP23]] +; CHECK-NEXT: [[TMP27:%.*]] = select <8 x i1> [[TMP22]], <8 x i64> [[TMP20]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, 
align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP27]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, ptr %ptr_b) #0 { +; +; CHECK-LABEL: @test_mask_mul_epu32_rm( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSLD]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = and <8 x i64> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = and <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i64> [[TMP8]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = and <8 x i64> [[TMP9]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i64> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = and <8 x i64> [[TMP10]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i64> [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i64> [[TMP21]], [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = and <8 x i64> [[TMP11]], splat (i64 4294967295) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP16]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = mul <8 x i64> [[TMP17]], [[TMP23]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP24]] +; + %b = load <16 x i32>, ptr %ptr_b + %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, ptr %ptr_b, <8 x i64> %passThru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_mul_epu32_rmk( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof 
[[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[_MSLD]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i32> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i64> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = and <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i64> [[TMP10]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i64> [[TMP11]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP20:%.*]] = and <8 x i64> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = and <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = and <8 x i64> [[TMP12]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP23:%.*]] = or <8 x i64> [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = or <8 x i64> [[TMP23]], [[TMP22]] +; CHECK-NEXT: [[TMP25:%.*]] = and <8 x i64> [[TMP13]], splat (i64 4294967295) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP18]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = mul <8 x i64> [[TMP19]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP28:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP29:%.*]] = select <8 x i1> [[TMP28]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP30:%.*]] = xor <8 x i64> [[TMP26]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP31:%.*]] = or <8 x i64> [[TMP30]], [[_MSPROP]] +; CHECK-NEXT: [[TMP32:%.*]] = or <8 x i64> [[TMP31]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP27]], <8 x i64> [[TMP32]], <8 x i64> [[TMP29]] +; CHECK-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP28]], <8 x i64> [[TMP26]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP33]] +; + %b = load <16 x i32>, ptr %ptr_b + %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, ptr %ptr_b, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_mul_epu32_rmkz( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: 
[[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSLD]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = and <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i64> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = and <8 x i64> [[TMP9]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i64> [[TMP10]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = and <8 x i64> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = and <8 x i64> [[TMP11]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i64> [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = or <8 x i64> [[TMP22]], [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = and <8 x i64> [[TMP12]], splat (i64 4294967295) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP17]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = mul <8 x i64> [[TMP18]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP27:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP28:%.*]] = select <8 x i1> [[TMP27]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = xor <8 x i64> [[TMP25]], zeroinitializer +; CHECK-NEXT: [[TMP30:%.*]] = or <8 x i64> [[TMP29]], [[_MSPROP]] +; CHECK-NEXT: [[TMP31:%.*]] = or <8 x i64> [[TMP30]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP26]], <8 x i64> [[TMP31]], <8 x i64> [[TMP28]] +; CHECK-NEXT: [[TMP32:%.*]] = select <8 x i1> [[TMP27]], <8 x i64> [[TMP25]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP32]] +; + %b = load <16 x i32>, ptr %ptr_b + %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, ptr %ptr_b) #0 { +; +; CHECK-LABEL: @test_mask_mul_epu32_rmb( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[Q:%.*]] = load i64, ptr [[PTR_B:%.*]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: 
[[VECINIT_I:%.*]] = insertelement <8 x i64> undef, i64 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[B64:%.*]] = shufflevector <8 x i64> [[VECINIT_I]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[_MSPROP1]] to <16 x i32> +; CHECK-NEXT: [[B:%.*]] = bitcast <8 x i64> [[B64]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP8]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = and <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i64> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = and <8 x i64> [[TMP9]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i64> [[TMP10]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = and <8 x i64> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = and <8 x i64> [[TMP11]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i64> [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = or <8 x i64> [[TMP22]], [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = and <8 x i64> [[TMP12]], splat (i64 4294967295) +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i64> [[TMP17]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = mul <8 x i64> [[TMP18]], [[TMP24]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP25]] +; + %q = load i64, ptr %ptr_b + %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 + %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer + %b = bitcast <8 x i64> %b64 to <16 x i32> + %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, ptr %ptr_b, <8 x i64> %passThru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_mul_epu32_rmbk( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[Q:%.*]] = load i64, ptr [[PTR_B:%.*]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP9]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 
0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i64> undef, i64 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[B64:%.*]] = shufflevector <8 x i64> [[VECINIT_I]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[_MSPROP1]] to <16 x i32> +; CHECK-NEXT: [[B:%.*]] = bitcast <8 x i64> [[B64]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i32> [[TMP10]] to <8 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i32> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP15:%.*]] = and <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i64> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = and <8 x i64> [[TMP11]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = or <8 x i64> [[TMP18]], [[TMP17]] +; CHECK-NEXT: [[TMP20:%.*]] = and <8 x i64> [[TMP12]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP21:%.*]] = and <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = and <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = and <8 x i64> [[TMP13]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP24:%.*]] = or <8 x i64> [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP25:%.*]] = or <8 x i64> [[TMP24]], [[TMP23]] +; CHECK-NEXT: [[TMP26:%.*]] = and <8 x i64> [[TMP14]], splat (i64 4294967295) +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i64> [[TMP19]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = mul <8 x i64> [[TMP20]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP29:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP30:%.*]] = select <8 x i1> [[TMP29]], <8 x i64> [[_MSPROP2]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP31:%.*]] = xor <8 x i64> [[TMP27]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP32:%.*]] = or <8 x i64> [[TMP31]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP33:%.*]] = or <8 x i64> [[TMP32]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP28]], <8 x i64> [[TMP33]], <8 x i64> [[TMP30]] +; CHECK-NEXT: [[TMP34:%.*]] = select <8 x i1> [[TMP29]], <8 x i64> [[TMP27]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP34]] +; + %q = load i64, ptr %ptr_b + %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 + %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer + %b = bitcast <8 x i64> %b64 to <16 x i32> + %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, ptr %ptr_b, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_mul_epu32_rmbkz( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; 
CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[Q:%.*]] = load i64, ptr [[PTR_B:%.*]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i64> undef, i64 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[B64:%.*]] = shufflevector <8 x i64> [[VECINIT_I]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i64> [[_MSPROP1]] to <16 x i32> +; CHECK-NEXT: [[B:%.*]] = bitcast <8 x i64> [[B64]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP9]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i32> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i64> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = and <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i64> [[TMP10]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i64> [[TMP11]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP20:%.*]] = and <8 x i64> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = and <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = and <8 x i64> [[TMP12]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP23:%.*]] = or <8 x i64> [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = or <8 x i64> [[TMP23]], [[TMP22]] +; CHECK-NEXT: [[TMP25:%.*]] = and <8 x i64> [[TMP13]], splat (i64 4294967295) +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i64> [[TMP18]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = mul <8 x i64> [[TMP19]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP28:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP29:%.*]] = select <8 x i1> [[TMP28]], <8 x i64> [[_MSPROP2]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP30:%.*]] = xor <8 x i64> [[TMP26]], zeroinitializer +; CHECK-NEXT: [[TMP31:%.*]] = or <8 x i64> [[TMP30]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP32:%.*]] = or <8 x i64> [[TMP31]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP27]], <8 x i64> [[TMP32]], <8 x i64> [[TMP29]] +; CHECK-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP28]], <8 x i64> [[TMP26]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP33]] +; + %q = load i64, ptr %ptr_b + %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 + %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer + %b = bitcast <8 x i64> %b64 to <16 x i32> + %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask) + ret < 8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8) + +define <4 x float> 
@test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_vextractf32x4( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[A:%.*]], <16 x float> [[A]], <4 x i32> <i32 8, i32 9, i32 10, i32 11> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i1> [[TMP5]], <8 x i1> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP6]], <8 x i1> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i32> [[_MSPROP]], <4 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x float> [[TMP4]] to <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x float> [[B:%.*]] to <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i32> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or <4 x i32> [[TMP10]], [[_MSPROP]] +; CHECK-NEXT: [[TMP12:%.*]] = or <4 x i32> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <4 x i1> [[_MSPROP1]], <4 x i32> [[TMP12]], <4 x i32> [[TMP7]] +; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[EXTRACT]], <4 x float> [[TMP4]], <4 x float> [[B]] +; CHECK-NEXT: store <4 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[TMP13]] +; + %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i32 2, <4 x float> %b, i8 %mask) + ret <4 x float> %res +} +
+declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i32, <4 x float>, i8) +
+define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_vextracti64x4( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[A:%.*]], <8 x i64> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i1> [[TMP5]], <8 x i1> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP6]], <8 x i1> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[_MSPROP]], <4 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i64> [[TMP4]], [[B:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i64> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i64> [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <4 x i1> [[_MSPROP1]], <4 x i64> [[TMP10]], <4 x i64> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP4]], <4 x i64> [[B]] +; CHECK-NEXT: store <4 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i64> [[TMP11]] +; + %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i32 1, <4 x i64> %b, i8 %mask) + ret <4 x i64> %res +} +
+declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i32, <4 x i64>, i8) +
+define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) #0 { +; +; CHECK-LABEL: @test_maskz_vextracti32x4( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> [[A]], <4 x i32> <i32 8, i32 9, i32 10, i32 11> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP5]], <8 x i1> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i32> [[_MSPROP]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i32> [[TMP7]], [[_MSPROP]] +; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <4 x i1> [[_MSPROP1]], <4 x i32> [[TMP9]], <4 x i32> [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer +; CHECK-NEXT: store <4 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP10]] +; + %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i32 2, <4 x i32> zeroinitializer, i8 %mask) + ret <4 x i32> %res +} +
+declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i32, <4 x i32>, i8) +
+define <4 x double> @test_vextractf64x4(<8 x double> %a) #0 { +; CHECK-LABEL: @test_vextractf64x4( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[A:%.*]], <8 x double> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> +; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x double> [[TMP2]] +; + %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i32 1, <4 x double> zeroinitializer, i8 -1) + ret <4 x double> %res +} +
+declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i32, <4 x double>, i8) +
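+; The mask.insert* intrinsics tested below are auto-upgraded into a pair of
+; shufflevectors: the first widens the <4 x ...> subvector to the destination
+; width (the padding lanes come from poison, so their shadow is all-ones), and
+; the second blends it into %x0 at the lane range selected by the immediate.
+; MemorySanitizer propagates shadows by repeating the same two shuffles on the
+; shadow vectors and lowers the writemask with its usual select-based pattern.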
+declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i16) +
+define <16 x float>@test_int_x86_avx512_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_insertf32x4_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> splat (i32 -1), <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[X1:%.*]], <4 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> [[_MSPROP]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[X0:%.*]], <16 x float> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP4]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i16 -1) + ret <16 x float> %res +} +
+define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3, i16 %x4) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_insertf32x4_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> splat (i32 -1), <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[X1:%.*]], <4 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> [[_MSPROP]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[X0:%.*]], <16 x float> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[X4:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x float> [[TMP6]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x float> [[X3:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = xor <16 x i32> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP14]], <16 x i32> [[TMP9]] +; CHECK-NEXT: [[TMP15:%.*]] = select <16 x i1> [[TMP8]], <16 x float> [[TMP6]], <16 x float> [[X3]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP15]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i16 %x4) + ret <16 x float> %res +} +
+define <16 x float>@test_int_x86_avx512_maskz_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, i16 %x4) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_maskz_insertf32x4_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> splat (i32 -1), <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[X1:%.*]], <4 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> [[_MSPROP]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x float> [[X0:%.*]], <16 x float> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X4:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x float> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP12]], <16 x i32> [[TMP8]] +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP7]], <16 x float> [[TMP5]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP13]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> zeroinitializer, i16 %x4) + ret <16 x float> %res +} +
+declare <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32>, <4 x i32>, i32, <16 x i32>, i16) +
+define <16 x i32>@test_int_x86_avx512_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, <16 x i32> %x3, i16 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_inserti32x4_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> splat (i32 -1), <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[X1:%.*]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> [[_MSPROP]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[X0:%.*]], <16 x i32> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP4]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1) + ret <16 x i32> %res +} +
+define <16 x i32>@test_int_x86_avx512_mask_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, <16 x i32> %x3, i16 %x4) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_inserti32x4_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> splat (i32 -1), <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[X1:%.*]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> [[_MSPROP]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i32> [[X0:%.*]], <16 x i32> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[X4:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[TMP6]], [[X3:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP12]], <16 x i32> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP6]], <16 x i32> [[X3]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP13]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4) + ret <16 x i32> %res +} +
+define <16 x i32>@test_int_x86_avx512_maskz_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, i16 %x4) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_maskz_inserti32x4_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> splat (i32 -1), <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[X1:%.*]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> [[_MSPROP]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> [[X0:%.*]], <16 x i32> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X4:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP12]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i16 %x4) + ret <16 x i32> %res +} +
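+; The 256-bit-lane insertf64x4 tests below follow the same shape with
+; <8 x i64> shadows and an i8 writemask; in the masked variants, wherever a
+; mask bit's own shadow is set, the shadow select ORs the xor of the two
+; selectable values into the result shadow, so an uninitialized mask bit
+; poisons any lane where the operands could differ.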
+declare <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double>, <4 x double>, i32, <8 x double>, i8) +
+define <8 x double>@test_int_x86_avx512_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, <8 x double> %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_insertf64x4_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> splat (i64 -1), <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[X1:%.*]], <4 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> [[_MSPROP]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[X0:%.*]], <8 x double> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> +; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP4]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 -1) + ret <8 x double> %res +} +
+define <8 x double>@test_int_x86_avx512_mask_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, <8 x double> %x3, i8 %x4) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_insertf64x4_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 160) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> splat (i64 -1), <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[X1:%.*]], <4 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> [[_MSPROP]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[X0:%.*]], <8 x double> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[X4:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[_MSPROP1]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x double> [[TMP6]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x double> [[X3:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = xor <8 x i64> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP14]], <8 x i64> [[TMP9]] +; CHECK-NEXT: [[TMP15:%.*]] = select <8 x i1> [[TMP8]], <8 x double> [[TMP6]], <8 x double> [[X3]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP15]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 %x4) + ret <8 x double> %res +} +
+define <8 x double>@test_int_x86_avx512_maskz_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, i8 %x4) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_maskz_insertf64x4_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> splat (i64 -1), <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[X1:%.*]], <4 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> [[_MSPROP]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x double> [[X0:%.*]], <8 x double> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X4:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP1]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x double> [[TMP5]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP12]], <8 x i64> [[TMP8]] +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP7]], <8 x double> [[TMP5]], <8 x double> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP13]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4) + ret <8 x double> %res +} +
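+; Integer inserti64x4 variants: same shadow propagation as the f64x4 tests
+; above, minus the double<->i64 bitcasts, so the xor/or shadow blend operates
+; on the <8 x i64> operands directly.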
+declare <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64>, <4 x i64>, i32, <8 x i64>, i8) +
+define <8 x i64>@test_int_x86_avx512_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, <8 x i64> %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_inserti64x4_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> splat (i64 -1), <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[X1:%.*]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> [[_MSPROP]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[X0:%.*]], <8 x i64> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> +; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP4]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1) + ret <8 x i64> %res +} +
+define <8 x i64>@test_int_x86_avx512_mask_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, <8 x i64> %x3, i8 %x4) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_inserti64x4_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 160) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> splat (i64 -1), <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[X1:%.*]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> [[_MSPROP]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i64> [[X0:%.*]], <8 x i64> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[X4:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[_MSPROP1]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP6]], [[X3:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP12]], <8 x i64> [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP6]], <8 x i64> [[X3]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP13]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4) + ret <8 x i64> %res +} +
+define <8 x i64>@test_int_x86_avx512_maskz_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, i8 %x4) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_maskz_inserti64x4_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> splat (i64 -1), <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[X1:%.*]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> [[_MSPROP]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i64> [[X0:%.*]], <8 x i64> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X4:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP1]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP12]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4) + ret <8 x i64> %res +} +
+define <8 x i64> @test_x86_avx512_movntdqa(ptr %a0) #0 { +; +; CHECK-LABEL: @test_x86_avx512_movntdqa( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[A0:%.*]], align 64, !nontemporal [[META2]] +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A0]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i64>, ptr [[TMP7]], align 64 +; CHECK-NEXT: store <8 x i64> [[_MSLD]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP4]] +; + %res = call <8 x i64> @llvm.x86.avx512.movntdqa(ptr %a0) + ret <8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.movntdqa(ptr) nounwind readonly + +define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) #0 { +; CHECK-LABEL: @test_cmp_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = xor <16 x i32> [[A0:%.*]], [[A1:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = xor <16 x i32> [[TMP4]], splat (i32 -1) +; CHECK-NEXT: [[TMP7:%.*]] = and <16 x i32> [[TMP6]], [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <16 x i32> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <16 x i1> [[TMP5]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i1> [[_MSPROP_ICMP]] to i16 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i1> [[TMP9]] to i16 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[TMP10]], i32 0 +; CHECK-NEXT: [[VEC0:%.*]] = insertelement <8 x i16> undef, i16 [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = xor <16 x i32> [[A0]], splat (i32 -2147483648) +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP1]], splat (i32 -1) +; CHECK-NEXT: [[TMP14:%.*]] = and <16 x i32> [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP12]], [[TMP1]] +; CHECK-NEXT: [[TMP16:%.*]] = xor <16 x i32> [[A1]], splat (i32 -2147483648) +; CHECK-NEXT: [[TMP17:%.*]] = xor <16 x i32> [[TMP2]], splat (i32 -1) +; CHECK-NEXT: [[TMP18:%.*]] = and <16 x i32> [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = or <16 x i32> [[TMP16]], [[TMP2]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp ult <16 x i32> [[TMP14]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp ult <16 x i32> [[TMP15]], [[TMP18]] +; CHECK-NEXT: [[TMP22:%.*]] = xor <16 x i1> [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = icmp slt <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i1> [[TMP22]] to i16 +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <16 x i1> [[TMP23]] to i16 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i16> [[_MSPROP]], i16 [[TMP24]], i32 1 +; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[VEC0]], i16 [[TMP25]], i32 1 +; CHECK-NEXT: [[TMP26:%.*]] = xor <16 x i32> [[A0]], splat (i32 -2147483648) +; CHECK-NEXT: [[TMP27:%.*]] = xor <16 x i32> [[TMP1]], splat (i32 -1) +; CHECK-NEXT: [[TMP28:%.*]] = and <16 x i32> [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = or <16 x i32> [[TMP26]], [[TMP1]] +; CHECK-NEXT: [[TMP30:%.*]] = xor <16 x i32> [[A1]], splat (i32 -2147483648) +; CHECK-NEXT: [[TMP31:%.*]] = xor <16 x i32> [[TMP2]], splat (i32 -1) +; CHECK-NEXT: [[TMP32:%.*]] = and <16 x i32> [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = or <16 x i32> [[TMP30]], [[TMP2]] +; CHECK-NEXT: [[TMP34:%.*]] = icmp ule <16 x i32> [[TMP28]], [[TMP33]] +; CHECK-NEXT: [[TMP35:%.*]] = icmp ule <16 x i32> [[TMP29]], [[TMP32]] +; CHECK-NEXT: [[TMP36:%.*]] = xor <16 x i1> [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = icmp sle <16 x i32> [[A0]], [[A1]] 
+; CHECK-NEXT: [[TMP38:%.*]] = bitcast <16 x i1> [[TMP36]] to i16 +; CHECK-NEXT: [[TMP39:%.*]] = bitcast <16 x i1> [[TMP37]] to i16 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i16> [[_MSPROP1]], i16 [[TMP38]], i32 2 +; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[TMP39]], i32 2 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i16> [[_MSPROP2]], i16 0, i32 3 +; CHECK-NEXT: [[VEC3:%.*]] = insertelement <8 x i16> [[VEC2]], i16 0, i32 3 +; CHECK-NEXT: [[TMP40:%.*]] = xor <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP41:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP42:%.*]] = icmp ne <16 x i32> [[TMP41]], zeroinitializer +; CHECK-NEXT: [[TMP43:%.*]] = xor <16 x i32> [[TMP41]], splat (i32 -1) +; CHECK-NEXT: [[TMP44:%.*]] = and <16 x i32> [[TMP43]], [[TMP40]] +; CHECK-NEXT: [[TMP45:%.*]] = icmp eq <16 x i32> [[TMP44]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP4:%.*]] = and <16 x i1> [[TMP42]], [[TMP45]] +; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP47:%.*]] = bitcast <16 x i1> [[_MSPROP_ICMP4]] to i16 +; CHECK-NEXT: [[TMP48:%.*]] = bitcast <16 x i1> [[TMP46]] to i16 +; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <8 x i16> [[_MSPROP3]], i16 [[TMP47]], i32 4 +; CHECK-NEXT: [[VEC4:%.*]] = insertelement <8 x i16> [[VEC3]], i16 [[TMP48]], i32 4 +; CHECK-NEXT: [[TMP49:%.*]] = xor <16 x i32> [[A0]], splat (i32 -2147483648) +; CHECK-NEXT: [[TMP50:%.*]] = xor <16 x i32> [[TMP1]], splat (i32 -1) +; CHECK-NEXT: [[TMP51:%.*]] = and <16 x i32> [[TMP49]], [[TMP50]] +; CHECK-NEXT: [[TMP52:%.*]] = or <16 x i32> [[TMP49]], [[TMP1]] +; CHECK-NEXT: [[TMP53:%.*]] = xor <16 x i32> [[A1]], splat (i32 -2147483648) +; CHECK-NEXT: [[TMP54:%.*]] = xor <16 x i32> [[TMP2]], splat (i32 -1) +; CHECK-NEXT: [[TMP55:%.*]] = and <16 x i32> [[TMP53]], [[TMP54]] +; CHECK-NEXT: [[TMP56:%.*]] = or <16 x i32> [[TMP53]], [[TMP2]] +; CHECK-NEXT: [[TMP57:%.*]] = icmp uge <16 x i32> [[TMP51]], [[TMP56]] +; CHECK-NEXT: [[TMP58:%.*]] = icmp uge <16 x i32> [[TMP52]], [[TMP55]] +; CHECK-NEXT: [[TMP59:%.*]] = xor <16 x i1> [[TMP57]], [[TMP58]] +; CHECK-NEXT: [[TMP60:%.*]] = icmp sge <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP61:%.*]] = bitcast <16 x i1> [[TMP59]] to i16 +; CHECK-NEXT: [[TMP62:%.*]] = bitcast <16 x i1> [[TMP60]] to i16 +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <8 x i16> [[_MSPROP5]], i16 [[TMP61]], i32 5 +; CHECK-NEXT: [[VEC5:%.*]] = insertelement <8 x i16> [[VEC4]], i16 [[TMP62]], i32 5 +; CHECK-NEXT: [[TMP63:%.*]] = xor <16 x i32> [[A0]], splat (i32 -2147483648) +; CHECK-NEXT: [[TMP64:%.*]] = xor <16 x i32> [[TMP1]], splat (i32 -1) +; CHECK-NEXT: [[TMP65:%.*]] = and <16 x i32> [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP66:%.*]] = or <16 x i32> [[TMP63]], [[TMP1]] +; CHECK-NEXT: [[TMP67:%.*]] = xor <16 x i32> [[A1]], splat (i32 -2147483648) +; CHECK-NEXT: [[TMP68:%.*]] = xor <16 x i32> [[TMP2]], splat (i32 -1) +; CHECK-NEXT: [[TMP69:%.*]] = and <16 x i32> [[TMP67]], [[TMP68]] +; CHECK-NEXT: [[TMP70:%.*]] = or <16 x i32> [[TMP67]], [[TMP2]] +; CHECK-NEXT: [[TMP71:%.*]] = icmp ugt <16 x i32> [[TMP65]], [[TMP70]] +; CHECK-NEXT: [[TMP72:%.*]] = icmp ugt <16 x i32> [[TMP66]], [[TMP69]] +; CHECK-NEXT: [[TMP73:%.*]] = xor <16 x i1> [[TMP71]], [[TMP72]] +; CHECK-NEXT: [[TMP74:%.*]] = icmp sgt <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP75:%.*]] = bitcast <16 x i1> [[TMP73]] to i16 +; CHECK-NEXT: [[TMP76:%.*]] = bitcast <16 x i1> [[TMP74]] to i16 +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <8 x i16> [[_MSPROP6]], i16 [[TMP75]], 
i32 6 +; CHECK-NEXT: [[VEC6:%.*]] = insertelement <8 x i16> [[VEC5]], i16 [[TMP76]], i32 6 +; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <8 x i16> [[_MSPROP7]], i16 0, i32 7 +; CHECK-NEXT: [[VEC7:%.*]] = insertelement <8 x i16> [[VEC6]], i16 -1, i32 7 +; CHECK-NEXT: store <8 x i16> [[_MSPROP8]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[VEC7]] +; + %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1) + %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 + %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1) + %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 + %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1) + %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 + %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1) + %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 + %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1) + %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 + %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1) + %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 + %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1) + %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 + %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1) + %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 + ret <8 x i16> %vec7 +} + +define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_cmp_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = xor <16 x i32> [[A0:%.*]], [[A1:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <16 x i32> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <16 x i32> [[TMP5]], splat (i32 -1) +; CHECK-NEXT: [[TMP8:%.*]] = and <16 x i32> [[TMP7]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <16 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <16 x i1> [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i1> [[_MSPROP_ICMP]], [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = and <16 x i1> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = and <16 x i1> [[_MSPROP_ICMP]], [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i1> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = and <16 x i1> [[TMP10]], [[TMP12]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i1> [[TMP17]] to i16 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <16 x i1> [[TMP18]] to i16 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[TMP19]], i32 0 +; CHECK-NEXT: [[VEC0:%.*]] = insertelement <8 x i16> undef, i16 
[[TMP20]], i32 0 +; CHECK-NEXT: [[TMP21:%.*]] = xor <16 x i32> [[A0]], splat (i32 -2147483648) +; CHECK-NEXT: [[TMP22:%.*]] = xor <16 x i32> [[TMP1]], splat (i32 -1) +; CHECK-NEXT: [[TMP23:%.*]] = and <16 x i32> [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = or <16 x i32> [[TMP21]], [[TMP1]] +; CHECK-NEXT: [[TMP25:%.*]] = xor <16 x i32> [[A1]], splat (i32 -2147483648) +; CHECK-NEXT: [[TMP26:%.*]] = xor <16 x i32> [[TMP2]], splat (i32 -1) +; CHECK-NEXT: [[TMP27:%.*]] = and <16 x i32> [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = or <16 x i32> [[TMP25]], [[TMP2]] +; CHECK-NEXT: [[TMP29:%.*]] = icmp ult <16 x i32> [[TMP23]], [[TMP28]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp ult <16 x i32> [[TMP24]], [[TMP27]] +; CHECK-NEXT: [[TMP31:%.*]] = xor <16 x i1> [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP32:%.*]] = icmp slt <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP33:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP34:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[TMP35:%.*]] = and <16 x i1> [[TMP31]], [[TMP33]] +; CHECK-NEXT: [[TMP36:%.*]] = and <16 x i1> [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP37:%.*]] = and <16 x i1> [[TMP31]], [[TMP34]] +; CHECK-NEXT: [[TMP38:%.*]] = or <16 x i1> [[TMP35]], [[TMP36]] +; CHECK-NEXT: [[TMP39:%.*]] = or <16 x i1> [[TMP38]], [[TMP37]] +; CHECK-NEXT: [[TMP40:%.*]] = and <16 x i1> [[TMP32]], [[TMP34]] +; CHECK-NEXT: [[TMP41:%.*]] = bitcast <16 x i1> [[TMP39]] to i16 +; CHECK-NEXT: [[TMP42:%.*]] = bitcast <16 x i1> [[TMP40]] to i16 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i16> [[_MSPROP]], i16 [[TMP41]], i32 1 +; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[VEC0]], i16 [[TMP42]], i32 1 +; CHECK-NEXT: [[TMP43:%.*]] = xor <16 x i32> [[A0]], splat (i32 -2147483648) +; CHECK-NEXT: [[TMP44:%.*]] = xor <16 x i32> [[TMP1]], splat (i32 -1) +; CHECK-NEXT: [[TMP45:%.*]] = and <16 x i32> [[TMP43]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = or <16 x i32> [[TMP43]], [[TMP1]] +; CHECK-NEXT: [[TMP47:%.*]] = xor <16 x i32> [[A1]], splat (i32 -2147483648) +; CHECK-NEXT: [[TMP48:%.*]] = xor <16 x i32> [[TMP2]], splat (i32 -1) +; CHECK-NEXT: [[TMP49:%.*]] = and <16 x i32> [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = or <16 x i32> [[TMP47]], [[TMP2]] +; CHECK-NEXT: [[TMP51:%.*]] = icmp ule <16 x i32> [[TMP45]], [[TMP50]] +; CHECK-NEXT: [[TMP52:%.*]] = icmp ule <16 x i32> [[TMP46]], [[TMP49]] +; CHECK-NEXT: [[TMP53:%.*]] = xor <16 x i1> [[TMP51]], [[TMP52]] +; CHECK-NEXT: [[TMP54:%.*]] = icmp sle <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP55:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP56:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[TMP57:%.*]] = and <16 x i1> [[TMP53]], [[TMP55]] +; CHECK-NEXT: [[TMP58:%.*]] = and <16 x i1> [[TMP54]], [[TMP55]] +; CHECK-NEXT: [[TMP59:%.*]] = and <16 x i1> [[TMP53]], [[TMP56]] +; CHECK-NEXT: [[TMP60:%.*]] = or <16 x i1> [[TMP57]], [[TMP58]] +; CHECK-NEXT: [[TMP61:%.*]] = or <16 x i1> [[TMP60]], [[TMP59]] +; CHECK-NEXT: [[TMP62:%.*]] = and <16 x i1> [[TMP54]], [[TMP56]] +; CHECK-NEXT: [[TMP63:%.*]] = bitcast <16 x i1> [[TMP61]] to i16 +; CHECK-NEXT: [[TMP64:%.*]] = bitcast <16 x i1> [[TMP62]] to i16 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i16> [[_MSPROP1]], i16 [[TMP63]], i32 2 +; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[TMP64]], i32 2 +; CHECK-NEXT: [[TMP65:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP66:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[TMP67:%.*]] = and <16 x i1> zeroinitializer, 
[[TMP65]] +; CHECK-NEXT: [[TMP68:%.*]] = and <16 x i1> zeroinitializer, [[TMP65]] +; CHECK-NEXT: [[TMP69:%.*]] = and <16 x i1> zeroinitializer, [[TMP66]] +; CHECK-NEXT: [[TMP70:%.*]] = or <16 x i1> [[TMP67]], [[TMP68]] +; CHECK-NEXT: [[TMP71:%.*]] = or <16 x i1> [[TMP70]], [[TMP69]] +; CHECK-NEXT: [[TMP72:%.*]] = and <16 x i1> zeroinitializer, [[TMP66]] +; CHECK-NEXT: [[TMP73:%.*]] = bitcast <16 x i1> [[TMP71]] to i16 +; CHECK-NEXT: [[TMP74:%.*]] = bitcast <16 x i1> [[TMP72]] to i16 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i16> [[_MSPROP2]], i16 [[TMP73]], i32 3 +; CHECK-NEXT: [[VEC3:%.*]] = insertelement <8 x i16> [[VEC2]], i16 [[TMP74]], i32 3 +; CHECK-NEXT: [[TMP75:%.*]] = xor <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP76:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP77:%.*]] = icmp ne <16 x i32> [[TMP76]], zeroinitializer +; CHECK-NEXT: [[TMP78:%.*]] = xor <16 x i32> [[TMP76]], splat (i32 -1) +; CHECK-NEXT: [[TMP79:%.*]] = and <16 x i32> [[TMP78]], [[TMP75]] +; CHECK-NEXT: [[TMP80:%.*]] = icmp eq <16 x i32> [[TMP79]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP4:%.*]] = and <16 x i1> [[TMP77]], [[TMP80]] +; CHECK-NEXT: [[TMP81:%.*]] = icmp ne <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP82:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP83:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[TMP84:%.*]] = and <16 x i1> [[_MSPROP_ICMP4]], [[TMP82]] +; CHECK-NEXT: [[TMP85:%.*]] = and <16 x i1> [[TMP81]], [[TMP82]] +; CHECK-NEXT: [[TMP86:%.*]] = and <16 x i1> [[_MSPROP_ICMP4]], [[TMP83]] +; CHECK-NEXT: [[TMP87:%.*]] = or <16 x i1> [[TMP84]], [[TMP85]] +; CHECK-NEXT: [[TMP88:%.*]] = or <16 x i1> [[TMP87]], [[TMP86]] +; CHECK-NEXT: [[TMP89:%.*]] = and <16 x i1> [[TMP81]], [[TMP83]] +; CHECK-NEXT: [[TMP90:%.*]] = bitcast <16 x i1> [[TMP88]] to i16 +; CHECK-NEXT: [[TMP91:%.*]] = bitcast <16 x i1> [[TMP89]] to i16 +; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <8 x i16> [[_MSPROP3]], i16 [[TMP90]], i32 4 +; CHECK-NEXT: [[VEC4:%.*]] = insertelement <8 x i16> [[VEC3]], i16 [[TMP91]], i32 4 +; CHECK-NEXT: [[TMP92:%.*]] = xor <16 x i32> [[A0]], splat (i32 -2147483648) +; CHECK-NEXT: [[TMP93:%.*]] = xor <16 x i32> [[TMP1]], splat (i32 -1) +; CHECK-NEXT: [[TMP94:%.*]] = and <16 x i32> [[TMP92]], [[TMP93]] +; CHECK-NEXT: [[TMP95:%.*]] = or <16 x i32> [[TMP92]], [[TMP1]] +; CHECK-NEXT: [[TMP96:%.*]] = xor <16 x i32> [[A1]], splat (i32 -2147483648) +; CHECK-NEXT: [[TMP97:%.*]] = xor <16 x i32> [[TMP2]], splat (i32 -1) +; CHECK-NEXT: [[TMP98:%.*]] = and <16 x i32> [[TMP96]], [[TMP97]] +; CHECK-NEXT: [[TMP99:%.*]] = or <16 x i32> [[TMP96]], [[TMP2]] +; CHECK-NEXT: [[TMP100:%.*]] = icmp uge <16 x i32> [[TMP94]], [[TMP99]] +; CHECK-NEXT: [[TMP101:%.*]] = icmp uge <16 x i32> [[TMP95]], [[TMP98]] +; CHECK-NEXT: [[TMP102:%.*]] = xor <16 x i1> [[TMP100]], [[TMP101]] +; CHECK-NEXT: [[TMP103:%.*]] = icmp sge <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP104:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP105:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[TMP106:%.*]] = and <16 x i1> [[TMP102]], [[TMP104]] +; CHECK-NEXT: [[TMP107:%.*]] = and <16 x i1> [[TMP103]], [[TMP104]] +; CHECK-NEXT: [[TMP108:%.*]] = and <16 x i1> [[TMP102]], [[TMP105]] +; CHECK-NEXT: [[TMP109:%.*]] = or <16 x i1> [[TMP106]], [[TMP107]] +; CHECK-NEXT: [[TMP110:%.*]] = or <16 x i1> [[TMP109]], [[TMP108]] +; CHECK-NEXT: [[TMP111:%.*]] = and <16 x i1> [[TMP103]], [[TMP105]] +; CHECK-NEXT: [[TMP112:%.*]] = bitcast <16 x i1> [[TMP110]] to i16 +; CHECK-NEXT: 
[[TMP113:%.*]] = bitcast <16 x i1> [[TMP111]] to i16 +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <8 x i16> [[_MSPROP5]], i16 [[TMP112]], i32 5 +; CHECK-NEXT: [[VEC5:%.*]] = insertelement <8 x i16> [[VEC4]], i16 [[TMP113]], i32 5 +; CHECK-NEXT: [[TMP114:%.*]] = xor <16 x i32> [[A0]], splat (i32 -2147483648) +; CHECK-NEXT: [[TMP115:%.*]] = xor <16 x i32> [[TMP1]], splat (i32 -1) +; CHECK-NEXT: [[TMP116:%.*]] = and <16 x i32> [[TMP114]], [[TMP115]] +; CHECK-NEXT: [[TMP117:%.*]] = or <16 x i32> [[TMP114]], [[TMP1]] +; CHECK-NEXT: [[TMP118:%.*]] = xor <16 x i32> [[A1]], splat (i32 -2147483648) +; CHECK-NEXT: [[TMP119:%.*]] = xor <16 x i32> [[TMP2]], splat (i32 -1) +; CHECK-NEXT: [[TMP120:%.*]] = and <16 x i32> [[TMP118]], [[TMP119]] +; CHECK-NEXT: [[TMP121:%.*]] = or <16 x i32> [[TMP118]], [[TMP2]] +; CHECK-NEXT: [[TMP122:%.*]] = icmp ugt <16 x i32> [[TMP116]], [[TMP121]] +; CHECK-NEXT: [[TMP123:%.*]] = icmp ugt <16 x i32> [[TMP117]], [[TMP120]] +; CHECK-NEXT: [[TMP124:%.*]] = xor <16 x i1> [[TMP122]], [[TMP123]] +; CHECK-NEXT: [[TMP125:%.*]] = icmp sgt <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP126:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP127:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[TMP128:%.*]] = and <16 x i1> [[TMP124]], [[TMP126]] +; CHECK-NEXT: [[TMP129:%.*]] = and <16 x i1> [[TMP125]], [[TMP126]] +; CHECK-NEXT: [[TMP130:%.*]] = and <16 x i1> [[TMP124]], [[TMP127]] +; CHECK-NEXT: [[TMP131:%.*]] = or <16 x i1> [[TMP128]], [[TMP129]] +; CHECK-NEXT: [[TMP132:%.*]] = or <16 x i1> [[TMP131]], [[TMP130]] +; CHECK-NEXT: [[TMP133:%.*]] = and <16 x i1> [[TMP125]], [[TMP127]] +; CHECK-NEXT: [[TMP134:%.*]] = bitcast <16 x i1> [[TMP132]] to i16 +; CHECK-NEXT: [[TMP135:%.*]] = bitcast <16 x i1> [[TMP133]] to i16 +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <8 x i16> [[_MSPROP6]], i16 [[TMP134]], i32 6 +; CHECK-NEXT: [[VEC6:%.*]] = insertelement <8 x i16> [[VEC5]], i16 [[TMP135]], i32 6 +; CHECK-NEXT: [[TMP136:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP137:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[TMP138:%.*]] = and <16 x i1> zeroinitializer, [[TMP136]] +; CHECK-NEXT: [[TMP139:%.*]] = and <16 x i1> splat (i1 true), [[TMP136]] +; CHECK-NEXT: [[TMP140:%.*]] = and <16 x i1> zeroinitializer, [[TMP137]] +; CHECK-NEXT: [[TMP141:%.*]] = or <16 x i1> [[TMP138]], [[TMP139]] +; CHECK-NEXT: [[TMP142:%.*]] = or <16 x i1> [[TMP141]], [[TMP140]] +; CHECK-NEXT: [[TMP143:%.*]] = and <16 x i1> splat (i1 true), [[TMP137]] +; CHECK-NEXT: [[TMP144:%.*]] = bitcast <16 x i1> [[TMP142]] to i16 +; CHECK-NEXT: [[TMP145:%.*]] = bitcast <16 x i1> [[TMP143]] to i16 +; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <8 x i16> [[_MSPROP7]], i16 [[TMP144]], i32 7 +; CHECK-NEXT: [[VEC7:%.*]] = insertelement <8 x i16> [[VEC6]], i16 [[TMP145]], i32 7 +; CHECK-NEXT: store <8 x i16> [[_MSPROP8]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[VEC7]] +; + %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask) + %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 + %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask) + %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 + %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask) + %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 + %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask) + %vec3 = insertelement 
<8 x i16> %vec2, i16 %res3, i32 3 + %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask) + %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 + %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask) + %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 + %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask) + %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 + %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask) + %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 + ret <8 x i16> %vec7 +} + +declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone + +define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) #0 { +; CHECK-LABEL: @test_ucmp_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = xor <16 x i32> [[A0:%.*]], [[A1:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = xor <16 x i32> [[TMP4]], splat (i32 -1) +; CHECK-NEXT: [[TMP7:%.*]] = and <16 x i32> [[TMP6]], [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <16 x i32> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <16 x i1> [[TMP5]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i1> [[_MSPROP_ICMP]] to i16 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i1> [[TMP9]] to i16 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[TMP10]], i32 0 +; CHECK-NEXT: [[VEC0:%.*]] = insertelement <8 x i16> undef, i16 [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = xor <16 x i32> [[TMP1]], splat (i32 -1) +; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i32> [[A0]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP2]], splat (i32 -1) +; CHECK-NEXT: [[TMP16:%.*]] = and <16 x i32> [[A1]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp ult <16 x i32> [[TMP13]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = icmp ult <16 x i32> [[TMP14]], [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = xor <16 x i1> [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp ult <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP22:%.*]] = bitcast <16 x i1> [[TMP20]] to i16 +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <16 x i1> [[TMP21]] to i16 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i16> [[_MSPROP]], i16 [[TMP22]], i32 1 +; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[VEC0]], i16 [[TMP23]], i32 1 +; CHECK-NEXT: [[TMP24:%.*]] = xor <16 x i32> [[TMP1]], splat (i32 -1) +; CHECK-NEXT: [[TMP25:%.*]] = and <16 x i32> [[A0]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = or <16 x i32> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP27:%.*]] = xor <16 x i32> [[TMP2]], splat (i32 -1) +; CHECK-NEXT: [[TMP28:%.*]] = and <16 x i32> [[A1]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = or <16 x i32> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp ule <16 x i32> [[TMP25]], [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = icmp ule <16 x i32> [[TMP26]], [[TMP28]] +; 
CHECK-NEXT: [[TMP32:%.*]] = xor <16 x i1> [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = icmp ule <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP34:%.*]] = bitcast <16 x i1> [[TMP32]] to i16 +; CHECK-NEXT: [[TMP35:%.*]] = bitcast <16 x i1> [[TMP33]] to i16 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i16> [[_MSPROP1]], i16 [[TMP34]], i32 2 +; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[TMP35]], i32 2 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i16> [[_MSPROP2]], i16 0, i32 3 +; CHECK-NEXT: [[VEC3:%.*]] = insertelement <8 x i16> [[VEC2]], i16 0, i32 3 +; CHECK-NEXT: [[TMP36:%.*]] = xor <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP37:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP38:%.*]] = icmp ne <16 x i32> [[TMP37]], zeroinitializer +; CHECK-NEXT: [[TMP39:%.*]] = xor <16 x i32> [[TMP37]], splat (i32 -1) +; CHECK-NEXT: [[TMP40:%.*]] = and <16 x i32> [[TMP39]], [[TMP36]] +; CHECK-NEXT: [[TMP41:%.*]] = icmp eq <16 x i32> [[TMP40]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP4:%.*]] = and <16 x i1> [[TMP38]], [[TMP41]] +; CHECK-NEXT: [[TMP42:%.*]] = icmp ne <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP43:%.*]] = bitcast <16 x i1> [[_MSPROP_ICMP4]] to i16 +; CHECK-NEXT: [[TMP44:%.*]] = bitcast <16 x i1> [[TMP42]] to i16 +; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <8 x i16> [[_MSPROP3]], i16 [[TMP43]], i32 4 +; CHECK-NEXT: [[VEC4:%.*]] = insertelement <8 x i16> [[VEC3]], i16 [[TMP44]], i32 4 +; CHECK-NEXT: [[TMP45:%.*]] = xor <16 x i32> [[TMP1]], splat (i32 -1) +; CHECK-NEXT: [[TMP46:%.*]] = and <16 x i32> [[A0]], [[TMP45]] +; CHECK-NEXT: [[TMP47:%.*]] = or <16 x i32> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP48:%.*]] = xor <16 x i32> [[TMP2]], splat (i32 -1) +; CHECK-NEXT: [[TMP49:%.*]] = and <16 x i32> [[A1]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = or <16 x i32> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP51:%.*]] = icmp uge <16 x i32> [[TMP46]], [[TMP50]] +; CHECK-NEXT: [[TMP52:%.*]] = icmp uge <16 x i32> [[TMP47]], [[TMP49]] +; CHECK-NEXT: [[TMP53:%.*]] = xor <16 x i1> [[TMP51]], [[TMP52]] +; CHECK-NEXT: [[TMP54:%.*]] = icmp uge <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP55:%.*]] = bitcast <16 x i1> [[TMP53]] to i16 +; CHECK-NEXT: [[TMP56:%.*]] = bitcast <16 x i1> [[TMP54]] to i16 +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <8 x i16> [[_MSPROP5]], i16 [[TMP55]], i32 5 +; CHECK-NEXT: [[VEC5:%.*]] = insertelement <8 x i16> [[VEC4]], i16 [[TMP56]], i32 5 +; CHECK-NEXT: [[TMP57:%.*]] = xor <16 x i32> [[TMP1]], splat (i32 -1) +; CHECK-NEXT: [[TMP58:%.*]] = and <16 x i32> [[A0]], [[TMP57]] +; CHECK-NEXT: [[TMP59:%.*]] = or <16 x i32> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP60:%.*]] = xor <16 x i32> [[TMP2]], splat (i32 -1) +; CHECK-NEXT: [[TMP61:%.*]] = and <16 x i32> [[A1]], [[TMP60]] +; CHECK-NEXT: [[TMP62:%.*]] = or <16 x i32> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP63:%.*]] = icmp ugt <16 x i32> [[TMP58]], [[TMP62]] +; CHECK-NEXT: [[TMP64:%.*]] = icmp ugt <16 x i32> [[TMP59]], [[TMP61]] +; CHECK-NEXT: [[TMP65:%.*]] = xor <16 x i1> [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP66:%.*]] = icmp ugt <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP67:%.*]] = bitcast <16 x i1> [[TMP65]] to i16 +; CHECK-NEXT: [[TMP68:%.*]] = bitcast <16 x i1> [[TMP66]] to i16 +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <8 x i16> [[_MSPROP6]], i16 [[TMP67]], i32 6 +; CHECK-NEXT: [[VEC6:%.*]] = insertelement <8 x i16> [[VEC5]], i16 [[TMP68]], i32 6 +; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <8 x i16> [[_MSPROP7]], i16 0, i32 7 +; CHECK-NEXT: [[VEC7:%.*]] = 
insertelement <8 x i16> [[VEC6]], i16 -1, i32 7 +; CHECK-NEXT: store <8 x i16> [[_MSPROP8]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[VEC7]] +; + %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1) + %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 + %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1) + %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 + %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1) + %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 + %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1) + %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 + %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1) + %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 + %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1) + %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 + %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1) + %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 + %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1) + %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 + ret <8 x i16> %vec7 +} + +define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_ucmp_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = xor <16 x i32> [[A0:%.*]], [[A1:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <16 x i32> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <16 x i32> [[TMP5]], splat (i32 -1) +; CHECK-NEXT: [[TMP8:%.*]] = and <16 x i32> [[TMP7]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <16 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <16 x i1> [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i1> [[_MSPROP_ICMP]], [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = and <16 x i1> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = and <16 x i1> [[_MSPROP_ICMP]], [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i1> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = and <16 x i1> [[TMP10]], [[TMP12]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i1> [[TMP17]] to i16 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <16 x i1> [[TMP18]] to i16 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> splat (i16 -1), i16 [[TMP19]], i32 0 +; CHECK-NEXT: [[VEC0:%.*]] = insertelement <8 x i16> undef, i16 [[TMP20]], i32 0 +; CHECK-NEXT: [[TMP21:%.*]] = xor <16 x i32> [[TMP1]], splat (i32 -1) +; CHECK-NEXT: [[TMP22:%.*]] = and <16 x i32> [[A0]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = or <16 x i32> 
[[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP24:%.*]] = xor <16 x i32> [[TMP2]], splat (i32 -1) +; CHECK-NEXT: [[TMP25:%.*]] = and <16 x i32> [[A1]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = or <16 x i32> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP27:%.*]] = icmp ult <16 x i32> [[TMP22]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = icmp ult <16 x i32> [[TMP23]], [[TMP25]] +; CHECK-NEXT: [[TMP29:%.*]] = xor <16 x i1> [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp ult <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP31:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP32:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[TMP33:%.*]] = and <16 x i1> [[TMP29]], [[TMP31]] +; CHECK-NEXT: [[TMP34:%.*]] = and <16 x i1> [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP35:%.*]] = and <16 x i1> [[TMP29]], [[TMP32]] +; CHECK-NEXT: [[TMP36:%.*]] = or <16 x i1> [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP37:%.*]] = or <16 x i1> [[TMP36]], [[TMP35]] +; CHECK-NEXT: [[TMP38:%.*]] = and <16 x i1> [[TMP30]], [[TMP32]] +; CHECK-NEXT: [[TMP39:%.*]] = bitcast <16 x i1> [[TMP37]] to i16 +; CHECK-NEXT: [[TMP40:%.*]] = bitcast <16 x i1> [[TMP38]] to i16 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i16> [[_MSPROP]], i16 [[TMP39]], i32 1 +; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i16> [[VEC0]], i16 [[TMP40]], i32 1 +; CHECK-NEXT: [[TMP41:%.*]] = xor <16 x i32> [[TMP1]], splat (i32 -1) +; CHECK-NEXT: [[TMP42:%.*]] = and <16 x i32> [[A0]], [[TMP41]] +; CHECK-NEXT: [[TMP43:%.*]] = or <16 x i32> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP44:%.*]] = xor <16 x i32> [[TMP2]], splat (i32 -1) +; CHECK-NEXT: [[TMP45:%.*]] = and <16 x i32> [[A1]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = or <16 x i32> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP47:%.*]] = icmp ule <16 x i32> [[TMP42]], [[TMP46]] +; CHECK-NEXT: [[TMP48:%.*]] = icmp ule <16 x i32> [[TMP43]], [[TMP45]] +; CHECK-NEXT: [[TMP49:%.*]] = xor <16 x i1> [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = icmp ule <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP51:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP52:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[TMP53:%.*]] = and <16 x i1> [[TMP49]], [[TMP51]] +; CHECK-NEXT: [[TMP54:%.*]] = and <16 x i1> [[TMP50]], [[TMP51]] +; CHECK-NEXT: [[TMP55:%.*]] = and <16 x i1> [[TMP49]], [[TMP52]] +; CHECK-NEXT: [[TMP56:%.*]] = or <16 x i1> [[TMP53]], [[TMP54]] +; CHECK-NEXT: [[TMP57:%.*]] = or <16 x i1> [[TMP56]], [[TMP55]] +; CHECK-NEXT: [[TMP58:%.*]] = and <16 x i1> [[TMP50]], [[TMP52]] +; CHECK-NEXT: [[TMP59:%.*]] = bitcast <16 x i1> [[TMP57]] to i16 +; CHECK-NEXT: [[TMP60:%.*]] = bitcast <16 x i1> [[TMP58]] to i16 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i16> [[_MSPROP1]], i16 [[TMP59]], i32 2 +; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i16> [[VEC1]], i16 [[TMP60]], i32 2 +; CHECK-NEXT: [[TMP61:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP62:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[TMP63:%.*]] = and <16 x i1> zeroinitializer, [[TMP61]] +; CHECK-NEXT: [[TMP64:%.*]] = and <16 x i1> zeroinitializer, [[TMP61]] +; CHECK-NEXT: [[TMP65:%.*]] = and <16 x i1> zeroinitializer, [[TMP62]] +; CHECK-NEXT: [[TMP66:%.*]] = or <16 x i1> [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP67:%.*]] = or <16 x i1> [[TMP66]], [[TMP65]] +; CHECK-NEXT: [[TMP68:%.*]] = and <16 x i1> zeroinitializer, [[TMP62]] +; CHECK-NEXT: [[TMP69:%.*]] = bitcast <16 x i1> [[TMP67]] to i16 +; CHECK-NEXT: [[TMP70:%.*]] = bitcast <16 x i1> [[TMP68]] to i16 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement 
<8 x i16> [[_MSPROP2]], i16 [[TMP69]], i32 3 +; CHECK-NEXT: [[VEC3:%.*]] = insertelement <8 x i16> [[VEC2]], i16 [[TMP70]], i32 3 +; CHECK-NEXT: [[TMP71:%.*]] = xor <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP72:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP73:%.*]] = icmp ne <16 x i32> [[TMP72]], zeroinitializer +; CHECK-NEXT: [[TMP74:%.*]] = xor <16 x i32> [[TMP72]], splat (i32 -1) +; CHECK-NEXT: [[TMP75:%.*]] = and <16 x i32> [[TMP74]], [[TMP71]] +; CHECK-NEXT: [[TMP76:%.*]] = icmp eq <16 x i32> [[TMP75]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP4:%.*]] = and <16 x i1> [[TMP73]], [[TMP76]] +; CHECK-NEXT: [[TMP77:%.*]] = icmp ne <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP78:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP79:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[TMP80:%.*]] = and <16 x i1> [[_MSPROP_ICMP4]], [[TMP78]] +; CHECK-NEXT: [[TMP81:%.*]] = and <16 x i1> [[TMP77]], [[TMP78]] +; CHECK-NEXT: [[TMP82:%.*]] = and <16 x i1> [[_MSPROP_ICMP4]], [[TMP79]] +; CHECK-NEXT: [[TMP83:%.*]] = or <16 x i1> [[TMP80]], [[TMP81]] +; CHECK-NEXT: [[TMP84:%.*]] = or <16 x i1> [[TMP83]], [[TMP82]] +; CHECK-NEXT: [[TMP85:%.*]] = and <16 x i1> [[TMP77]], [[TMP79]] +; CHECK-NEXT: [[TMP86:%.*]] = bitcast <16 x i1> [[TMP84]] to i16 +; CHECK-NEXT: [[TMP87:%.*]] = bitcast <16 x i1> [[TMP85]] to i16 +; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <8 x i16> [[_MSPROP3]], i16 [[TMP86]], i32 4 +; CHECK-NEXT: [[VEC4:%.*]] = insertelement <8 x i16> [[VEC3]], i16 [[TMP87]], i32 4 +; CHECK-NEXT: [[TMP88:%.*]] = xor <16 x i32> [[TMP1]], splat (i32 -1) +; CHECK-NEXT: [[TMP89:%.*]] = and <16 x i32> [[A0]], [[TMP88]] +; CHECK-NEXT: [[TMP90:%.*]] = or <16 x i32> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP91:%.*]] = xor <16 x i32> [[TMP2]], splat (i32 -1) +; CHECK-NEXT: [[TMP92:%.*]] = and <16 x i32> [[A1]], [[TMP91]] +; CHECK-NEXT: [[TMP93:%.*]] = or <16 x i32> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP94:%.*]] = icmp uge <16 x i32> [[TMP89]], [[TMP93]] +; CHECK-NEXT: [[TMP95:%.*]] = icmp uge <16 x i32> [[TMP90]], [[TMP92]] +; CHECK-NEXT: [[TMP96:%.*]] = xor <16 x i1> [[TMP94]], [[TMP95]] +; CHECK-NEXT: [[TMP97:%.*]] = icmp uge <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP98:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP99:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[TMP100:%.*]] = and <16 x i1> [[TMP96]], [[TMP98]] +; CHECK-NEXT: [[TMP101:%.*]] = and <16 x i1> [[TMP97]], [[TMP98]] +; CHECK-NEXT: [[TMP102:%.*]] = and <16 x i1> [[TMP96]], [[TMP99]] +; CHECK-NEXT: [[TMP103:%.*]] = or <16 x i1> [[TMP100]], [[TMP101]] +; CHECK-NEXT: [[TMP104:%.*]] = or <16 x i1> [[TMP103]], [[TMP102]] +; CHECK-NEXT: [[TMP105:%.*]] = and <16 x i1> [[TMP97]], [[TMP99]] +; CHECK-NEXT: [[TMP106:%.*]] = bitcast <16 x i1> [[TMP104]] to i16 +; CHECK-NEXT: [[TMP107:%.*]] = bitcast <16 x i1> [[TMP105]] to i16 +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <8 x i16> [[_MSPROP5]], i16 [[TMP106]], i32 5 +; CHECK-NEXT: [[VEC5:%.*]] = insertelement <8 x i16> [[VEC4]], i16 [[TMP107]], i32 5 +; CHECK-NEXT: [[TMP108:%.*]] = xor <16 x i32> [[TMP1]], splat (i32 -1) +; CHECK-NEXT: [[TMP109:%.*]] = and <16 x i32> [[A0]], [[TMP108]] +; CHECK-NEXT: [[TMP110:%.*]] = or <16 x i32> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP111:%.*]] = xor <16 x i32> [[TMP2]], splat (i32 -1) +; CHECK-NEXT: [[TMP112:%.*]] = and <16 x i32> [[A1]], [[TMP111]] +; CHECK-NEXT: [[TMP113:%.*]] = or <16 x i32> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP114:%.*]] = icmp ugt <16 x i32> [[TMP109]], [[TMP113]] +; CHECK-NEXT: 
[[TMP115:%.*]] = icmp ugt <16 x i32> [[TMP110]], [[TMP112]] +; CHECK-NEXT: [[TMP116:%.*]] = xor <16 x i1> [[TMP114]], [[TMP115]] +; CHECK-NEXT: [[TMP117:%.*]] = icmp ugt <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP118:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP119:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[TMP120:%.*]] = and <16 x i1> [[TMP116]], [[TMP118]] +; CHECK-NEXT: [[TMP121:%.*]] = and <16 x i1> [[TMP117]], [[TMP118]] +; CHECK-NEXT: [[TMP122:%.*]] = and <16 x i1> [[TMP116]], [[TMP119]] +; CHECK-NEXT: [[TMP123:%.*]] = or <16 x i1> [[TMP120]], [[TMP121]] +; CHECK-NEXT: [[TMP124:%.*]] = or <16 x i1> [[TMP123]], [[TMP122]] +; CHECK-NEXT: [[TMP125:%.*]] = and <16 x i1> [[TMP117]], [[TMP119]] +; CHECK-NEXT: [[TMP126:%.*]] = bitcast <16 x i1> [[TMP124]] to i16 +; CHECK-NEXT: [[TMP127:%.*]] = bitcast <16 x i1> [[TMP125]] to i16 +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <8 x i16> [[_MSPROP6]], i16 [[TMP126]], i32 6 +; CHECK-NEXT: [[VEC6:%.*]] = insertelement <8 x i16> [[VEC5]], i16 [[TMP127]], i32 6 +; CHECK-NEXT: [[TMP128:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP129:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[TMP130:%.*]] = and <16 x i1> zeroinitializer, [[TMP128]] +; CHECK-NEXT: [[TMP131:%.*]] = and <16 x i1> splat (i1 true), [[TMP128]] +; CHECK-NEXT: [[TMP132:%.*]] = and <16 x i1> zeroinitializer, [[TMP129]] +; CHECK-NEXT: [[TMP133:%.*]] = or <16 x i1> [[TMP130]], [[TMP131]] +; CHECK-NEXT: [[TMP134:%.*]] = or <16 x i1> [[TMP133]], [[TMP132]] +; CHECK-NEXT: [[TMP135:%.*]] = and <16 x i1> splat (i1 true), [[TMP129]] +; CHECK-NEXT: [[TMP136:%.*]] = bitcast <16 x i1> [[TMP134]] to i16 +; CHECK-NEXT: [[TMP137:%.*]] = bitcast <16 x i1> [[TMP135]] to i16 +; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <8 x i16> [[_MSPROP7]], i16 [[TMP136]], i32 7 +; CHECK-NEXT: [[VEC7:%.*]] = insertelement <8 x i16> [[VEC6]], i16 [[TMP137]], i32 7 +; CHECK-NEXT: store <8 x i16> [[_MSPROP8]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[VEC7]] +; + %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask) + %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 + %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask) + %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1 + %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask) + %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2 + %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask) + %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3 + %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask) + %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4 + %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask) + %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5 + %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask) + %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6 + %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask) + %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7 + ret <8 x i16> %vec7 +} + +declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone + +define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) #0 { +; CHECK-LABEL: 
@test_cmp_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i64> [[A0:%.*]], [[A1:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = xor <8 x i64> [[TMP4]], splat (i64 -1) +; CHECK-NEXT: [[TMP7:%.*]] = and <8 x i64> [[TMP6]], [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <8 x i64> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <8 x i1> [[TMP5]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i1> [[_MSPROP_ICMP]] to i8 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i1> [[TMP9]] to i8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> splat (i8 -1), i8 [[TMP10]], i32 0 +; CHECK-NEXT: [[VEC0:%.*]] = insertelement <8 x i8> undef, i8 [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = xor <8 x i64> [[A0]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 -1) +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i64> [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP12]], [[TMP1]] +; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i64> [[A1]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP17:%.*]] = xor <8 x i64> [[TMP2]], splat (i64 -1) +; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i64> [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = or <8 x i64> [[TMP16]], [[TMP2]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp ult <8 x i64> [[TMP14]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp ult <8 x i64> [[TMP15]], [[TMP18]] +; CHECK-NEXT: [[TMP22:%.*]] = xor <8 x i1> [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = icmp slt <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i1> [[TMP22]] to i8 +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <8 x i1> [[TMP23]] to i8 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i8> [[_MSPROP]], i8 [[TMP24]], i32 1 +; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i8> [[VEC0]], i8 [[TMP25]], i32 1 +; CHECK-NEXT: [[TMP26:%.*]] = xor <8 x i64> [[A0]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP27:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 -1) +; CHECK-NEXT: [[TMP28:%.*]] = and <8 x i64> [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = or <8 x i64> [[TMP26]], [[TMP1]] +; CHECK-NEXT: [[TMP30:%.*]] = xor <8 x i64> [[A1]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP31:%.*]] = xor <8 x i64> [[TMP2]], splat (i64 -1) +; CHECK-NEXT: [[TMP32:%.*]] = and <8 x i64> [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = or <8 x i64> [[TMP30]], [[TMP2]] +; CHECK-NEXT: [[TMP34:%.*]] = icmp ule <8 x i64> [[TMP28]], [[TMP33]] +; CHECK-NEXT: [[TMP35:%.*]] = icmp ule <8 x i64> [[TMP29]], [[TMP32]] +; CHECK-NEXT: [[TMP36:%.*]] = xor <8 x i1> [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = icmp sle <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP38:%.*]] = bitcast <8 x i1> [[TMP36]] to i8 +; CHECK-NEXT: [[TMP39:%.*]] = bitcast <8 x i1> [[TMP37]] to i8 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i8> [[_MSPROP1]], i8 [[TMP38]], i32 2 +; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i8> [[VEC1]], i8 [[TMP39]], i32 2 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i8> [[_MSPROP2]], i8 0, i32 3 +; CHECK-NEXT: [[VEC3:%.*]] = insertelement <8 x i8> 
[[VEC2]], i8 0, i32 3 +; CHECK-NEXT: [[TMP40:%.*]] = xor <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP41:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP42:%.*]] = icmp ne <8 x i64> [[TMP41]], zeroinitializer +; CHECK-NEXT: [[TMP43:%.*]] = xor <8 x i64> [[TMP41]], splat (i64 -1) +; CHECK-NEXT: [[TMP44:%.*]] = and <8 x i64> [[TMP43]], [[TMP40]] +; CHECK-NEXT: [[TMP45:%.*]] = icmp eq <8 x i64> [[TMP44]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP4:%.*]] = and <8 x i1> [[TMP42]], [[TMP45]] +; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP47:%.*]] = bitcast <8 x i1> [[_MSPROP_ICMP4]] to i8 +; CHECK-NEXT: [[TMP48:%.*]] = bitcast <8 x i1> [[TMP46]] to i8 +; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <8 x i8> [[_MSPROP3]], i8 [[TMP47]], i32 4 +; CHECK-NEXT: [[VEC4:%.*]] = insertelement <8 x i8> [[VEC3]], i8 [[TMP48]], i32 4 +; CHECK-NEXT: [[TMP49:%.*]] = xor <8 x i64> [[A0]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP50:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 -1) +; CHECK-NEXT: [[TMP51:%.*]] = and <8 x i64> [[TMP49]], [[TMP50]] +; CHECK-NEXT: [[TMP52:%.*]] = or <8 x i64> [[TMP49]], [[TMP1]] +; CHECK-NEXT: [[TMP53:%.*]] = xor <8 x i64> [[A1]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP54:%.*]] = xor <8 x i64> [[TMP2]], splat (i64 -1) +; CHECK-NEXT: [[TMP55:%.*]] = and <8 x i64> [[TMP53]], [[TMP54]] +; CHECK-NEXT: [[TMP56:%.*]] = or <8 x i64> [[TMP53]], [[TMP2]] +; CHECK-NEXT: [[TMP57:%.*]] = icmp uge <8 x i64> [[TMP51]], [[TMP56]] +; CHECK-NEXT: [[TMP58:%.*]] = icmp uge <8 x i64> [[TMP52]], [[TMP55]] +; CHECK-NEXT: [[TMP59:%.*]] = xor <8 x i1> [[TMP57]], [[TMP58]] +; CHECK-NEXT: [[TMP60:%.*]] = icmp sge <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP61:%.*]] = bitcast <8 x i1> [[TMP59]] to i8 +; CHECK-NEXT: [[TMP62:%.*]] = bitcast <8 x i1> [[TMP60]] to i8 +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <8 x i8> [[_MSPROP5]], i8 [[TMP61]], i32 5 +; CHECK-NEXT: [[VEC5:%.*]] = insertelement <8 x i8> [[VEC4]], i8 [[TMP62]], i32 5 +; CHECK-NEXT: [[TMP63:%.*]] = xor <8 x i64> [[A0]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP64:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 -1) +; CHECK-NEXT: [[TMP65:%.*]] = and <8 x i64> [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP66:%.*]] = or <8 x i64> [[TMP63]], [[TMP1]] +; CHECK-NEXT: [[TMP67:%.*]] = xor <8 x i64> [[A1]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP68:%.*]] = xor <8 x i64> [[TMP2]], splat (i64 -1) +; CHECK-NEXT: [[TMP69:%.*]] = and <8 x i64> [[TMP67]], [[TMP68]] +; CHECK-NEXT: [[TMP70:%.*]] = or <8 x i64> [[TMP67]], [[TMP2]] +; CHECK-NEXT: [[TMP71:%.*]] = icmp ugt <8 x i64> [[TMP65]], [[TMP70]] +; CHECK-NEXT: [[TMP72:%.*]] = icmp ugt <8 x i64> [[TMP66]], [[TMP69]] +; CHECK-NEXT: [[TMP73:%.*]] = xor <8 x i1> [[TMP71]], [[TMP72]] +; CHECK-NEXT: [[TMP74:%.*]] = icmp sgt <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP75:%.*]] = bitcast <8 x i1> [[TMP73]] to i8 +; CHECK-NEXT: [[TMP76:%.*]] = bitcast <8 x i1> [[TMP74]] to i8 +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <8 x i8> [[_MSPROP6]], i8 [[TMP75]], i32 6 +; CHECK-NEXT: [[VEC6:%.*]] = insertelement <8 x i8> [[VEC5]], i8 [[TMP76]], i32 6 +; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <8 x i8> [[_MSPROP7]], i8 0, i32 7 +; CHECK-NEXT: [[VEC7:%.*]] = insertelement <8 x i8> [[VEC6]], i8 -1, i32 7 +; CHECK-NEXT: store <8 x i8> [[_MSPROP8]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[VEC7]] +; + %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1) + %vec0 = 
insertelement <8 x i8> undef, i8 %res0, i32 0 + %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1) + %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 + %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1) + %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 + %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1) + %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 + %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1) + %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 + %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1) + %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 + %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1) + %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 + %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1) + %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 + ret <8 x i8> %vec7 +} + +define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_cmp_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = xor <8 x i64> [[A0:%.*]], [[A1:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <8 x i64> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i64> [[TMP5]], splat (i64 -1) +; CHECK-NEXT: [[TMP8:%.*]] = and <8 x i64> [[TMP7]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <8 x i64> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <8 x i1> [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = and <8 x i1> [[_MSPROP_ICMP]], [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i1> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = and <8 x i1> [[_MSPROP_ICMP]], [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i1> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i1> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i1> [[TMP10]], [[TMP12]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i1> [[TMP17]] to i8 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <8 x i1> [[TMP18]] to i8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> splat (i8 -1), i8 [[TMP19]], i32 0 +; CHECK-NEXT: [[VEC0:%.*]] = insertelement <8 x i8> undef, i8 [[TMP20]], i32 0 +; CHECK-NEXT: [[TMP21:%.*]] = xor <8 x i64> [[A0]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP22:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 -1) +; CHECK-NEXT: [[TMP23:%.*]] = and <8 x i64> [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = or <8 x i64> [[TMP21]], [[TMP1]] +; CHECK-NEXT: [[TMP25:%.*]] = xor <8 x i64> [[A1]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP26:%.*]] = xor <8 x i64> [[TMP2]], splat (i64 -1) +; CHECK-NEXT: [[TMP27:%.*]] = and <8 x i64> [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = or 
<8 x i64> [[TMP25]], [[TMP2]] +; CHECK-NEXT: [[TMP29:%.*]] = icmp ult <8 x i64> [[TMP23]], [[TMP28]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp ult <8 x i64> [[TMP24]], [[TMP27]] +; CHECK-NEXT: [[TMP31:%.*]] = xor <8 x i1> [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP32:%.*]] = icmp slt <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP33:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP34:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP35:%.*]] = and <8 x i1> [[TMP31]], [[TMP33]] +; CHECK-NEXT: [[TMP36:%.*]] = and <8 x i1> [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP37:%.*]] = and <8 x i1> [[TMP31]], [[TMP34]] +; CHECK-NEXT: [[TMP38:%.*]] = or <8 x i1> [[TMP35]], [[TMP36]] +; CHECK-NEXT: [[TMP39:%.*]] = or <8 x i1> [[TMP38]], [[TMP37]] +; CHECK-NEXT: [[TMP40:%.*]] = and <8 x i1> [[TMP32]], [[TMP34]] +; CHECK-NEXT: [[TMP41:%.*]] = bitcast <8 x i1> [[TMP39]] to i8 +; CHECK-NEXT: [[TMP42:%.*]] = bitcast <8 x i1> [[TMP40]] to i8 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i8> [[_MSPROP]], i8 [[TMP41]], i32 1 +; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i8> [[VEC0]], i8 [[TMP42]], i32 1 +; CHECK-NEXT: [[TMP43:%.*]] = xor <8 x i64> [[A0]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP44:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 -1) +; CHECK-NEXT: [[TMP45:%.*]] = and <8 x i64> [[TMP43]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = or <8 x i64> [[TMP43]], [[TMP1]] +; CHECK-NEXT: [[TMP47:%.*]] = xor <8 x i64> [[A1]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP48:%.*]] = xor <8 x i64> [[TMP2]], splat (i64 -1) +; CHECK-NEXT: [[TMP49:%.*]] = and <8 x i64> [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = or <8 x i64> [[TMP47]], [[TMP2]] +; CHECK-NEXT: [[TMP51:%.*]] = icmp ule <8 x i64> [[TMP45]], [[TMP50]] +; CHECK-NEXT: [[TMP52:%.*]] = icmp ule <8 x i64> [[TMP46]], [[TMP49]] +; CHECK-NEXT: [[TMP53:%.*]] = xor <8 x i1> [[TMP51]], [[TMP52]] +; CHECK-NEXT: [[TMP54:%.*]] = icmp sle <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP55:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP56:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP57:%.*]] = and <8 x i1> [[TMP53]], [[TMP55]] +; CHECK-NEXT: [[TMP58:%.*]] = and <8 x i1> [[TMP54]], [[TMP55]] +; CHECK-NEXT: [[TMP59:%.*]] = and <8 x i1> [[TMP53]], [[TMP56]] +; CHECK-NEXT: [[TMP60:%.*]] = or <8 x i1> [[TMP57]], [[TMP58]] +; CHECK-NEXT: [[TMP61:%.*]] = or <8 x i1> [[TMP60]], [[TMP59]] +; CHECK-NEXT: [[TMP62:%.*]] = and <8 x i1> [[TMP54]], [[TMP56]] +; CHECK-NEXT: [[TMP63:%.*]] = bitcast <8 x i1> [[TMP61]] to i8 +; CHECK-NEXT: [[TMP64:%.*]] = bitcast <8 x i1> [[TMP62]] to i8 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i8> [[_MSPROP1]], i8 [[TMP63]], i32 2 +; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i8> [[VEC1]], i8 [[TMP64]], i32 2 +; CHECK-NEXT: [[TMP65:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP66:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP67:%.*]] = and <8 x i1> zeroinitializer, [[TMP65]] +; CHECK-NEXT: [[TMP68:%.*]] = and <8 x i1> zeroinitializer, [[TMP65]] +; CHECK-NEXT: [[TMP69:%.*]] = and <8 x i1> zeroinitializer, [[TMP66]] +; CHECK-NEXT: [[TMP70:%.*]] = or <8 x i1> [[TMP67]], [[TMP68]] +; CHECK-NEXT: [[TMP71:%.*]] = or <8 x i1> [[TMP70]], [[TMP69]] +; CHECK-NEXT: [[TMP72:%.*]] = and <8 x i1> zeroinitializer, [[TMP66]] +; CHECK-NEXT: [[TMP73:%.*]] = bitcast <8 x i1> [[TMP71]] to i8 +; CHECK-NEXT: [[TMP74:%.*]] = bitcast <8 x i1> [[TMP72]] to i8 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i8> [[_MSPROP2]], i8 [[TMP73]], i32 3 +; CHECK-NEXT: 
[[VEC3:%.*]] = insertelement <8 x i8> [[VEC2]], i8 [[TMP74]], i32 3 +; CHECK-NEXT: [[TMP75:%.*]] = xor <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP76:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP77:%.*]] = icmp ne <8 x i64> [[TMP76]], zeroinitializer +; CHECK-NEXT: [[TMP78:%.*]] = xor <8 x i64> [[TMP76]], splat (i64 -1) +; CHECK-NEXT: [[TMP79:%.*]] = and <8 x i64> [[TMP78]], [[TMP75]] +; CHECK-NEXT: [[TMP80:%.*]] = icmp eq <8 x i64> [[TMP79]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP4:%.*]] = and <8 x i1> [[TMP77]], [[TMP80]] +; CHECK-NEXT: [[TMP81:%.*]] = icmp ne <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP82:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP83:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP84:%.*]] = and <8 x i1> [[_MSPROP_ICMP4]], [[TMP82]] +; CHECK-NEXT: [[TMP85:%.*]] = and <8 x i1> [[TMP81]], [[TMP82]] +; CHECK-NEXT: [[TMP86:%.*]] = and <8 x i1> [[_MSPROP_ICMP4]], [[TMP83]] +; CHECK-NEXT: [[TMP87:%.*]] = or <8 x i1> [[TMP84]], [[TMP85]] +; CHECK-NEXT: [[TMP88:%.*]] = or <8 x i1> [[TMP87]], [[TMP86]] +; CHECK-NEXT: [[TMP89:%.*]] = and <8 x i1> [[TMP81]], [[TMP83]] +; CHECK-NEXT: [[TMP90:%.*]] = bitcast <8 x i1> [[TMP88]] to i8 +; CHECK-NEXT: [[TMP91:%.*]] = bitcast <8 x i1> [[TMP89]] to i8 +; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <8 x i8> [[_MSPROP3]], i8 [[TMP90]], i32 4 +; CHECK-NEXT: [[VEC4:%.*]] = insertelement <8 x i8> [[VEC3]], i8 [[TMP91]], i32 4 +; CHECK-NEXT: [[TMP92:%.*]] = xor <8 x i64> [[A0]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP93:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 -1) +; CHECK-NEXT: [[TMP94:%.*]] = and <8 x i64> [[TMP92]], [[TMP93]] +; CHECK-NEXT: [[TMP95:%.*]] = or <8 x i64> [[TMP92]], [[TMP1]] +; CHECK-NEXT: [[TMP96:%.*]] = xor <8 x i64> [[A1]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP97:%.*]] = xor <8 x i64> [[TMP2]], splat (i64 -1) +; CHECK-NEXT: [[TMP98:%.*]] = and <8 x i64> [[TMP96]], [[TMP97]] +; CHECK-NEXT: [[TMP99:%.*]] = or <8 x i64> [[TMP96]], [[TMP2]] +; CHECK-NEXT: [[TMP100:%.*]] = icmp uge <8 x i64> [[TMP94]], [[TMP99]] +; CHECK-NEXT: [[TMP101:%.*]] = icmp uge <8 x i64> [[TMP95]], [[TMP98]] +; CHECK-NEXT: [[TMP102:%.*]] = xor <8 x i1> [[TMP100]], [[TMP101]] +; CHECK-NEXT: [[TMP103:%.*]] = icmp sge <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP104:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP105:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP106:%.*]] = and <8 x i1> [[TMP102]], [[TMP104]] +; CHECK-NEXT: [[TMP107:%.*]] = and <8 x i1> [[TMP103]], [[TMP104]] +; CHECK-NEXT: [[TMP108:%.*]] = and <8 x i1> [[TMP102]], [[TMP105]] +; CHECK-NEXT: [[TMP109:%.*]] = or <8 x i1> [[TMP106]], [[TMP107]] +; CHECK-NEXT: [[TMP110:%.*]] = or <8 x i1> [[TMP109]], [[TMP108]] +; CHECK-NEXT: [[TMP111:%.*]] = and <8 x i1> [[TMP103]], [[TMP105]] +; CHECK-NEXT: [[TMP112:%.*]] = bitcast <8 x i1> [[TMP110]] to i8 +; CHECK-NEXT: [[TMP113:%.*]] = bitcast <8 x i1> [[TMP111]] to i8 +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <8 x i8> [[_MSPROP5]], i8 [[TMP112]], i32 5 +; CHECK-NEXT: [[VEC5:%.*]] = insertelement <8 x i8> [[VEC4]], i8 [[TMP113]], i32 5 +; CHECK-NEXT: [[TMP114:%.*]] = xor <8 x i64> [[A0]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP115:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 -1) +; CHECK-NEXT: [[TMP116:%.*]] = and <8 x i64> [[TMP114]], [[TMP115]] +; CHECK-NEXT: [[TMP117:%.*]] = or <8 x i64> [[TMP114]], [[TMP1]] +; CHECK-NEXT: [[TMP118:%.*]] = xor <8 x i64> [[A1]], splat (i64 -9223372036854775808) +; CHECK-NEXT: [[TMP119:%.*]] = 
xor <8 x i64> [[TMP2]], splat (i64 -1) +; CHECK-NEXT: [[TMP120:%.*]] = and <8 x i64> [[TMP118]], [[TMP119]] +; CHECK-NEXT: [[TMP121:%.*]] = or <8 x i64> [[TMP118]], [[TMP2]] +; CHECK-NEXT: [[TMP122:%.*]] = icmp ugt <8 x i64> [[TMP116]], [[TMP121]] +; CHECK-NEXT: [[TMP123:%.*]] = icmp ugt <8 x i64> [[TMP117]], [[TMP120]] +; CHECK-NEXT: [[TMP124:%.*]] = xor <8 x i1> [[TMP122]], [[TMP123]] +; CHECK-NEXT: [[TMP125:%.*]] = icmp sgt <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP126:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP127:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP128:%.*]] = and <8 x i1> [[TMP124]], [[TMP126]] +; CHECK-NEXT: [[TMP129:%.*]] = and <8 x i1> [[TMP125]], [[TMP126]] +; CHECK-NEXT: [[TMP130:%.*]] = and <8 x i1> [[TMP124]], [[TMP127]] +; CHECK-NEXT: [[TMP131:%.*]] = or <8 x i1> [[TMP128]], [[TMP129]] +; CHECK-NEXT: [[TMP132:%.*]] = or <8 x i1> [[TMP131]], [[TMP130]] +; CHECK-NEXT: [[TMP133:%.*]] = and <8 x i1> [[TMP125]], [[TMP127]] +; CHECK-NEXT: [[TMP134:%.*]] = bitcast <8 x i1> [[TMP132]] to i8 +; CHECK-NEXT: [[TMP135:%.*]] = bitcast <8 x i1> [[TMP133]] to i8 +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <8 x i8> [[_MSPROP6]], i8 [[TMP134]], i32 6 +; CHECK-NEXT: [[VEC6:%.*]] = insertelement <8 x i8> [[VEC5]], i8 [[TMP135]], i32 6 +; CHECK-NEXT: [[TMP136:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP137:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP138:%.*]] = and <8 x i1> zeroinitializer, [[TMP136]] +; CHECK-NEXT: [[TMP139:%.*]] = and <8 x i1> splat (i1 true), [[TMP136]] +; CHECK-NEXT: [[TMP140:%.*]] = and <8 x i1> zeroinitializer, [[TMP137]] +; CHECK-NEXT: [[TMP141:%.*]] = or <8 x i1> [[TMP138]], [[TMP139]] +; CHECK-NEXT: [[TMP142:%.*]] = or <8 x i1> [[TMP141]], [[TMP140]] +; CHECK-NEXT: [[TMP143:%.*]] = and <8 x i1> splat (i1 true), [[TMP137]] +; CHECK-NEXT: [[TMP144:%.*]] = bitcast <8 x i1> [[TMP142]] to i8 +; CHECK-NEXT: [[TMP145:%.*]] = bitcast <8 x i1> [[TMP143]] to i8 +; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <8 x i8> [[_MSPROP7]], i8 [[TMP144]], i32 7 +; CHECK-NEXT: [[VEC7:%.*]] = insertelement <8 x i8> [[VEC6]], i8 [[TMP145]], i32 7 +; CHECK-NEXT: store <8 x i8> [[_MSPROP8]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[VEC7]] +; + %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask) + %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 + %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask) + %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 + %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask) + %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 + %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask) + %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 + %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask) + %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 + %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask) + %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 + %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask) + %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 + %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask) + %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 + ret <8 x i8> %vec7 +} + +declare i8 
@llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone + +define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) #0 { +; CHECK-LABEL: @test_ucmp_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i64> [[A0:%.*]], [[A1:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = xor <8 x i64> [[TMP4]], splat (i64 -1) +; CHECK-NEXT: [[TMP7:%.*]] = and <8 x i64> [[TMP6]], [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <8 x i64> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <8 x i1> [[TMP5]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i1> [[_MSPROP_ICMP]] to i8 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i1> [[TMP9]] to i8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> splat (i8 -1), i8 [[TMP10]], i32 0 +; CHECK-NEXT: [[VEC0:%.*]] = insertelement <8 x i8> undef, i8 [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 -1) +; CHECK-NEXT: [[TMP13:%.*]] = and <8 x i64> [[A0]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i64> [[TMP2]], splat (i64 -1) +; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i64> [[A1]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp ult <8 x i64> [[TMP13]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = icmp ult <8 x i64> [[TMP14]], [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = xor <8 x i1> [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp ult <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i1> [[TMP20]] to i8 +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i1> [[TMP21]] to i8 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i8> [[_MSPROP]], i8 [[TMP22]], i32 1 +; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i8> [[VEC0]], i8 [[TMP23]], i32 1 +; CHECK-NEXT: [[TMP24:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 -1) +; CHECK-NEXT: [[TMP25:%.*]] = and <8 x i64> [[A0]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = or <8 x i64> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP27:%.*]] = xor <8 x i64> [[TMP2]], splat (i64 -1) +; CHECK-NEXT: [[TMP28:%.*]] = and <8 x i64> [[A1]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = or <8 x i64> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp ule <8 x i64> [[TMP25]], [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = icmp ule <8 x i64> [[TMP26]], [[TMP28]] +; CHECK-NEXT: [[TMP32:%.*]] = xor <8 x i1> [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = icmp ule <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP34:%.*]] = bitcast <8 x i1> [[TMP32]] to i8 +; CHECK-NEXT: [[TMP35:%.*]] = bitcast <8 x i1> [[TMP33]] to i8 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i8> [[_MSPROP1]], i8 [[TMP34]], i32 2 +; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i8> [[VEC1]], i8 [[TMP35]], i32 2 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i8> [[_MSPROP2]], i8 0, i32 3 +; CHECK-NEXT: [[VEC3:%.*]] = insertelement <8 x i8> [[VEC2]], i8 0, i32 3 +; CHECK-NEXT: [[TMP36:%.*]] = xor <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP37:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP38:%.*]] = icmp ne <8 x i64> 
[[TMP37]], zeroinitializer +; CHECK-NEXT: [[TMP39:%.*]] = xor <8 x i64> [[TMP37]], splat (i64 -1) +; CHECK-NEXT: [[TMP40:%.*]] = and <8 x i64> [[TMP39]], [[TMP36]] +; CHECK-NEXT: [[TMP41:%.*]] = icmp eq <8 x i64> [[TMP40]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP4:%.*]] = and <8 x i1> [[TMP38]], [[TMP41]] +; CHECK-NEXT: [[TMP42:%.*]] = icmp ne <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP43:%.*]] = bitcast <8 x i1> [[_MSPROP_ICMP4]] to i8 +; CHECK-NEXT: [[TMP44:%.*]] = bitcast <8 x i1> [[TMP42]] to i8 +; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <8 x i8> [[_MSPROP3]], i8 [[TMP43]], i32 4 +; CHECK-NEXT: [[VEC4:%.*]] = insertelement <8 x i8> [[VEC3]], i8 [[TMP44]], i32 4 +; CHECK-NEXT: [[TMP45:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 -1) +; CHECK-NEXT: [[TMP46:%.*]] = and <8 x i64> [[A0]], [[TMP45]] +; CHECK-NEXT: [[TMP47:%.*]] = or <8 x i64> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP48:%.*]] = xor <8 x i64> [[TMP2]], splat (i64 -1) +; CHECK-NEXT: [[TMP49:%.*]] = and <8 x i64> [[A1]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = or <8 x i64> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP51:%.*]] = icmp uge <8 x i64> [[TMP46]], [[TMP50]] +; CHECK-NEXT: [[TMP52:%.*]] = icmp uge <8 x i64> [[TMP47]], [[TMP49]] +; CHECK-NEXT: [[TMP53:%.*]] = xor <8 x i1> [[TMP51]], [[TMP52]] +; CHECK-NEXT: [[TMP54:%.*]] = icmp uge <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP55:%.*]] = bitcast <8 x i1> [[TMP53]] to i8 +; CHECK-NEXT: [[TMP56:%.*]] = bitcast <8 x i1> [[TMP54]] to i8 +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <8 x i8> [[_MSPROP5]], i8 [[TMP55]], i32 5 +; CHECK-NEXT: [[VEC5:%.*]] = insertelement <8 x i8> [[VEC4]], i8 [[TMP56]], i32 5 +; CHECK-NEXT: [[TMP57:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 -1) +; CHECK-NEXT: [[TMP58:%.*]] = and <8 x i64> [[A0]], [[TMP57]] +; CHECK-NEXT: [[TMP59:%.*]] = or <8 x i64> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP60:%.*]] = xor <8 x i64> [[TMP2]], splat (i64 -1) +; CHECK-NEXT: [[TMP61:%.*]] = and <8 x i64> [[A1]], [[TMP60]] +; CHECK-NEXT: [[TMP62:%.*]] = or <8 x i64> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP63:%.*]] = icmp ugt <8 x i64> [[TMP58]], [[TMP62]] +; CHECK-NEXT: [[TMP64:%.*]] = icmp ugt <8 x i64> [[TMP59]], [[TMP61]] +; CHECK-NEXT: [[TMP65:%.*]] = xor <8 x i1> [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP66:%.*]] = icmp ugt <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP67:%.*]] = bitcast <8 x i1> [[TMP65]] to i8 +; CHECK-NEXT: [[TMP68:%.*]] = bitcast <8 x i1> [[TMP66]] to i8 +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <8 x i8> [[_MSPROP6]], i8 [[TMP67]], i32 6 +; CHECK-NEXT: [[VEC6:%.*]] = insertelement <8 x i8> [[VEC5]], i8 [[TMP68]], i32 6 +; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <8 x i8> [[_MSPROP7]], i8 0, i32 7 +; CHECK-NEXT: [[VEC7:%.*]] = insertelement <8 x i8> [[VEC6]], i8 -1, i32 7 +; CHECK-NEXT: store <8 x i8> [[_MSPROP8]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[VEC7]] +; + %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1) + %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 + %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1) + %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 + %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1) + %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 + %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1) + %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 + %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> 
%a0, <8 x i64> %a1, i32 4, i8 -1) + %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 + %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1) + %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 + %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1) + %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 + %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1) + %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 + ret <8 x i8> %vec7 +} + +define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_ucmp_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = xor <8 x i64> [[A0:%.*]], [[A1:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <8 x i64> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i64> [[TMP5]], splat (i64 -1) +; CHECK-NEXT: [[TMP8:%.*]] = and <8 x i64> [[TMP7]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <8 x i64> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <8 x i1> [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = and <8 x i1> [[_MSPROP_ICMP]], [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i1> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = and <8 x i1> [[_MSPROP_ICMP]], [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i1> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i1> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i1> [[TMP10]], [[TMP12]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i1> [[TMP17]] to i8 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <8 x i1> [[TMP18]] to i8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> splat (i8 -1), i8 [[TMP19]], i32 0 +; CHECK-NEXT: [[VEC0:%.*]] = insertelement <8 x i8> undef, i8 [[TMP20]], i32 0 +; CHECK-NEXT: [[TMP21:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 -1) +; CHECK-NEXT: [[TMP22:%.*]] = and <8 x i64> [[A0]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = or <8 x i64> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP24:%.*]] = xor <8 x i64> [[TMP2]], splat (i64 -1) +; CHECK-NEXT: [[TMP25:%.*]] = and <8 x i64> [[A1]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = or <8 x i64> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP27:%.*]] = icmp ult <8 x i64> [[TMP22]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = icmp ult <8 x i64> [[TMP23]], [[TMP25]] +; CHECK-NEXT: [[TMP29:%.*]] = xor <8 x i1> [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp ult <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP31:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP32:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP33:%.*]] = and <8 x i1> [[TMP29]], [[TMP31]] +; CHECK-NEXT: [[TMP34:%.*]] = and <8 x i1> [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP35:%.*]] = and <8 x i1> [[TMP29]], [[TMP32]] +; CHECK-NEXT: [[TMP36:%.*]] = or <8 x i1> [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP37:%.*]] = or <8 x i1> 
[[TMP36]], [[TMP35]] +; CHECK-NEXT: [[TMP38:%.*]] = and <8 x i1> [[TMP30]], [[TMP32]] +; CHECK-NEXT: [[TMP39:%.*]] = bitcast <8 x i1> [[TMP37]] to i8 +; CHECK-NEXT: [[TMP40:%.*]] = bitcast <8 x i1> [[TMP38]] to i8 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i8> [[_MSPROP]], i8 [[TMP39]], i32 1 +; CHECK-NEXT: [[VEC1:%.*]] = insertelement <8 x i8> [[VEC0]], i8 [[TMP40]], i32 1 +; CHECK-NEXT: [[TMP41:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 -1) +; CHECK-NEXT: [[TMP42:%.*]] = and <8 x i64> [[A0]], [[TMP41]] +; CHECK-NEXT: [[TMP43:%.*]] = or <8 x i64> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP44:%.*]] = xor <8 x i64> [[TMP2]], splat (i64 -1) +; CHECK-NEXT: [[TMP45:%.*]] = and <8 x i64> [[A1]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = or <8 x i64> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP47:%.*]] = icmp ule <8 x i64> [[TMP42]], [[TMP46]] +; CHECK-NEXT: [[TMP48:%.*]] = icmp ule <8 x i64> [[TMP43]], [[TMP45]] +; CHECK-NEXT: [[TMP49:%.*]] = xor <8 x i1> [[TMP47]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = icmp ule <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP51:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP52:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP53:%.*]] = and <8 x i1> [[TMP49]], [[TMP51]] +; CHECK-NEXT: [[TMP54:%.*]] = and <8 x i1> [[TMP50]], [[TMP51]] +; CHECK-NEXT: [[TMP55:%.*]] = and <8 x i1> [[TMP49]], [[TMP52]] +; CHECK-NEXT: [[TMP56:%.*]] = or <8 x i1> [[TMP53]], [[TMP54]] +; CHECK-NEXT: [[TMP57:%.*]] = or <8 x i1> [[TMP56]], [[TMP55]] +; CHECK-NEXT: [[TMP58:%.*]] = and <8 x i1> [[TMP50]], [[TMP52]] +; CHECK-NEXT: [[TMP59:%.*]] = bitcast <8 x i1> [[TMP57]] to i8 +; CHECK-NEXT: [[TMP60:%.*]] = bitcast <8 x i1> [[TMP58]] to i8 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i8> [[_MSPROP1]], i8 [[TMP59]], i32 2 +; CHECK-NEXT: [[VEC2:%.*]] = insertelement <8 x i8> [[VEC1]], i8 [[TMP60]], i32 2 +; CHECK-NEXT: [[TMP61:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP62:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP63:%.*]] = and <8 x i1> zeroinitializer, [[TMP61]] +; CHECK-NEXT: [[TMP64:%.*]] = and <8 x i1> zeroinitializer, [[TMP61]] +; CHECK-NEXT: [[TMP65:%.*]] = and <8 x i1> zeroinitializer, [[TMP62]] +; CHECK-NEXT: [[TMP66:%.*]] = or <8 x i1> [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP67:%.*]] = or <8 x i1> [[TMP66]], [[TMP65]] +; CHECK-NEXT: [[TMP68:%.*]] = and <8 x i1> zeroinitializer, [[TMP62]] +; CHECK-NEXT: [[TMP69:%.*]] = bitcast <8 x i1> [[TMP67]] to i8 +; CHECK-NEXT: [[TMP70:%.*]] = bitcast <8 x i1> [[TMP68]] to i8 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i8> [[_MSPROP2]], i8 [[TMP69]], i32 3 +; CHECK-NEXT: [[VEC3:%.*]] = insertelement <8 x i8> [[VEC2]], i8 [[TMP70]], i32 3 +; CHECK-NEXT: [[TMP71:%.*]] = xor <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP72:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP73:%.*]] = icmp ne <8 x i64> [[TMP72]], zeroinitializer +; CHECK-NEXT: [[TMP74:%.*]] = xor <8 x i64> [[TMP72]], splat (i64 -1) +; CHECK-NEXT: [[TMP75:%.*]] = and <8 x i64> [[TMP74]], [[TMP71]] +; CHECK-NEXT: [[TMP76:%.*]] = icmp eq <8 x i64> [[TMP75]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP4:%.*]] = and <8 x i1> [[TMP73]], [[TMP76]] +; CHECK-NEXT: [[TMP77:%.*]] = icmp ne <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP78:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP79:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP80:%.*]] = and <8 x i1> [[_MSPROP_ICMP4]], [[TMP78]] +; CHECK-NEXT: [[TMP81:%.*]] = and <8 x i1> [[TMP77]], [[TMP78]] +; CHECK-NEXT: [[TMP82:%.*]] = and <8 
x i1> [[_MSPROP_ICMP4]], [[TMP79]] +; CHECK-NEXT: [[TMP83:%.*]] = or <8 x i1> [[TMP80]], [[TMP81]] +; CHECK-NEXT: [[TMP84:%.*]] = or <8 x i1> [[TMP83]], [[TMP82]] +; CHECK-NEXT: [[TMP85:%.*]] = and <8 x i1> [[TMP77]], [[TMP79]] +; CHECK-NEXT: [[TMP86:%.*]] = bitcast <8 x i1> [[TMP84]] to i8 +; CHECK-NEXT: [[TMP87:%.*]] = bitcast <8 x i1> [[TMP85]] to i8 +; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <8 x i8> [[_MSPROP3]], i8 [[TMP86]], i32 4 +; CHECK-NEXT: [[VEC4:%.*]] = insertelement <8 x i8> [[VEC3]], i8 [[TMP87]], i32 4 +; CHECK-NEXT: [[TMP88:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 -1) +; CHECK-NEXT: [[TMP89:%.*]] = and <8 x i64> [[A0]], [[TMP88]] +; CHECK-NEXT: [[TMP90:%.*]] = or <8 x i64> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP91:%.*]] = xor <8 x i64> [[TMP2]], splat (i64 -1) +; CHECK-NEXT: [[TMP92:%.*]] = and <8 x i64> [[A1]], [[TMP91]] +; CHECK-NEXT: [[TMP93:%.*]] = or <8 x i64> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP94:%.*]] = icmp uge <8 x i64> [[TMP89]], [[TMP93]] +; CHECK-NEXT: [[TMP95:%.*]] = icmp uge <8 x i64> [[TMP90]], [[TMP92]] +; CHECK-NEXT: [[TMP96:%.*]] = xor <8 x i1> [[TMP94]], [[TMP95]] +; CHECK-NEXT: [[TMP97:%.*]] = icmp uge <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP98:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP99:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP100:%.*]] = and <8 x i1> [[TMP96]], [[TMP98]] +; CHECK-NEXT: [[TMP101:%.*]] = and <8 x i1> [[TMP97]], [[TMP98]] +; CHECK-NEXT: [[TMP102:%.*]] = and <8 x i1> [[TMP96]], [[TMP99]] +; CHECK-NEXT: [[TMP103:%.*]] = or <8 x i1> [[TMP100]], [[TMP101]] +; CHECK-NEXT: [[TMP104:%.*]] = or <8 x i1> [[TMP103]], [[TMP102]] +; CHECK-NEXT: [[TMP105:%.*]] = and <8 x i1> [[TMP97]], [[TMP99]] +; CHECK-NEXT: [[TMP106:%.*]] = bitcast <8 x i1> [[TMP104]] to i8 +; CHECK-NEXT: [[TMP107:%.*]] = bitcast <8 x i1> [[TMP105]] to i8 +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <8 x i8> [[_MSPROP5]], i8 [[TMP106]], i32 5 +; CHECK-NEXT: [[VEC5:%.*]] = insertelement <8 x i8> [[VEC4]], i8 [[TMP107]], i32 5 +; CHECK-NEXT: [[TMP108:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 -1) +; CHECK-NEXT: [[TMP109:%.*]] = and <8 x i64> [[A0]], [[TMP108]] +; CHECK-NEXT: [[TMP110:%.*]] = or <8 x i64> [[A0]], [[TMP1]] +; CHECK-NEXT: [[TMP111:%.*]] = xor <8 x i64> [[TMP2]], splat (i64 -1) +; CHECK-NEXT: [[TMP112:%.*]] = and <8 x i64> [[A1]], [[TMP111]] +; CHECK-NEXT: [[TMP113:%.*]] = or <8 x i64> [[A1]], [[TMP2]] +; CHECK-NEXT: [[TMP114:%.*]] = icmp ugt <8 x i64> [[TMP109]], [[TMP113]] +; CHECK-NEXT: [[TMP115:%.*]] = icmp ugt <8 x i64> [[TMP110]], [[TMP112]] +; CHECK-NEXT: [[TMP116:%.*]] = xor <8 x i1> [[TMP114]], [[TMP115]] +; CHECK-NEXT: [[TMP117:%.*]] = icmp ugt <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP118:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP119:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP120:%.*]] = and <8 x i1> [[TMP116]], [[TMP118]] +; CHECK-NEXT: [[TMP121:%.*]] = and <8 x i1> [[TMP117]], [[TMP118]] +; CHECK-NEXT: [[TMP122:%.*]] = and <8 x i1> [[TMP116]], [[TMP119]] +; CHECK-NEXT: [[TMP123:%.*]] = or <8 x i1> [[TMP120]], [[TMP121]] +; CHECK-NEXT: [[TMP124:%.*]] = or <8 x i1> [[TMP123]], [[TMP122]] +; CHECK-NEXT: [[TMP125:%.*]] = and <8 x i1> [[TMP117]], [[TMP119]] +; CHECK-NEXT: [[TMP126:%.*]] = bitcast <8 x i1> [[TMP124]] to i8 +; CHECK-NEXT: [[TMP127:%.*]] = bitcast <8 x i1> [[TMP125]] to i8 +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <8 x i8> [[_MSPROP6]], i8 [[TMP126]], i32 6 +; CHECK-NEXT: [[VEC6:%.*]] = insertelement <8 x i8> [[VEC5]], i8 [[TMP127]], i32 6 +; 
CHECK-NEXT: [[TMP128:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP129:%.*]] = bitcast i8 [[MASK]] to <8 x i1> +; CHECK-NEXT: [[TMP130:%.*]] = and <8 x i1> zeroinitializer, [[TMP128]] +; CHECK-NEXT: [[TMP131:%.*]] = and <8 x i1> splat (i1 true), [[TMP128]] +; CHECK-NEXT: [[TMP132:%.*]] = and <8 x i1> zeroinitializer, [[TMP129]] +; CHECK-NEXT: [[TMP133:%.*]] = or <8 x i1> [[TMP130]], [[TMP131]] +; CHECK-NEXT: [[TMP134:%.*]] = or <8 x i1> [[TMP133]], [[TMP132]] +; CHECK-NEXT: [[TMP135:%.*]] = and <8 x i1> splat (i1 true), [[TMP129]] +; CHECK-NEXT: [[TMP136:%.*]] = bitcast <8 x i1> [[TMP134]] to i8 +; CHECK-NEXT: [[TMP137:%.*]] = bitcast <8 x i1> [[TMP135]] to i8 +; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <8 x i8> [[_MSPROP7]], i8 [[TMP136]], i32 7 +; CHECK-NEXT: [[VEC7:%.*]] = insertelement <8 x i8> [[VEC6]], i8 [[TMP137]], i32 7 +; CHECK-NEXT: store <8 x i8> [[_MSPROP8]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i8> [[VEC7]] +; + %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask) + %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0 + %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask) + %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1 + %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask) + %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2 + %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask) + %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3 + %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask) + %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4 + %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask) + %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5 + %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask) + %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6 + %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask) + %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7 + ret <8 x i8> %vec7 +} + +declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone + +declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float>, <16 x float>, i16) + +define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512(<4 x float> %x0, <16 x float> %x2, i16 %mask) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_broadcastf32x4_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[X0:%.*]], <4 x float> [[X0]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[X0]], <4 x float> [[X0]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = 
select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x float> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x float> [[X2:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP13]], <16 x i32> [[TMP8]] +; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP7]], <16 x float> [[TMP5]], <16 x float> [[X2]] +; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x float> [[X0]], <4 x float> [[X0]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP17]], <16 x i32> [[_MSPROP2]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x float> [[TMP15]] to <16 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = xor <16 x i32> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = or <16 x i32> [[TMP20]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP22:%.*]] = or <16 x i32> [[TMP21]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT3:%.*]] = select <16 x i1> [[TMP16]], <16 x i32> [[TMP22]], <16 x i32> [[TMP18]] +; CHECK-NEXT: [[TMP23:%.*]] = select <16 x i1> [[TMP17]], <16 x float> [[TMP15]], <16 x float> zeroinitializer +; CHECK-NEXT: [[_MSPROP4:%.*]] = or <16 x i32> [[_MSPROP]], [[_MSPROP_SELECT]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <16 x float> [[TMP4]], [[TMP14]] +; CHECK-NEXT: [[_MSPROP5:%.*]] = or <16 x i32> [[_MSPROP_SELECT3]], [[_MSPROP4]] +; CHECK-NEXT: [[RES5:%.*]] = fadd <16 x float> [[TMP23]], [[RES4]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES5]] +; + %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 -1) + %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 %mask) + %res3 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %mask) + %res4 = fadd <16 x float> %res1, %res2 + %res5 = fadd <16 x float> %res3, %res4 + ret <16 x float> %res5 +} + +define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512_load(ptr %x0ptr, <16 x float> %x2, i16 %mask) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_broadcastf32x4_512_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[X0:%.*]] = load <4 x float>, ptr [[X0PTR:%.*]], align 16 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[X0PTR]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to 
ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP8]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[X0]], <4 x float> [[X0]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[X2:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[_MSPROP]] +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[X2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %x0 = load <4 x float>, ptr %x0ptr + %res = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 %mask) + ret <16 x float> %res +} + +declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double>, <8 x double>, i8) + +define <8 x double>@test_int_x86_avx512_broadcastf64x4_512(<4 x double> %x0, <8 x double> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_broadcastf64x4_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[X0:%.*]], <4 x double> [[X0]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP2]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 -1) + ret <8 x double> %res +} + +define <8 x double>@test_int_x86_avx512_mask_broadcastf64x4_512(<4 x double> %x0, <8 x double> %x2, i8 %mask) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_broadcastf64x4_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[X0:%.*]], <4 x double> [[X0]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x double> [[TMP4]] to <8 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x double> [[X2:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[_MSPROP]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP3]] +; 
CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP12]], <8 x i64> [[TMP7]] +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP6]], <8 x double> [[TMP4]], <8 x double> [[X2]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP13]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 %mask) + ret <8 x double> %res +} + +define <8 x double>@test_int_x86_avx512_maskz_broadcastf64x4_512(<4 x double> %x0, i8 %mask) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_maskz_broadcastf64x4_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[X0:%.*]], <4 x double> [[X0]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x double> [[TMP3]] to <8 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i64> [[TMP10]], <8 x i64> [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP5]], <8 x double> [[TMP3]], <8 x double> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP11]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> zeroinitializer, i8 %mask) + ret <8 x double> %res +} + +define <8 x double>@test_int_x86_avx512_mask_broadcastf64x4_512_load(ptr %x0ptr, <8 x double> %x2, i8 %mask) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_broadcastf64x4_512_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[X0:%.*]] = load <4 x double>, ptr [[X0PTR:%.*]], align 32 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[X0PTR]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i64>, ptr [[TMP8]], align 32 +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i64> [[_MSLD]], <4 x i64> [[_MSLD]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> [[X0]], <4 x double> [[X0]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 
[[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x double> [[TMP9]] to <8 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x double> [[X2:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i64> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[_MSPROP]] +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP17]], <8 x i64> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP11]], <8 x double> [[TMP9]], <8 x double> [[X2]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP18]] +; + %x0 = load <4 x double>, ptr %x0ptr + %res = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 %mask) + ret <8 x double> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32>, <16 x i32>, i16) + +define { <16 x i32>, <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_broadcasti32x4_512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_broadcasti32x4_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[X0:%.*]], <4 x i32> [[X0]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[X0]], <4 x i32> [[X0]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[X2]] +; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[X0]], <4 x i32> [[X0]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i16 [[MASK]] to <16 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP15]], <16 x i32> [[_MSPROP2]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = xor <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <16 x i32> [[TMP17]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP19:%.*]] = or <16 x i32> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT3:%.*]] = select <16 x i1> [[TMP14]], <16 x i32> [[TMP19]], <16 x i32> [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = select <16 x i1> [[TMP15]], <16 x i32> [[TMP13]], <16 x i32> 
zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } { <16 x i32> splat (i32 -1), <16 x i32> splat (i32 -1), <16 x i32> splat (i32 -1) }, <16 x i32> [[_MSPROP]], 0 +; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> [[TMP4]], 0 +; CHECK-NEXT: [[TMP22:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP21]], <16 x i32> [[_MSPROP_SELECT]], 1 +; CHECK-NEXT: [[RES4:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[RES3]], <16 x i32> [[TMP12]], 1 +; CHECK-NEXT: [[TMP23:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP22]], <16 x i32> [[_MSPROP_SELECT3]], 2 +; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[RES4]], <16 x i32> [[TMP20]], 2 +; CHECK-NEXT: store { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP23]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { <16 x i32>, <16 x i32>, <16 x i32> } [[RES5]] +; + %res0 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask) + %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask) + %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0 + %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res3, <16 x i32> %res1, 1 + %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res4, <16 x i32> %res2, 2 + ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5 +} + +define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x4_512_load(ptr %x0ptr, <16 x i32> %x2, i16 %mask) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_broadcasti32x4_512_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[X0:%.*]] = load <4 x i32>, ptr [[X0PTR:%.*]], align 16 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[X0PTR]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP8]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> [[_MSLD]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[X0]], <4 x i32> [[X0]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP9]], [[X2:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[_MSPROP]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] 
= select <16 x i1> [[TMP11]], <16 x i32> [[TMP9]], <16 x i32> [[X2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP16]] +; + %x0 = load <4 x i32>, ptr %x0ptr + %res = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask) + ret <16 x i32> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64>, <8 x i64>, i8) + +define <8 x i64>@test_int_x86_avx512_broadcasti64x4_512(<4 x i64> %x0, <8 x i64> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_broadcasti64x4_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[X0:%.*]], <4 x i64> [[X0]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP2]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 -1) + ret <8 x i64> %res +} + +define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x4_512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_broadcasti64x4_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[X0:%.*]], <4 x i64> [[X0]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP4]], [[X2:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP4]], <8 x i64> [[X2]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP11]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask) + ret <8 x i64> %res +} + +define <8 x i64>@test_int_x86_avx512_maskz_broadcasti64x4_512(<4 x i64> %x0, i8 %mask) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_maskz_broadcasti64x4_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[X0:%.*]], <4 x i64> [[X0]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select 
<8 x i1> [[TMP5]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[_MSPROP]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i64> [[TMP9]], <8 x i64> [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP3]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP10]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> zeroinitializer, i8 %mask) + ret <8 x i64> %res +} + +define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x4_512_load(ptr %x0ptr, <8 x i64> %x2, i8 %mask) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_broadcasti64x4_512_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[X0:%.*]] = load <4 x i64>, ptr [[X0PTR:%.*]], align 32 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[X0PTR]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i64>, ptr [[TMP8]], align 32 +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i64> [[_MSLD]], <4 x i64> [[_MSLD]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[X0]], <4 x i64> [[X0]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP9]], [[X2:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[_MSPROP]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP15]], <8 x i64> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP9]], <8 x i64> [[X2]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP16]] +; + %x0 = load <4 x i64>, ptr %x0ptr + %res = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask) + ret <8 x i64> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16) + +define <16 x i32>@test_int_x86_avx512_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pabs_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> [[X0:%.*]], i1 false) +; CHECK-NEXT: store <16 x i32> [[TMP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> 
[[TMP2]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1) + ret <16 x i32> %res +} + +define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> [[X0:%.*]], i1 false) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP1]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP4]], [[X1:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP1]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP4]], <16 x i32> [[X1]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP11]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) + ret <16 x i32> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8) + +define <8 x i64>@test_int_x86_avx512_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pabs_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.abs.v8i64(<8 x i64> [[X0:%.*]], i1 false) +; CHECK-NEXT: store <8 x i64> [[TMP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP2]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1) + ret <8 x i64> %res +} + +define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.abs.v8i64(<8 x i64> [[X0:%.*]], i1 false) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP1]], <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP4]], [[X1:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP1]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x 
i64> [[TMP4]], <8 x i64> [[X1]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP11]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) + ret <8 x i64> %res +} + +define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1, i8 %m) #0 { +; +; CHECK-LABEL: @test_vptestmq( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = and <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i64> [[A0:%.*]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = and <8 x i64> [[TMP1]], [[A1:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or <8 x i64> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = and <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP11]], splat (i64 -1) +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i64> [[TMP13]], [[TMP10]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <8 x i1> [[TMP12]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x i1> [[_MSPROP_ICMP]] to i8 +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x i1> [[TMP16]] to i8 +; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP20:%.*]] = and <8 x i64> [[A0]], [[TMP2]] +; CHECK-NEXT: [[TMP21:%.*]] = and <8 x i64> [[TMP1]], [[A1]] +; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i64> [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = or <8 x i64> [[TMP22]], [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = and <8 x i64> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP25:%.*]] = xor <8 x i64> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = or <8 x i64> [[TMP23]], zeroinitializer +; CHECK-NEXT: [[TMP27:%.*]] = icmp ne <8 x i64> [[TMP26]], zeroinitializer +; CHECK-NEXT: [[TMP28:%.*]] = xor <8 x i64> [[TMP26]], splat (i64 -1) +; CHECK-NEXT: [[TMP29:%.*]] = and <8 x i64> [[TMP28]], [[TMP25]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp eq <8 x i64> [[TMP29]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP1:%.*]] = and <8 x i1> [[TMP27]], [[TMP30]] +; CHECK-NEXT: [[TMP31:%.*]] = icmp ne <8 x i64> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP32:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP33:%.*]] = bitcast i8 [[M:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP34:%.*]] = and <8 x i1> [[_MSPROP_ICMP1]], [[TMP32]] +; CHECK-NEXT: [[TMP35:%.*]] = and <8 x i1> [[TMP31]], [[TMP32]] +; CHECK-NEXT: [[TMP36:%.*]] = and <8 x i1> [[_MSPROP_ICMP1]], [[TMP33]] +; CHECK-NEXT: [[TMP37:%.*]] = or <8 x i1> [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP38:%.*]] = or <8 x i1> [[TMP37]], [[TMP36]] +; CHECK-NEXT: [[TMP39:%.*]] = and <8 x i1> [[TMP31]], [[TMP33]] +; CHECK-NEXT: [[TMP40:%.*]] = bitcast <8 x i1> [[TMP38]] to i8 +; CHECK-NEXT: [[TMP41:%.*]] = bitcast <8 x i1> [[TMP39]] to i8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i8 [[TMP40]], [[TMP17]] +; CHECK-NEXT: 
[[RES2:%.*]] = add i8 [[TMP41]], [[TMP18]] +; CHECK-NEXT: store i8 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i8 [[RES2]] +; + %res = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1) + %res1 = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 %m) + %res2 = add i8 %res1, %res + ret i8 %res2 +} +declare i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64>, <8 x i64>, i8) + +define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1, i16 %m) #0 { +; +; CHECK-LABEL: @test_vptestmd( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = and <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i32> [[A0:%.*]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = and <16 x i32> [[TMP1]], [[A1:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i32> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = and <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP11]], splat (i32 -1) +; CHECK-NEXT: [[TMP14:%.*]] = and <16 x i32> [[TMP13]], [[TMP10]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <16 x i1> [[TMP12]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne <16 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <16 x i1> [[_MSPROP_ICMP]] to i16 +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <16 x i1> [[TMP16]] to i16 +; CHECK-NEXT: [[TMP19:%.*]] = and <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP20:%.*]] = and <16 x i32> [[A0]], [[TMP2]] +; CHECK-NEXT: [[TMP21:%.*]] = and <16 x i32> [[TMP1]], [[A1]] +; CHECK-NEXT: [[TMP22:%.*]] = or <16 x i32> [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = or <16 x i32> [[TMP22]], [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = and <16 x i32> [[A0]], [[A1]] +; CHECK-NEXT: [[TMP25:%.*]] = xor <16 x i32> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = or <16 x i32> [[TMP23]], zeroinitializer +; CHECK-NEXT: [[TMP27:%.*]] = icmp ne <16 x i32> [[TMP26]], zeroinitializer +; CHECK-NEXT: [[TMP28:%.*]] = xor <16 x i32> [[TMP26]], splat (i32 -1) +; CHECK-NEXT: [[TMP29:%.*]] = and <16 x i32> [[TMP28]], [[TMP25]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp eq <16 x i32> [[TMP29]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP1:%.*]] = and <16 x i1> [[TMP27]], [[TMP30]] +; CHECK-NEXT: [[TMP31:%.*]] = icmp ne <16 x i32> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP32:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP33:%.*]] = bitcast i16 [[M:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP34:%.*]] = and <16 x i1> [[_MSPROP_ICMP1]], [[TMP32]] +; CHECK-NEXT: [[TMP35:%.*]] = and <16 x i1> [[TMP31]], [[TMP32]] +; CHECK-NEXT: [[TMP36:%.*]] = and <16 x i1> [[_MSPROP_ICMP1]], [[TMP33]] +; CHECK-NEXT: [[TMP37:%.*]] = or <16 x i1> [[TMP34]], [[TMP35]] +; CHECK-NEXT: [[TMP38:%.*]] = or <16 x i1> [[TMP37]], [[TMP36]] +; CHECK-NEXT: [[TMP39:%.*]] = and <16 x i1> [[TMP31]], [[TMP33]] 
+; CHECK-NEXT: [[TMP40:%.*]] = bitcast <16 x i1> [[TMP38]] to i16 +; CHECK-NEXT: [[TMP41:%.*]] = bitcast <16 x i1> [[TMP39]] to i16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i16 [[TMP40]], [[TMP17]] +; CHECK-NEXT: [[RES2:%.*]] = add i16 [[TMP41]], [[TMP18]] +; CHECK-NEXT: store i16 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i16 [[RES2]] +; + %res = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1) + %res1 = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 %m) + %res2 = add i16 %res1, %res + ret i16 %res2 +} +declare i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32>, <16 x i32>, i16) + +declare i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32>, <16 x i32>, i16 %x2) + +define i16@test_int_x86_avx512_ptestnm_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_ptestnm_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = and <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i32> [[X0:%.*]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = and <16 x i32> [[TMP1]], [[X1:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i32> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = and <16 x i32> [[X0]], [[X1]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP11]], splat (i32 -1) +; CHECK-NEXT: [[TMP14:%.*]] = and <16 x i32> [[TMP13]], [[TMP10]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <16 x i1> [[TMP12]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq <16 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = and <16 x i1> [[_MSPROP_ICMP]], [[TMP17]] +; CHECK-NEXT: [[TMP20:%.*]] = and <16 x i1> [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = and <16 x i1> [[_MSPROP_ICMP]], [[TMP18]] +; CHECK-NEXT: [[TMP22:%.*]] = or <16 x i1> [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = or <16 x i1> [[TMP22]], [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = and <16 x i1> [[TMP16]], [[TMP18]] +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <16 x i1> [[TMP23]] to i16 +; CHECK-NEXT: [[TMP26:%.*]] = bitcast <16 x i1> [[TMP24]] to i16 +; CHECK-NEXT: [[TMP27:%.*]] = and <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP28:%.*]] = and <16 x i32> [[X0]], [[TMP2]] +; CHECK-NEXT: [[TMP29:%.*]] = and <16 x i32> [[TMP1]], [[X1]] +; CHECK-NEXT: [[TMP30:%.*]] = or <16 x i32> [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP31:%.*]] = or <16 x i32> [[TMP30]], [[TMP29]] +; CHECK-NEXT: [[TMP32:%.*]] = and <16 x i32> [[X0]], [[X1]] +; CHECK-NEXT: [[TMP33:%.*]] = xor <16 x i32> [[TMP32]], zeroinitializer +; CHECK-NEXT: [[TMP34:%.*]] = or <16 x i32> [[TMP31]], zeroinitializer +; CHECK-NEXT: [[TMP35:%.*]] = icmp ne <16 x i32> [[TMP34]], zeroinitializer +; CHECK-NEXT: [[TMP36:%.*]] = 
xor <16 x i32> [[TMP34]], splat (i32 -1) +; CHECK-NEXT: [[TMP37:%.*]] = and <16 x i32> [[TMP36]], [[TMP33]] +; CHECK-NEXT: [[TMP38:%.*]] = icmp eq <16 x i32> [[TMP37]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP1:%.*]] = and <16 x i1> [[TMP35]], [[TMP38]] +; CHECK-NEXT: [[TMP39:%.*]] = icmp eq <16 x i32> [[TMP32]], zeroinitializer +; CHECK-NEXT: [[TMP40:%.*]] = bitcast <16 x i1> [[_MSPROP_ICMP1]] to i16 +; CHECK-NEXT: [[TMP41:%.*]] = bitcast <16 x i1> [[TMP39]] to i16 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i16 [[TMP25]], [[TMP40]] +; CHECK-NEXT: [[RES2:%.*]] = add i16 [[TMP26]], [[TMP41]] +; CHECK-NEXT: store i16 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i16 [[RES2]] +; + %res = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) + %res1 = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16-1) + %res2 = add i16 %res, %res1 + ret i16 %res2 +} + +declare i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64>, <8 x i64>, i8 %x2) + +define i8@test_int_x86_avx512_ptestnm_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_ptestnm_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = and <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = and <8 x i64> [[X0:%.*]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = and <8 x i64> [[TMP1]], [[X1:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = or <8 x i64> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = and <8 x i64> [[X0]], [[X1]] +; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP11]], splat (i64 -1) +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i64> [[TMP13]], [[TMP10]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <8 x i1> [[TMP12]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i1> [[_MSPROP_ICMP]], [[TMP17]] +; CHECK-NEXT: [[TMP20:%.*]] = and <8 x i1> [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = and <8 x i1> [[_MSPROP_ICMP]], [[TMP18]] +; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i1> [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = or <8 x i1> [[TMP22]], [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = and <8 x i1> [[TMP16]], [[TMP18]] +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <8 x i1> [[TMP23]] to i8 +; CHECK-NEXT: [[TMP26:%.*]] = bitcast <8 x i1> [[TMP24]] to i8 +; CHECK-NEXT: [[TMP27:%.*]] = and <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP28:%.*]] = and <8 x i64> [[X0]], [[TMP2]] +; CHECK-NEXT: [[TMP29:%.*]] = and <8 x i64> [[TMP1]], [[X1]] +; CHECK-NEXT: [[TMP30:%.*]] = or <8 x i64> [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP31:%.*]] = or <8 x i64> [[TMP30]], [[TMP29]] +; CHECK-NEXT: [[TMP32:%.*]] = and <8 x i64> [[X0]], [[X1]] +; CHECK-NEXT: 
[[TMP33:%.*]] = xor <8 x i64> [[TMP32]], zeroinitializer +; CHECK-NEXT: [[TMP34:%.*]] = or <8 x i64> [[TMP31]], zeroinitializer +; CHECK-NEXT: [[TMP35:%.*]] = icmp ne <8 x i64> [[TMP34]], zeroinitializer +; CHECK-NEXT: [[TMP36:%.*]] = xor <8 x i64> [[TMP34]], splat (i64 -1) +; CHECK-NEXT: [[TMP37:%.*]] = and <8 x i64> [[TMP36]], [[TMP33]] +; CHECK-NEXT: [[TMP38:%.*]] = icmp eq <8 x i64> [[TMP37]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP1:%.*]] = and <8 x i1> [[TMP35]], [[TMP38]] +; CHECK-NEXT: [[TMP39:%.*]] = icmp eq <8 x i64> [[TMP32]], zeroinitializer +; CHECK-NEXT: [[TMP40:%.*]] = bitcast <8 x i1> [[_MSPROP_ICMP1]] to i8 +; CHECK-NEXT: [[TMP41:%.*]] = bitcast <8 x i1> [[TMP39]] to i8 +; CHECK-NEXT: [[_MSPROP:%.*]] = or i8 [[TMP25]], [[TMP40]] +; CHECK-NEXT: [[RES2:%.*]] = add i8 [[TMP26]], [[TMP41]] +; CHECK-NEXT: store i8 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i8 [[RES2]] +; + %res = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) + %res1 = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8-1) + %res2 = add i8 %res, %res1 + ret i8 %res2 +} + +declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone +define i16 @test_kand(i16 %a0, i16 %a1) #0 { +; +; CHECK-LABEL: @test_kand( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[A0:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i1> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = and <16 x i1> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = and <16 x i1> [[TMP3]], bitcast (<1 x i16> splat (i16 8) to <16 x i1>) +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i1> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i1> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = and <16 x i1> [[TMP4]], bitcast (<1 x i16> splat (i16 8) to <16 x i1>) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i1> [[TMP9]] to i16 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i1> [[TMP10]] to i16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[TMP11]] to <16 x i1> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i16 [[TMP12]] to <16 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i16 [[A1:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i1> [[TMP13]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = and <16 x i1> [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP19:%.*]] = and <16 x i1> [[TMP13]], [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = or <16 x i1> [[TMP17]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = or <16 x i1> [[TMP20]], [[TMP19]] +; CHECK-NEXT: [[TMP22:%.*]] = and <16 x i1> [[TMP14]], [[TMP16]] +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <16 x i1> [[TMP21]] to i16 +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x i1> [[TMP22]] to i16 +; CHECK-NEXT: store i16 [[TMP23]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i16 [[TMP24]] +; + %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8) + %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1) + ret i16 %t2 +} + +declare i16 @llvm.x86.avx512.kandn.w(i16, i16) nounwind readnone +define i16 @test_kandn(i16 %a0, i16 %a1) #0 { +; +; CHECK-LABEL: @test_kandn( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr 
inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[A0:%.*]] to <16 x i1> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i1> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = xor <16 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP6:%.*]] = and <16 x i1> [[_MSPROP]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = and <16 x i1> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = and <16 x i1> [[_MSPROP]], bitcast (<1 x i16> splat (i16 8) to <16 x i1>) +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i1> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i1> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i1> [[TMP5]], bitcast (<1 x i16> splat (i16 8) to <16 x i1>) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i1> [[TMP10]] to i16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i1> [[TMP11]] to i16 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i16 [[TMP12]] to <16 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i16 [[TMP13]] to <16 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i16 [[A1:%.*]] to <16 x i1> +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i1> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = xor <16 x i1> [[TMP15]], splat (i1 true) +; CHECK-NEXT: [[TMP19:%.*]] = and <16 x i1> [[_MSPROP1]], [[TMP16]] +; CHECK-NEXT: [[TMP20:%.*]] = and <16 x i1> [[TMP18]], [[TMP16]] +; CHECK-NEXT: [[TMP21:%.*]] = and <16 x i1> [[_MSPROP1]], [[TMP17]] +; CHECK-NEXT: [[TMP22:%.*]] = or <16 x i1> [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = or <16 x i1> [[TMP22]], [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = and <16 x i1> [[TMP18]], [[TMP17]] +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <16 x i1> [[TMP23]] to i16 +; CHECK-NEXT: [[TMP26:%.*]] = bitcast <16 x i1> [[TMP24]] to i16 +; CHECK-NEXT: store i16 [[TMP25]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i16 [[TMP26]] +; + %t1 = call i16 @llvm.x86.avx512.kandn.w(i16 %a0, i16 8) + %t2 = call i16 @llvm.x86.avx512.kandn.w(i16 %t1, i16 %a1) + ret i16 %t2 +} + +declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone +define i16 @test_knot(i16 %a0) #0 { +; +; CHECK-LABEL: @test_knot( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[A0:%.*]] to <16 x i1> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i1> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = xor <16 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i1> [[_MSPROP]] to i16 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i1> [[TMP4]] to i16 +; CHECK-NEXT: store i16 [[TMP5]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i16 [[TMP6]] +; + %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0) + ret i16 %res +} + +declare i16 @llvm.x86.avx512.kor.w(i16, i16) nounwind readnone +define i16 @test_kor(i16 %a0, i16 %a1) #0 { +; +; CHECK-LABEL: @test_kor( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[A0:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = 
xor <16 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP6:%.*]] = and <16 x i1> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = and <16 x i1> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = and <16 x i1> [[TMP3]], <i1 xor (i1 extractelement (<16 x i1> bitcast (<1 x i16> splat (i16 8) to <16 x i1>), i32 0), i1 true), i1 xor (i1 extractelement (<16 x i1> bitcast (<1 x i16> splat (i16 8) to <16 x i1>), i32 1), i1 true), i1 xor (i1 extractelement (<16 x i1> bitcast (<1 x i16> splat (i16 8) to <16 x i1>), i32 2), i1 true), i1 xor (i1 extractelement (<16 x i1> bitcast (<1 x i16> splat (i16 8) to <16 x i1>), i32 3), i1 true), i1 xor (i1 extractelement (<16 x i1> bitcast (<1 x i16> splat (i16 8) to <16 x i1>), i32 4), i1 true), i1 xor (i1 extractelement (<16 x i1> bitcast (<1 x i16> splat (i16 8) to <16 x i1>), i32 5), i1 true), i1 xor (i1 extractelement (<16 x i1> bitcast (<1 x i16> splat (i16 8) to <16 x i1>), i32 6), i1 true), i1 xor (i1 extractelement (<16 x i1> bitcast (<1 x i16> splat (i16 8) to <16 x i1>), i32 7), i1 true), i1 xor (i1 extractelement (<16 x i1> bitcast (<1 x i16> splat (i16 8) to <16 x i1>), i32 8), i1 true), i1 xor (i1 extractelement (<16 x i1> bitcast (<1 x i16> splat (i16 8) to <16 x i1>), i32 9), i1 true), i1 xor (i1 extractelement (<16 x i1> bitcast (<1 x i16> splat (i16 8) to <16 x i1>), i32 10), i1 true), i1 xor (i1 extractelement (<16 x i1> bitcast (<1 x i16> splat (i16 8) to <16 x i1>), i32 11), i1 true), i1 xor (i1 extractelement (<16 x i1> bitcast (<1 x i16> splat (i16 8) to <16 x i1>), i32 12), i1 true), i1 xor (i1 extractelement (<16 x i1> bitcast (<1 x i16> splat (i16 8) to <16 x i1>), i32 13), i1 true), i1 xor (i1 extractelement (<16 x i1> bitcast (<1 x i16> splat (i16 8) to <16 x i1>), i32 14), i1 true), i1 xor (i1 extractelement (<16 x i1> bitcast (<1 x i16> splat (i16 8) to <16 x i1>), i32 15), i1 true)> +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i1> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i1> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i1> [[TMP4]], bitcast (<1 x i16> splat (i16 8) to <16 x i1>) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i1> [[TMP10]] to i16 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i1> [[TMP11]] to i16 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i16 [[TMP12]] to <16 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i16 [[TMP13]] to <16 x i1> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i16 [[A1:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP18:%.*]] = xor <16 x i1> [[TMP15]], splat (i1 true) +; CHECK-NEXT: [[TMP19:%.*]] = xor <16 x i1> [[TMP17]], splat (i1 true) +; CHECK-NEXT: [[TMP20:%.*]] = and <16 x i1> [[TMP14]], [[TMP16]] +; CHECK-NEXT: [[TMP21:%.*]] = and <16 x i1> [[TMP18]], [[TMP16]] +; CHECK-NEXT: [[TMP22:%.*]] = and <16 x i1> [[TMP14]], [[TMP19]] +; CHECK-NEXT: [[TMP23:%.*]] = or <16 x i1> [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = or <16 x i1> [[TMP23]], [[TMP22]] +; CHECK-NEXT: [[TMP25:%.*]] = or <16 x i1> [[TMP15]], [[TMP17]] +; CHECK-NEXT: [[TMP26:%.*]] = bitcast <16 x i1> [[TMP24]] to i16 +; CHECK-NEXT: [[TMP27:%.*]] = bitcast <16 x i1> [[TMP25]] to i16 +; CHECK-NEXT: store i16 [[TMP26]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i16 [[TMP27]] +; + %t1 = call i16 @llvm.x86.avx512.kor.w(i16 %a0, i16 8) + %t2 = call i16 @llvm.x86.avx512.kor.w(i16 %t1, i16 %a1) + ret i16 %t2 +} + +declare i16 @llvm.x86.avx512.kxnor.w(i16, i16) nounwind readnone +; TODO: the two kxnor instructions here are a no-op and should be eliminated, +; probably by 
+define i16 @test_kxnor(i16 %a0, i16 %a1) #0 { +; +; CHECK-LABEL: @test_kxnor( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[A0:%.*]] to <16 x i1> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i1> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = xor <16 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i1> [[_MSPROP]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = xor <16 x i1> [[TMP5]], bitcast (<1 x i16> splat (i16 8) to <16 x i1>) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i1> [[_MSPROP1]] to i16 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i1> [[TMP6]] to i16 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP7]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP8]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[A1:%.*]] to <16 x i1> +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i1> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i1> [[TMP10]], splat (i1 true) +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i1> [[_MSPROP2]], [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i1> [[TMP13]], [[TMP12]] +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i1> [[_MSPROP3]] to i16 +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x i1> [[TMP14]] to i16 +; CHECK-NEXT: store i16 [[TMP15]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i16 [[TMP16]] +; + %t1 = call i16 @llvm.x86.avx512.kxnor.w(i16 %a0, i16 8) + %t2 = call i16 @llvm.x86.avx512.kxnor.w(i16 %t1, i16 %a1) + ret i16 %t2 +} + +declare i16 @llvm.x86.avx512.kxor.w(i16, i16) nounwind readnone +define i16 @test_kxor(i16 %a0, i16 %a1) #0 { +; +; CHECK-LABEL: @test_kxor( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[A0:%.*]] to <16 x i1> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i1> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = xor <16 x i1> [[TMP4]], bitcast (<1 x i16> splat (i16 8) to <16 x i1>) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i1> [[_MSPROP]] to i16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i1> [[TMP5]] to i16 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[TMP6]] to <16 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP7]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[A1:%.*]] to <16 x i1> +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i1> [[TMP8]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = xor <16 x i1> [[TMP9]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i1> [[_MSPROP1]] to i16 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i1> [[TMP12]] to i16 +; CHECK-NEXT: store i16 [[TMP13]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i16 [[TMP14]] +; + %t1 = call i16 @llvm.x86.avx512.kxor.w(i16 %a0, i16 8) + %t2 = call i16 @llvm.x86.avx512.kxor.w(i16 %t1, i16 %a1) + ret i16 %t2 +} + +declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone +define i32 @test_kortestz(<8 x 
i64> %A, <8 x i64> %B, <8 x i64> %C, <8 x i64> %D) #0 { +; CHECK-LABEL: @test_kortestz( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP0]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP1]] to <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP4]], [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[TMP9]], splat (i32 -1) +; CHECK-NEXT: [[TMP12:%.*]] = and <16 x i32> [[TMP11]], [[TMP8]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <16 x i1> [[TMP10]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp ne <16 x i32> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i64> [[TMP2]] to <16 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x i64> [[TMP3]] to <16 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = xor <16 x i32> [[TMP16]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = or <16 x i32> [[TMP15]], [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <16 x i32> [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = xor <16 x i32> [[TMP20]], splat (i32 -1) +; CHECK-NEXT: [[TMP23:%.*]] = and <16 x i32> [[TMP22]], [[TMP19]] +; CHECK-NEXT: [[TMP24:%.*]] = icmp eq <16 x i32> [[TMP23]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP1:%.*]] = and <16 x i1> [[TMP21]], [[TMP24]] +; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <16 x i32> [[TMP16]], [[TMP18]] +; CHECK-NEXT: [[TMP26:%.*]] = bitcast <16 x i1> [[_MSPROP_ICMP]] to i16 +; CHECK-NEXT: [[TMP27:%.*]] = bitcast <16 x i1> [[TMP14]] to i16 +; CHECK-NEXT: [[TMP28:%.*]] = bitcast <16 x i1> [[_MSPROP_ICMP1]] to i16 +; CHECK-NEXT: [[TMP29:%.*]] = bitcast <16 x i1> [[TMP25]] to i16 +; CHECK-NEXT: [[TMP30:%.*]] = bitcast i16 [[TMP26]] to <16 x i1> +; CHECK-NEXT: [[TMP31:%.*]] = bitcast i16 [[TMP27]] to <16 x i1> +; CHECK-NEXT: [[TMP32:%.*]] = bitcast i16 [[TMP28]] to <16 x i1> +; CHECK-NEXT: [[TMP33:%.*]] = bitcast i16 [[TMP29]] to <16 x i1> +; CHECK-NEXT: [[TMP34:%.*]] = xor <16 x i1> [[TMP31]], splat (i1 true) +; CHECK-NEXT: [[TMP35:%.*]] = xor <16 x i1> [[TMP33]], splat (i1 true) +; CHECK-NEXT: [[TMP36:%.*]] = and <16 x i1> [[TMP30]], [[TMP32]] +; CHECK-NEXT: [[TMP37:%.*]] = and <16 x i1> [[TMP34]], [[TMP32]] +; CHECK-NEXT: [[TMP38:%.*]] = and <16 x i1> [[TMP30]], [[TMP35]] +; CHECK-NEXT: [[TMP39:%.*]] = or <16 x i1> [[TMP36]], [[TMP37]] +; CHECK-NEXT: [[TMP40:%.*]] = or <16 x i1> [[TMP39]], [[TMP38]] +; CHECK-NEXT: [[TMP41:%.*]] = or <16 x i1> [[TMP31]], [[TMP33]] +; CHECK-NEXT: [[TMP42:%.*]] = bitcast <16 x i1> [[TMP40]] to i16 +; CHECK-NEXT: [[TMP43:%.*]] = bitcast <16 x i1> 
[[TMP41]] to i16 +; CHECK-NEXT: [[TMP44:%.*]] = xor i16 [[TMP43]], 0 +; CHECK-NEXT: [[TMP45:%.*]] = or i16 [[TMP42]], 0 +; CHECK-NEXT: [[TMP46:%.*]] = icmp ne i16 [[TMP45]], 0 +; CHECK-NEXT: [[TMP47:%.*]] = xor i16 [[TMP45]], -1 +; CHECK-NEXT: [[TMP48:%.*]] = and i16 [[TMP47]], [[TMP44]] +; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i16 [[TMP48]], 0 +; CHECK-NEXT: [[_MSPROP_ICMP2:%.*]] = and i1 [[TMP46]], [[TMP49]] +; CHECK-NEXT: [[TMP50:%.*]] = icmp eq i16 [[TMP43]], 0 +; CHECK-NEXT: [[_MSPROP:%.*]] = zext i1 [[_MSPROP_ICMP2]] to i32 +; CHECK-NEXT: [[TMP51:%.*]] = zext i1 [[TMP50]] to i32 +; CHECK-NEXT: store i32 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP51]] +; +entry: + %0 = bitcast <8 x i64> %A to <16 x i32> + %1 = bitcast <8 x i64> %B to <16 x i32> + %2 = icmp ne <16 x i32> %0, %1 + %3 = bitcast <8 x i64> %C to <16 x i32> + %4 = bitcast <8 x i64> %D to <16 x i32> + %5 = icmp ne <16 x i32> %3, %4 + %6 = bitcast <16 x i1> %2 to i16 + %7 = bitcast <16 x i1> %5 to i16 + %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %6, i16 %7) + ret i32 %res +} + +declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone +define i32 @test_kortestc(<8 x i64> %A, <8 x i64> %B, <8 x i64> %C, <8 x i64> %D) #0 { +; CHECK-LABEL: @test_kortestc( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP0]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP1]] to <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP4]], [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[TMP9]], splat (i32 -1) +; CHECK-NEXT: [[TMP12:%.*]] = and <16 x i32> [[TMP11]], [[TMP8]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP:%.*]] = and <16 x i1> [[TMP10]], [[TMP13]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp ne <16 x i32> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i64> [[TMP2]] to <16 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x i64> [[TMP3]] to <16 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = xor <16 x i32> [[TMP16]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = or <16 x i32> [[TMP15]], [[TMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <16 x i32> [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = xor <16 x i32> [[TMP20]], splat (i32 -1) +; CHECK-NEXT: [[TMP23:%.*]] = and <16 x i32> [[TMP22]], [[TMP19]] +; CHECK-NEXT: [[TMP24:%.*]] = icmp eq <16 x i32> [[TMP23]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_ICMP1:%.*]] = and <16 x i1> [[TMP21]], [[TMP24]] +; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <16 x i32> [[TMP16]], [[TMP18]] +; 
CHECK-NEXT: [[TMP26:%.*]] = bitcast <16 x i1> [[_MSPROP_ICMP]] to i16 +; CHECK-NEXT: [[TMP27:%.*]] = bitcast <16 x i1> [[TMP14]] to i16 +; CHECK-NEXT: [[TMP28:%.*]] = bitcast <16 x i1> [[_MSPROP_ICMP1]] to i16 +; CHECK-NEXT: [[TMP29:%.*]] = bitcast <16 x i1> [[TMP25]] to i16 +; CHECK-NEXT: [[TMP30:%.*]] = bitcast i16 [[TMP26]] to <16 x i1> +; CHECK-NEXT: [[TMP31:%.*]] = bitcast i16 [[TMP27]] to <16 x i1> +; CHECK-NEXT: [[TMP32:%.*]] = bitcast i16 [[TMP28]] to <16 x i1> +; CHECK-NEXT: [[TMP33:%.*]] = bitcast i16 [[TMP29]] to <16 x i1> +; CHECK-NEXT: [[TMP34:%.*]] = xor <16 x i1> [[TMP31]], splat (i1 true) +; CHECK-NEXT: [[TMP35:%.*]] = xor <16 x i1> [[TMP33]], splat (i1 true) +; CHECK-NEXT: [[TMP36:%.*]] = and <16 x i1> [[TMP30]], [[TMP32]] +; CHECK-NEXT: [[TMP37:%.*]] = and <16 x i1> [[TMP34]], [[TMP32]] +; CHECK-NEXT: [[TMP38:%.*]] = and <16 x i1> [[TMP30]], [[TMP35]] +; CHECK-NEXT: [[TMP39:%.*]] = or <16 x i1> [[TMP36]], [[TMP37]] +; CHECK-NEXT: [[TMP40:%.*]] = or <16 x i1> [[TMP39]], [[TMP38]] +; CHECK-NEXT: [[TMP41:%.*]] = or <16 x i1> [[TMP31]], [[TMP33]] +; CHECK-NEXT: [[TMP42:%.*]] = bitcast <16 x i1> [[TMP40]] to i16 +; CHECK-NEXT: [[TMP43:%.*]] = bitcast <16 x i1> [[TMP41]] to i16 +; CHECK-NEXT: [[TMP44:%.*]] = xor i16 [[TMP43]], 0 +; CHECK-NEXT: [[TMP45:%.*]] = or i16 [[TMP42]], 0 +; CHECK-NEXT: [[TMP46:%.*]] = icmp ne i16 [[TMP45]], 0 +; CHECK-NEXT: [[TMP47:%.*]] = xor i16 [[TMP45]], -1 +; CHECK-NEXT: [[TMP48:%.*]] = and i16 [[TMP47]], [[TMP44]] +; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i16 [[TMP48]], 0 +; CHECK-NEXT: [[_MSPROP_ICMP2:%.*]] = and i1 [[TMP46]], [[TMP49]] +; CHECK-NEXT: [[TMP50:%.*]] = icmp eq i16 [[TMP43]], 0 +; CHECK-NEXT: [[_MSPROP:%.*]] = zext i1 [[_MSPROP_ICMP2]] to i32 +; CHECK-NEXT: [[TMP51:%.*]] = zext i1 [[TMP50]] to i32 +; CHECK-NEXT: store i32 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[TMP51]] +; +entry: + %0 = bitcast <8 x i64> %A to <16 x i32> + %1 = bitcast <8 x i64> %B to <16 x i32> + %2 = icmp ne <16 x i32> %0, %1 + %3 = bitcast <8 x i64> %C to <16 x i32> + %4 = bitcast <8 x i64> %D to <16 x i32> + %5 = icmp ne <16 x i32> %3, %4 + %6 = bitcast <16 x i1> %2 to i16 + %7 = bitcast <16 x i1> %5 to i16 + %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %6, i16 %7) + ret i32 %res +} + +define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) #0 { +; CHECK-LABEL: @test_cmpps( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 2, <16 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i1> [[RES]] to i16 +; CHECK-NEXT: store i16 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i16 [[TMP7]] +; + %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> 
%b, i32 2, i16 -1, i32 8) + ret i16 %res +} +declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32) + +define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) #0 { +; CHECK-LABEL: @test_cmppd( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 4, <8 x i1> splat (i1 true), i32 4) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i1> [[RES]] to i8 +; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i8 [[TMP7]] +; + %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4) + ret i8 %res +} +declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32) + +define <8 x i64> @test_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) #0 { +; CHECK-LABEL: @test_mul_epi32_rr( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[B:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i64> [[TMP3]], splat (i64 32) +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = shl <8 x i64> [[TMP4]], splat (i64 32) +; CHECK-NEXT: [[TMP10:%.*]] = ashr <8 x i64> [[TMP8]], splat (i64 32) +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = ashr <8 x i64> [[TMP9]], splat (i64 32) +; CHECK-NEXT: [[TMP13:%.*]] = shl <8 x i64> [[TMP5]], splat (i64 32) +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = shl <8 x i64> [[TMP6]], splat (i64 32) +; CHECK-NEXT: [[TMP16:%.*]] = ashr <8 x i64> [[TMP14]], splat (i64 32) +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = ashr <8 x i64> [[TMP15]], splat (i64 32) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP11]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = mul <8 x i64> [[TMP12]], [[TMP18]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP19]] +; + %res = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mul_epi32_rrk( +; CHECK-NEXT: 
[[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[B:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = shl <8 x i64> [[TMP5]], splat (i64 32) +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = shl <8 x i64> [[TMP6]], splat (i64 32) +; CHECK-NEXT: [[TMP12:%.*]] = ashr <8 x i64> [[TMP10]], splat (i64 32) +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = ashr <8 x i64> [[TMP11]], splat (i64 32) +; CHECK-NEXT: [[TMP15:%.*]] = shl <8 x i64> [[TMP7]], splat (i64 32) +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = shl <8 x i64> [[TMP8]], splat (i64 32) +; CHECK-NEXT: [[TMP18:%.*]] = ashr <8 x i64> [[TMP16]], splat (i64 32) +; CHECK-NEXT: [[TMP19:%.*]] = or <8 x i64> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = ashr <8 x i64> [[TMP17]], splat (i64 32) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP13]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = mul <8 x i64> [[TMP14]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP23:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP24:%.*]] = xor <8 x i64> [[TMP21]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP25:%.*]] = or <8 x i64> [[TMP24]], [[_MSPROP]] +; CHECK-NEXT: [[TMP26:%.*]] = or <8 x i64> [[TMP25]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP22]], <8 x i64> [[TMP26]], <8 x i64> [[TMP23]] +; CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP21]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES]] +; + %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b) + %mask.cast = bitcast i8 %mask to <8 x i1> + %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru + ret < 8 x i64> %res +} + +define <8 x i64> @test_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mul_epi32_rrkz( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = 
bitcast <16 x i32> [[B:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = shl <8 x i64> [[TMP4]], splat (i64 32) +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = shl <8 x i64> [[TMP5]], splat (i64 32) +; CHECK-NEXT: [[TMP11:%.*]] = ashr <8 x i64> [[TMP9]], splat (i64 32) +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = ashr <8 x i64> [[TMP10]], splat (i64 32) +; CHECK-NEXT: [[TMP14:%.*]] = shl <8 x i64> [[TMP6]], splat (i64 32) +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = shl <8 x i64> [[TMP7]], splat (i64 32) +; CHECK-NEXT: [[TMP17:%.*]] = ashr <8 x i64> [[TMP15]], splat (i64 32) +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = ashr <8 x i64> [[TMP16]], splat (i64 32) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP12]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = mul <8 x i64> [[TMP13]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP22:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = xor <8 x i64> [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = or <8 x i64> [[TMP23]], [[_MSPROP]] +; CHECK-NEXT: [[TMP25:%.*]] = or <8 x i64> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP21]], <8 x i64> [[TMP25]], <8 x i64> [[TMP22]] +; CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP20]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES]] +; + %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b) + %mask.cast = bitcast i8 %mask to <8 x i1> + %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer + ret < 8 x i64> %res +} + +define <8 x i64> @test_mul_epi32_rm(<16 x i32> %a, ptr %ptr_b) #0 { +; +; CHECK-LABEL: @test_mul_epi32_rm( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSLD]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = shl <8 x i64> [[TMP8]], splat (i64 32) +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = shl <8 x i64> [[TMP9]], splat (i64 32) +; CHECK-NEXT: [[TMP15:%.*]] = ashr <8 x i64> [[TMP13]], 
splat (i64 32) +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = ashr <8 x i64> [[TMP14]], splat (i64 32) +; CHECK-NEXT: [[TMP18:%.*]] = shl <8 x i64> [[TMP10]], splat (i64 32) +; CHECK-NEXT: [[TMP19:%.*]] = or <8 x i64> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = shl <8 x i64> [[TMP11]], splat (i64 32) +; CHECK-NEXT: [[TMP21:%.*]] = ashr <8 x i64> [[TMP19]], splat (i64 32) +; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i64> [[TMP21]], zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = ashr <8 x i64> [[TMP20]], splat (i64 32) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP16]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = mul <8 x i64> [[TMP17]], [[TMP23]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP24]] +; + %b = load <16 x i32>, ptr %ptr_b + %res = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mul_epi32_rmk(<16 x i32> %a, ptr %ptr_b, <8 x i64> %passThru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mul_epi32_rmk( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[_MSLD]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i32> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = shl <8 x i64> [[TMP10]], splat (i64 32) +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = shl <8 x i64> [[TMP11]], splat (i64 32) +; CHECK-NEXT: [[TMP17:%.*]] = ashr <8 x i64> [[TMP15]], splat (i64 32) +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = ashr <8 x i64> [[TMP16]], splat (i64 32) +; CHECK-NEXT: [[TMP20:%.*]] = shl <8 x i64> [[TMP12]], splat (i64 32) +; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i64> [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = shl <8 x i64> [[TMP13]], splat (i64 32) +; CHECK-NEXT: [[TMP23:%.*]] = ashr <8 x i64> [[TMP21]], splat (i64 32) +; CHECK-NEXT: [[TMP24:%.*]] = or <8 x i64> [[TMP23]], zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = ashr <8 x i64> [[TMP22]], splat (i64 32) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP18]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = mul <8 x i64> [[TMP19]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast i8 
[[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP28:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP29:%.*]] = xor <8 x i64> [[TMP26]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP30:%.*]] = or <8 x i64> [[TMP29]], [[_MSPROP]] +; CHECK-NEXT: [[TMP31:%.*]] = or <8 x i64> [[TMP30]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP27]], <8 x i64> [[TMP31]], <8 x i64> [[TMP28]] +; CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP26]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES]] +; + %b = load <16 x i32>, ptr %ptr_b + %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b) + %mask.cast = bitcast i8 %mask to <8 x i1> + %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru + ret < 8 x i64> %res +} + +define <8 x i64> @test_mul_epi32_rmkz(<16 x i32> %a, ptr %ptr_b, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mul_epi32_rmkz( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSLD]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = shl <8 x i64> [[TMP9]], splat (i64 32) +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = shl <8 x i64> [[TMP10]], splat (i64 32) +; CHECK-NEXT: [[TMP16:%.*]] = ashr <8 x i64> [[TMP14]], splat (i64 32) +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = ashr <8 x i64> [[TMP15]], splat (i64 32) +; CHECK-NEXT: [[TMP19:%.*]] = shl <8 x i64> [[TMP11]], splat (i64 32) +; CHECK-NEXT: [[TMP20:%.*]] = or <8 x i64> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = shl <8 x i64> [[TMP12]], splat (i64 32) +; CHECK-NEXT: [[TMP22:%.*]] = ashr <8 x i64> [[TMP20]], splat (i64 32) +; CHECK-NEXT: [[TMP23:%.*]] = or <8 x i64> [[TMP22]], zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = ashr <8 x i64> [[TMP21]], splat (i64 32) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP17]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = mul <8 x i64> [[TMP18]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP27:%.*]] = select <8 x i1> 
[[MASK_CAST]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP28:%.*]] = xor <8 x i64> [[TMP25]], zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = or <8 x i64> [[TMP28]], [[_MSPROP]] +; CHECK-NEXT: [[TMP30:%.*]] = or <8 x i64> [[TMP29]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP26]], <8 x i64> [[TMP30]], <8 x i64> [[TMP27]] +; CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP25]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES]] +; + %b = load <16 x i32>, ptr %ptr_b + %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b) + %mask.cast = bitcast i8 %mask to <8 x i1> + %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer + ret < 8 x i64> %res +} + +define <8 x i64> @test_mul_epi32_rmb(<16 x i32> %a, ptr %ptr_b) #0 { +; +; CHECK-LABEL: @test_mul_epi32_rmb( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[Q:%.*]] = load i64, ptr [[PTR_B:%.*]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i64> undef, i64 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[B64:%.*]] = shufflevector <8 x i64> [[VECINIT_I]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[_MSPROP1]] to <16 x i32> +; CHECK-NEXT: [[B:%.*]] = bitcast <8 x i64> [[B64]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP8]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = shl <8 x i64> [[TMP9]], splat (i64 32) +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = shl <8 x i64> [[TMP10]], splat (i64 32) +; CHECK-NEXT: [[TMP16:%.*]] = ashr <8 x i64> [[TMP14]], splat (i64 32) +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = ashr <8 x i64> [[TMP15]], splat (i64 32) +; CHECK-NEXT: [[TMP19:%.*]] = shl <8 x i64> [[TMP11]], splat (i64 32) +; CHECK-NEXT: [[TMP20:%.*]] = or <8 x i64> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = shl <8 x i64> [[TMP12]], splat (i64 32) +; CHECK-NEXT: [[TMP22:%.*]] = ashr <8 x i64> [[TMP20]], splat (i64 32) +; CHECK-NEXT: [[TMP23:%.*]] = or <8 x i64> [[TMP22]], zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = ashr <8 x i64> [[TMP21]], splat (i64 32) +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i64> 
[[TMP17]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = mul <8 x i64> [[TMP18]], [[TMP24]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP25]] +; + %q = load i64, ptr %ptr_b + %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 + %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer + %b = bitcast <8 x i64> %b64 to <16 x i32> + %res = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mul_epi32_rmbk(<16 x i32> %a, ptr %ptr_b, <8 x i64> %passThru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mul_epi32_rmbk( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[Q:%.*]] = load i64, ptr [[PTR_B:%.*]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP9]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i64> undef, i64 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[B64:%.*]] = shufflevector <8 x i64> [[VECINIT_I]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[_MSPROP1]] to <16 x i32> +; CHECK-NEXT: [[B:%.*]] = bitcast <8 x i64> [[B64]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i32> [[TMP10]] to <8 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i32> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP15:%.*]] = shl <8 x i64> [[TMP11]], splat (i64 32) +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = shl <8 x i64> [[TMP12]], splat (i64 32) +; CHECK-NEXT: [[TMP18:%.*]] = ashr <8 x i64> [[TMP16]], splat (i64 32) +; CHECK-NEXT: [[TMP19:%.*]] = or <8 x i64> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = ashr <8 x i64> [[TMP17]], splat (i64 32) +; CHECK-NEXT: [[TMP21:%.*]] = shl <8 x i64> [[TMP13]], splat (i64 32) +; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i64> [[TMP21]], zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = shl <8 x i64> [[TMP14]], splat (i64 32) +; CHECK-NEXT: [[TMP24:%.*]] = ashr <8 x i64> [[TMP22]], splat (i64 32) +; CHECK-NEXT: [[TMP25:%.*]] = or <8 x i64> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = ashr <8 x i64> [[TMP23]], splat (i64 32) +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i64> [[TMP19]], [[TMP25]] +; CHECK-NEXT: 
[[TMP27:%.*]] = mul <8 x i64> [[TMP20]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP29:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[_MSPROP2]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP30:%.*]] = xor <8 x i64> [[TMP27]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP31:%.*]] = or <8 x i64> [[TMP30]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP32:%.*]] = or <8 x i64> [[TMP31]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP28]], <8 x i64> [[TMP32]], <8 x i64> [[TMP29]] +; CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP27]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES]] +; + %q = load i64, ptr %ptr_b + %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 + %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer + %b = bitcast <8 x i64> %b64 to <16 x i32> + %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b) + %mask.cast = bitcast i8 %mask to <8 x i1> + %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru + ret < 8 x i64> %res +} + +define <8 x i64> @test_mul_epi32_rmbkz(<16 x i32> %a, ptr %ptr_b, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mul_epi32_rmbkz( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[Q:%.*]] = load i64, ptr [[PTR_B:%.*]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i64> undef, i64 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[B64:%.*]] = shufflevector <8 x i64> [[VECINIT_I]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i64> [[_MSPROP1]] to <16 x i32> +; CHECK-NEXT: [[B:%.*]] = bitcast <8 x i64> [[B64]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP9]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i32> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = shl <8 x i64> [[TMP10]], splat (i64 32) +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = shl <8 x i64> [[TMP11]], splat (i64 32) +; CHECK-NEXT: [[TMP17:%.*]] = ashr <8 x i64> [[TMP15]], splat (i64 32) +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], zeroinitializer +; 
CHECK-NEXT: [[TMP19:%.*]] = ashr <8 x i64> [[TMP16]], splat (i64 32) +; CHECK-NEXT: [[TMP20:%.*]] = shl <8 x i64> [[TMP12]], splat (i64 32) +; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i64> [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = shl <8 x i64> [[TMP13]], splat (i64 32) +; CHECK-NEXT: [[TMP23:%.*]] = ashr <8 x i64> [[TMP21]], splat (i64 32) +; CHECK-NEXT: [[TMP24:%.*]] = or <8 x i64> [[TMP23]], zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = ashr <8 x i64> [[TMP22]], splat (i64 32) +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i64> [[TMP18]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = mul <8 x i64> [[TMP19]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP28:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[_MSPROP2]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = xor <8 x i64> [[TMP26]], zeroinitializer +; CHECK-NEXT: [[TMP30:%.*]] = or <8 x i64> [[TMP29]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP31:%.*]] = or <8 x i64> [[TMP30]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP27]], <8 x i64> [[TMP31]], <8 x i64> [[TMP28]] +; CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP26]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES]] +; + %q = load i64, ptr %ptr_b + %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 + %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer + %b = bitcast <8 x i64> %b64 to <16 x i32> + %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b) + %mask.cast = bitcast i8 %mask to <8 x i1> + %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer + ret < 8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32>, <16 x i32>) + +define <8 x i64> @test_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) #0 { +; CHECK-LABEL: @test_mul_epu32_rr( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[B:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = and <8 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = and <8 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = and <8 x i64> [[TMP3]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = and <8 x i64> [[TMP4]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP13:%.*]] = and <8 x i64> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i64> [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = and <8 x i64> [[TMP5]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i64> [[TMP6]], splat (i64 4294967295) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP11]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] 
= mul <8 x i64> [[TMP12]], [[TMP18]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP19]] +; + %res = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mul_epu32_rrk( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[B:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = and <8 x i64> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = and <8 x i64> [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = and <8 x i64> [[TMP5]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i64> [[TMP6]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP15:%.*]] = and <8 x i64> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i64> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = and <8 x i64> [[TMP7]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = or <8 x i64> [[TMP18]], [[TMP17]] +; CHECK-NEXT: [[TMP20:%.*]] = and <8 x i64> [[TMP8]], splat (i64 4294967295) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP13]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = mul <8 x i64> [[TMP14]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP23:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP24:%.*]] = xor <8 x i64> [[TMP21]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP25:%.*]] = or <8 x i64> [[TMP24]], [[_MSPROP]] +; CHECK-NEXT: [[TMP26:%.*]] = or <8 x i64> [[TMP25]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP22]], <8 x i64> [[TMP26]], <8 x i64> [[TMP23]] +; CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP21]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES]] +; + %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b) + %mask.cast = bitcast i8 %mask to <8 x i1> + %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru + ret < 8 x i64> %res +} + +define <8 x i64> @test_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mul_epu32_rrkz( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to 
ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[B:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = and <8 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = and <8 x i64> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = and <8 x i64> [[TMP4]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP10]] +; CHECK-NEXT: [[TMP13:%.*]] = and <8 x i64> [[TMP5]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i64> [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = and <8 x i64> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i64> [[TMP6]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i64> [[TMP7]], splat (i64 4294967295) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP12]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = mul <8 x i64> [[TMP13]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP22:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = xor <8 x i64> [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = or <8 x i64> [[TMP23]], [[_MSPROP]] +; CHECK-NEXT: [[TMP25:%.*]] = or <8 x i64> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP21]], <8 x i64> [[TMP25]], <8 x i64> [[TMP22]] +; CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP20]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES]] +; + %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b) + %mask.cast = bitcast i8 %mask to <8 x i1> + %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer + ret < 8 x i64> %res +} + +define <8 x i64> @test_mul_epu32_rm(<16 x i32> %a, ptr %ptr_b) #0 { +; +; CHECK-LABEL: @test_mul_epu32_rm( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP7]], align 64 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x 
i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSLD]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = and <8 x i64> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = and <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i64> [[TMP8]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = and <8 x i64> [[TMP9]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i64> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = and <8 x i64> [[TMP10]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i64> [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i64> [[TMP21]], [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = and <8 x i64> [[TMP11]], splat (i64 4294967295) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP16]], [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = mul <8 x i64> [[TMP17]], [[TMP23]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP24]] +; + %b = load <16 x i32>, ptr %ptr_b + %res = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mul_epu32_rmk(<16 x i32> %a, ptr %ptr_b, <8 x i64> %passThru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mul_epu32_rmk( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[_MSLD]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i32> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i64> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = and <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i64> [[TMP10]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i64> [[TMP11]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP20:%.*]] = and <8 x i64> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = and <8 x i64> [[TMP13]], zeroinitializer +; 
CHECK-NEXT: [[TMP22:%.*]] = and <8 x i64> [[TMP12]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP23:%.*]] = or <8 x i64> [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = or <8 x i64> [[TMP23]], [[TMP22]] +; CHECK-NEXT: [[TMP25:%.*]] = and <8 x i64> [[TMP13]], splat (i64 4294967295) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP18]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = mul <8 x i64> [[TMP19]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP28:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP29:%.*]] = xor <8 x i64> [[TMP26]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP30:%.*]] = or <8 x i64> [[TMP29]], [[_MSPROP]] +; CHECK-NEXT: [[TMP31:%.*]] = or <8 x i64> [[TMP30]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP27]], <8 x i64> [[TMP31]], <8 x i64> [[TMP28]] +; CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP26]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES]] +; + %b = load <16 x i32>, ptr %ptr_b + %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b) + %mask.cast = bitcast i8 %mask to <8 x i1> + %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru + ret < 8 x i64> %res +} + +define <8 x i64> @test_mul_epu32_rmkz(<16 x i32> %a, ptr %ptr_b, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mul_epu32_rmkz( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[B:%.*]] = load <16 x i32>, ptr [[PTR_B:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSLD]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = and <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i64> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = and <8 x i64> [[TMP9]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i64> [[TMP10]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = and <8 x i64> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = and <8 x i64> [[TMP11]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i64> [[TMP19]], 
[[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = or <8 x i64> [[TMP22]], [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = and <8 x i64> [[TMP12]], splat (i64 4294967295) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP17]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = mul <8 x i64> [[TMP18]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP27:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP28:%.*]] = xor <8 x i64> [[TMP25]], zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = or <8 x i64> [[TMP28]], [[_MSPROP]] +; CHECK-NEXT: [[TMP30:%.*]] = or <8 x i64> [[TMP29]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP26]], <8 x i64> [[TMP30]], <8 x i64> [[TMP27]] +; CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP25]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES]] +; + %b = load <16 x i32>, ptr %ptr_b + %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b) + %mask.cast = bitcast i8 %mask to <8 x i1> + %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer + ret < 8 x i64> %res +} + +define <8 x i64> @test_mul_epu32_rmb(<16 x i32> %a, ptr %ptr_b) #0 { +; +; CHECK-LABEL: @test_mul_epu32_rmb( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[Q:%.*]] = load i64, ptr [[PTR_B:%.*]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i64> undef, i64 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[B64:%.*]] = shufflevector <8 x i64> [[VECINIT_I]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[_MSPROP1]] to <16 x i32> +; CHECK-NEXT: [[B:%.*]] = bitcast <8 x i64> [[B64]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP8]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = and <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i64> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = and <8 x i64> [[TMP9]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i64> [[TMP10]], splat (i64 4294967295) +; 
CHECK-NEXT: [[TMP19:%.*]] = and <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = and <8 x i64> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = and <8 x i64> [[TMP11]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i64> [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = or <8 x i64> [[TMP22]], [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = and <8 x i64> [[TMP12]], splat (i64 4294967295) +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i64> [[TMP17]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = mul <8 x i64> [[TMP18]], [[TMP24]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP25]] +; + %q = load i64, ptr %ptr_b + %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 + %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer + %b = bitcast <8 x i64> %b64 to <16 x i32> + %res = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b) + ret < 8 x i64> %res +} + +define <8 x i64> @test_mul_epu32_rmbk(<16 x i32> %a, ptr %ptr_b, <8 x i64> %passThru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mul_epu32_rmbk( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[Q:%.*]] = load i64, ptr [[PTR_B:%.*]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP9]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i64> undef, i64 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[B64:%.*]] = shufflevector <8 x i64> [[VECINIT_I]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[_MSPROP1]] to <16 x i32> +; CHECK-NEXT: [[B:%.*]] = bitcast <8 x i64> [[B64]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i32> [[TMP10]] to <8 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i32> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP15:%.*]] = and <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i64> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = and <8 x i64> [[TMP11]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = or <8 x i64> [[TMP18]], [[TMP17]] +; CHECK-NEXT: [[TMP20:%.*]] = and <8 x i64> [[TMP12]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP21:%.*]] 
= and <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = and <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP23:%.*]] = and <8 x i64> [[TMP13]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP24:%.*]] = or <8 x i64> [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP25:%.*]] = or <8 x i64> [[TMP24]], [[TMP23]] +; CHECK-NEXT: [[TMP26:%.*]] = and <8 x i64> [[TMP14]], splat (i64 4294967295) +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i64> [[TMP19]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = mul <8 x i64> [[TMP20]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP29:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[_MSPROP2]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP30:%.*]] = xor <8 x i64> [[TMP27]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP31:%.*]] = or <8 x i64> [[TMP30]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP32:%.*]] = or <8 x i64> [[TMP31]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP28]], <8 x i64> [[TMP32]], <8 x i64> [[TMP29]] +; CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP27]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES]] +; + %q = load i64, ptr %ptr_b + %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 + %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer + %b = bitcast <8 x i64> %b64 to <16 x i32> + %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b) + %mask.cast = bitcast i8 %mask to <8 x i1> + %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru + ret < 8 x i64> %res +} + +define <8 x i64> @test_mul_epu32_rmbkz(<16 x i32> %a, ptr %ptr_b, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mul_epu32_rmbkz( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[Q:%.*]] = load i64, ptr [[PTR_B:%.*]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i64> undef, i64 [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[B64:%.*]] = shufflevector <8 x i64> [[VECINIT_I]], <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i64> [[_MSPROP1]] to <16 x i32> +; CHECK-NEXT: [[B:%.*]] = bitcast <8 x i64> [[B64]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[A:%.*]] to <8 x i64> +; 
CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP9]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i32> [[B]] to <8 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = and <8 x i64> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = and <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i64> [[TMP10]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i64> [[TMP11]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP20:%.*]] = and <8 x i64> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = and <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = and <8 x i64> [[TMP12]], splat (i64 4294967295) +; CHECK-NEXT: [[TMP23:%.*]] = or <8 x i64> [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = or <8 x i64> [[TMP23]], [[TMP22]] +; CHECK-NEXT: [[TMP25:%.*]] = and <8 x i64> [[TMP13]], splat (i64 4294967295) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP18]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = mul <8 x i64> [[TMP19]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP28:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP29:%.*]] = xor <8 x i64> [[TMP26]], zeroinitializer +; CHECK-NEXT: [[TMP30:%.*]] = or <8 x i64> [[TMP29]], [[_MSPROP]] +; CHECK-NEXT: [[TMP31:%.*]] = or <8 x i64> [[TMP30]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP27]], <8 x i64> [[TMP31]], <8 x i64> [[TMP28]] +; CHECK-NEXT: [[RES:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP26]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES]] +; + %q = load i64, ptr %ptr_b + %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0 + %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer + %b = bitcast <8 x i64> %b64 to <16 x i32> + %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b) + %mask.cast = bitcast i8 %mask to <8 x i1> + %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer + ret < 8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32>, <16 x i32>) + +define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b) #0 { +; +; CHECK-LABEL: @test_x86_avx512_mm_cvtu32_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = uitofp i32 [[B:%.*]] to double +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[A:%.*]], double [[TMP4]], i64 0 +; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[TMP5]] +; + %res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<2 x double>> [#uses=1] + ret <2 x double> %res +} +declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone + +define <16 x float> @test_x86_vbroadcast_ss_512(ptr %a0) #0 { +; +; 
CHECK-LABEL: @test_x86_vbroadcast_ss_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[A0:%.*]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A0]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP7]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x float> poison, float [[TMP4]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <16 x i32> [[_MSPROP]], i32 [[_MSLD]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x float> [[TMP8]], float [[TMP4]], i32 1 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <16 x i32> [[_MSPROP1]], i32 [[_MSLD]], i32 2 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x float> [[TMP9]], float [[TMP4]], i32 2 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <16 x i32> [[_MSPROP2]], i32 [[_MSLD]], i32 3 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x float> [[TMP10]], float [[TMP4]], i32 3 +; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <16 x i32> [[_MSPROP3]], i32 [[_MSLD]], i32 4 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x float> [[TMP11]], float [[TMP4]], i32 4 +; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <16 x i32> [[_MSPROP4]], i32 [[_MSLD]], i32 5 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <16 x float> [[TMP12]], float [[TMP4]], i32 5 +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <16 x i32> [[_MSPROP5]], i32 [[_MSLD]], i32 6 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <16 x float> [[TMP13]], float [[TMP4]], i32 6 +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <16 x i32> [[_MSPROP6]], i32 [[_MSLD]], i32 7 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <16 x float> [[TMP14]], float [[TMP4]], i32 7 +; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <16 x i32> [[_MSPROP7]], i32 [[_MSLD]], i32 8 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <16 x float> [[TMP15]], float [[TMP4]], i32 8 +; CHECK-NEXT: [[_MSPROP9:%.*]] = insertelement <16 x i32> [[_MSPROP8]], i32 [[_MSLD]], i32 9 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <16 x float> [[TMP16]], float [[TMP4]], i32 9 +; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <16 x i32> [[_MSPROP9]], i32 [[_MSLD]], i32 10 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <16 x float> [[TMP17]], float [[TMP4]], i32 10 +; CHECK-NEXT: [[_MSPROP11:%.*]] = insertelement <16 x i32> [[_MSPROP10]], i32 [[_MSLD]], i32 11 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <16 x float> [[TMP18]], float [[TMP4]], i32 11 +; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <16 x i32> [[_MSPROP11]], i32 [[_MSLD]], i32 12 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <16 x float> [[TMP19]], float [[TMP4]], i32 12 +; CHECK-NEXT: [[_MSPROP13:%.*]] = insertelement <16 x i32> [[_MSPROP12]], i32 [[_MSLD]], i32 13 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <16 x float> [[TMP20]], float [[TMP4]], i32 13 +; CHECK-NEXT: [[_MSPROP14:%.*]] = insertelement <16 x i32> [[_MSPROP13]], i32 [[_MSLD]], i32 14 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <16 x float> [[TMP21]], float [[TMP4]], i32 14 +; CHECK-NEXT: [[_MSPROP15:%.*]] = 
insertelement <16 x i32> [[_MSPROP14]], i32 [[_MSLD]], i32 15 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <16 x float> [[TMP22]], float [[TMP4]], i32 15 +; CHECK-NEXT: store <16 x i32> [[_MSPROP15]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP23]] +; + %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(ptr %a0) ; <<16 x float>> [#uses=1] + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(ptr) nounwind readonly + +define <8 x double> @test_x86_vbroadcast_sd_512(ptr %a0) #0 { +; +; CHECK-LABEL: @test_x86_vbroadcast_sd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[A0:%.*]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A0]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x double> poison, double [[TMP4]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <8 x i64> [[_MSPROP]], i64 [[_MSLD]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[TMP4]], i32 1 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <8 x i64> [[_MSPROP1]], i64 [[_MSLD]], i32 2 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[TMP4]], i32 2 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <8 x i64> [[_MSPROP2]], i64 [[_MSLD]], i32 3 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x double> [[TMP10]], double [[TMP4]], i32 3 +; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <8 x i64> [[_MSPROP3]], i64 [[_MSLD]], i32 4 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <8 x double> [[TMP11]], double [[TMP4]], i32 4 +; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <8 x i64> [[_MSPROP4]], i64 [[_MSLD]], i32 5 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x double> [[TMP12]], double [[TMP4]], i32 5 +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <8 x i64> [[_MSPROP5]], i64 [[_MSLD]], i32 6 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[TMP4]], i32 6 +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <8 x i64> [[_MSPROP6]], i64 [[_MSLD]], i32 7 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[TMP4]], i32 7 +; CHECK-NEXT: store <8 x i64> [[_MSPROP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP15]] +; + %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(ptr %a0) ; <<8 x double>> [#uses=1] + ret <8 x double> %res +} +declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(ptr) nounwind readonly + +declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8) + +define <8 x double>@test_int_x86_avx512_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_permvar_df_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to 
ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]]) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP7]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1) + ret <8 x double> %res +} + +define <8 x double>@test_int_x86_avx512_mask_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_permvar_df_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> zeroinitializer, <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x double> [[TMP9]] to <8 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x double> [[X2:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i64> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP17]], <8 x i64> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP11]], <8 x double> [[TMP9]], <8 x double> [[X2]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP18]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) + ret <8 x double> %res +} + +define <8 x double>@test_int_x86_avx512_maskz_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, i8 %x3) #0 { +; +; CHECK-LABEL: 
@test_int_x86_avx512_maskz_permvar_df_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP16]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3) + ret <8 x double> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) + +define <8 x i64>@test_int_x86_avx512_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_permvar_di_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]]) +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP3]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) + ret <8 x i64> %res +} + +define <8 x i64>@test_int_x86_avx512_mask_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_permvar_di_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to 
i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[X2]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP12]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) + ret <8 x i64> %res +} + +define <8 x i64>@test_int_x86_avx512_maskz_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_maskz_permvar_di_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP11]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3) + ret <8 x i64> %res +} + +declare <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float>, <16 x i32>, <16 x float>, i16) + +define <16 x float>@test_int_x86_avx512_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_permvar_sf_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; 
CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]]) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1) + ret <16 x float> %res +} + +define <16 x float>@test_int_x86_avx512_mask_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_permvar_sf_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[X2:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[X2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) + ret <16 x float> %res +} + +define <16 x float>@test_int_x86_avx512_maskz_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, i16 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_maskz_permvar_sf_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3) + ret <16 x float> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) + +define <16 x i32>@test_int_x86_avx512_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_permvar_si_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]]) +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP3]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) + ret <16 x i32> %res +} + +define <16 x i32>@test_int_x86_avx512_mask_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_permvar_si_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load 
<16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[X2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP12]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) + ret <16 x i32> %res +} + +define <16 x i32>@test_int_x86_avx512_maskz_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_maskz_permvar_si_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[_MSPROP]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP11]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3) + ret <16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16) + +define <16 x i32>@test_int_x86_avx512_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pternlog_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) 
to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2:%.*]], i32 33) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP9]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1) + ret <16 x i32> %res +} + +define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_pternlog_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2:%.*]], i32 33) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X4:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> zeroinitializer, <16 x i32> [[TMP1]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], [[X0]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP1]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> [[X0]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP17]] +; + %res = call 
<16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4) + ret <16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16) + +define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_maskz_pternlog_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2:%.*]], i32 33) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X4:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP17]] +; + %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4) + ret <16 x i32> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8) + +define <8 x i64>@test_int_x86_avx512_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pternlog_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 
[[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]], i32 33) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP9]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1) + ret <8 x i64> %res +} + +define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_pternlog_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]], i32 33) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[X4:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> zeroinitializer, <8 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <8 x i64> [[TMP10]], [[X0]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP1]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP10]], <8 x i64> [[X0]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP17]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4) + ret <8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x 
i64>, i32, i8)
+
+define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_pternlog_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]], i32 33)
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[X4:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = xor <8 x i64> [[TMP10]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]]
+; CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP10]], <8 x i64> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP17]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
+ ret <8 x i64> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[X2:%.*]] = load <16 x i32>, ptr [[X2P:%.*]], align 64
+; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[X2P]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
+; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[TMP4]]
+; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X4:%.*]])
+; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP10]]
+;
+ %x2 = load <16 x i32>, ptr %x2p
+ %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
+ ret <16 x i32> %res
+}
+
+define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, <16 x i32> %x4, i16 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 200) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[X2:%.*]] = load <16 x i32>, ptr [[X2P:%.*]], align 64
+; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[X2P]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
+; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[_MSLD]]
+; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2]])
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], [[X1]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]]
+; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> [[X1]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP17]]
+;
+ %x2 = load <16 x i32>, ptr %x2p
+ %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
+ ret <16 x i32> %res
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x double> [[X2:%.*]])
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[X1]] to <8 x double>
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP9]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
+ ret <8 x double> %res
+}
+
+define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP10:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x double> [[X2:%.*]])
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[X1]] to <8 x double>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x i64> zeroinitializer, <8 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x double> [[TMP10]] to <8 x i64>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x double> [[TMP11]] to <8 x i64>
+; CHECK-NEXT: [[TMP17:%.*]] = xor <8 x i64> [[TMP15]], [[TMP16]]
+; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], zeroinitializer
+; CHECK-NEXT: [[TMP19:%.*]] = or <8 x i64> [[TMP18]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP19]], <8 x i64> [[TMP14]]
+; CHECK-NEXT: [[TMP20:%.*]] = select <8 x i1> [[TMP13]], <8 x double> [[TMP10]], <8 x double> [[TMP11]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP20]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
+ ret <8 x double> %res
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x float> [[X2:%.*]])
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[X1]] to <16 x float>
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP9]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
+ ret <16 x float> %res
+}
+
+define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP10:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x float> [[X2:%.*]])
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[X1]] to <16 x float>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> zeroinitializer, <16 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x float> [[TMP10]] to <16 x i32>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x float> [[TMP11]] to <16 x i32>
+; CHECK-NEXT: [[TMP17:%.*]] = xor <16 x i32> [[TMP15]], [[TMP16]]
+; CHECK-NEXT: [[TMP18:%.*]] = or <16 x i32> [[TMP17]], zeroinitializer
+; CHECK-NEXT: [[TMP19:%.*]] = or <16 x i32> [[TMP18]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP19]], <16 x i32> [[TMP14]]
+; CHECK-NEXT: [[TMP20:%.*]] = select <16 x i1> [[TMP13]], <16 x float> [[TMP10]], <16 x float> [[TMP11]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP20]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
+ ret <16 x float> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]])
+; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP4]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP1]], <8 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[X1]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[X1]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP12]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+ ret <8 x i64> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, i16 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[X2:%.*]] = load <16 x i32>, ptr [[X2P:%.*]], align 64
+; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[X2P]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
+; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[_MSLD]]
+; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x i32> [[X2]])
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]]
+; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP17]]
+;
+ %x2 = load <16 x i32>, ptr %x2p
+ %res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
+ ret <16 x i32> %res
+}
+
+declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, ptr %x2ptr, i8 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[X2S:%.*]] = load double, ptr [[X2PTR:%.*]], align 8
+; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[X2PTR]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
+; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP9]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0
+; CHECK-NEXT: [[X2INS:%.*]] = insertelement <8 x double> undef, double [[X2S]], i32 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer
+; CHECK-NEXT: [[X2:%.*]] = shufflevector <8 x double> [[X2INS]], <8 x double> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[_MSPROP1]] to i512
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP12]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[TMP15:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X1:%.*]], <8 x i64> [[X0:%.*]], <8 x double> [[X2]])
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x double> [[TMP15]] to <8 x i64>
+; CHECK-NEXT: [[TMP20:%.*]] = xor <8 x i64> [[TMP19]], zeroinitializer
+; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i64> [[TMP20]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i64> [[TMP21]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP16]], <8 x i64> [[TMP22]], <8 x i64> [[TMP18]]
+; CHECK-NEXT: [[TMP23:%.*]] = select <8 x i1> [[TMP17]], <8 x double> [[TMP15]], <8 x double> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP23]]
+;
+ %x2s = load double, ptr %x2ptr
+ %x2ins = insertelement <8 x double> undef, double %x2s, i32 0
+ %x2 = shufflevector <8 x double> %x2ins, <8 x double> undef, <8 x i32> zeroinitializer
+ %res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
+ ret <8 x double> %res
+}
+
+declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP10:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x float> [[X2:%.*]])
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[TMP10]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP17]], <16 x i32> [[TMP13]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP12]], <16 x float> [[TMP10]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
+ ret <16 x float> %res
+}
+
+
+declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X1:%.*]], <8 x i64> [[X0:%.*]], <8 x i64> [[X2:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP1]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP12]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+ ret <8 x i64> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_vpermt2var_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x i32> [[X2:%.*]])
+; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP4]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
+ ret <16 x i32> %res
+}
+
+define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x i32> [[X2:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP1]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[X1]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP1]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[X1]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP12]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
+ ret <16 x i32> %res
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
+
+define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) #0 {
+; CHECK-LABEL: @test_vsubps_rn(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 -1, i32 8)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) #0 {
+; CHECK-LABEL: @test_vsubps_rd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 -1, i32 9)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) #0 {
+; CHECK-LABEL: @test_vsubps_ru(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 -1, i32 10)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) #0 {
+; CHECK-LABEL: @test_vsubps_rz(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 -1, i32 11)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) #0 {
+; CHECK-LABEL: @test_vmulps_rn(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 -1, i32 8)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) #0 {
+; CHECK-LABEL: @test_vmulps_rd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 -1, i32 9)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) #0 {
+; CHECK-LABEL: @test_vmulps_ru(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 -1, i32 10)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) #0 {
+; CHECK-LABEL: @test_vmulps_rz(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 -1, i32 11)
+ ret <16 x float> %res
+}
+
+;; mask float
+define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+;
+; CHECK-LABEL: @test_vmulps_mask_rn(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP16]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 %mask, i32 8)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+;
+; CHECK-LABEL: @test_vmulps_mask_rd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP16]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 %mask, i32 9)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+;
+; CHECK-LABEL: @test_vmulps_mask_ru(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP16]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 %mask, i32 10)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+;
+; CHECK-LABEL: @test_vmulps_mask_rz(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP16]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 %mask, i32 11)
+ ret <16 x float> %res
+}
+
+;; With Passthru value
+define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) #0 {
+;
+; CHECK-LABEL: @test_vmulps_mask_passthru_rn(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[PASSTHRU]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> %passthru, i16 %mask, i32 8)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) #0 {
+;
+; CHECK-LABEL: @test_vmulps_mask_passthru_rd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[PASSTHRU]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> %passthru, i16 %mask, i32 9)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) #0 {
+;
+; CHECK-LABEL: @test_vmulps_mask_passthru_ru(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[PASSTHRU]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> %passthru, i16 %mask, i32 10)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) #0 {
+;
+; CHECK-LABEL: @test_vmulps_mask_passthru_rz(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[PASSTHRU]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> %passthru, i16 %mask, i32 11)
+ ret <16 x float> %res
+}
+
+;; mask double
+define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) #0 {
+;
+; CHECK-LABEL: @test_vmulpd_mask_rn(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 8)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP16]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
+ <8 x double> zeroinitializer, i8 %mask, i32 8)
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) #0 {
+;
+; CHECK-LABEL: @test_vmulpd_mask_rd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 9)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP16]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
+ <8 x double> zeroinitializer, i8 %mask, i32 9)
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) #0 {
+;
+; CHECK-LABEL: @test_vmulpd_mask_ru(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 10)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP16]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
+ <8 x double> zeroinitializer, i8 %mask, i32 10)
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) #0 {
+;
+; CHECK-LABEL: @test_vmulpd_mask_rz(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 11)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP16]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
+ <8 x double> zeroinitializer, i8 %mask, i32 11)
+ ret <8 x double> %res
+}
+
+define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+;
+; CHECK-LABEL: @test_mm512_maskz_add_round_ps_rn_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; 
CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8) + ret <16 x float> %res +} +define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_maskz_add_round_ps_rd_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 9) + ret <16 x float> %res +} +define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_maskz_add_round_ps_ru_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 
7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 10) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_maskz_add_round_ps_rz_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 11) + ret <16 x float> %res +} + + +define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 
x float> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_maskz_add_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_mask_add_round_ps_rn_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 
8) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8) + ret <16 x float> %res +} +define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_mask_add_round_ps_rd_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 9) + ret <16 
x float> %res +} +define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_mask_add_round_ps_ru_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 10) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_mask_add_round_ps_rz_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], 
label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 11) + ret <16 x float> %res +} + + +define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_mask_add_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x 
float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4) + ret <16 x float> %res +} + + +define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_add_round_ps_rn_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8) + ret <16 x float> %res +} +define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_add_round_ps_rd_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 9) + ret <16 x float> %res +} +define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_add_round_ps_ru_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp 
ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 10) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_add_round_ps_rz_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 11) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_add_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4) + ret <16 x float> %res +} 
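+ +; Note: the trailing i32 immediate in these @llvm.x86.avx512.mask.* calls selects +; the rounding mode: 8, 9, 10 and 11 are round-to-nearest, round-down, round-up +; and round-toward-zero with suppress-all-exceptions (SAE), while 4 uses the +; current MXCSR rounding mode. The rounding mode should not affect shadow +; propagation, so each variant is expected to receive identical MSan +; instrumentation apart from the immediate itself. +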
+declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) + +define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_mask_sub_round_ps_rn_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8) + ret <16 x float> %res +} +define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_mask_sub_round_ps_rd_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; 
CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 9) + ret <16 x float> %res +} +define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_mask_sub_round_ps_ru_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], 
<16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 10) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_mask_sub_round_ps_rz_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 11) + ret <16 x float> %res +} + + +define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_mask_sub_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) 
to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_sub_round_ps_rn_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8) + ret <16 x float> %res +} +define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_sub_round_ps_rd_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call 
void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 9) + ret <16 x float> %res +} +define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_sub_round_ps_ru_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 10) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_sub_round_ps_rz_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %res = call <16 x float> 
@llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 11) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_sub_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_maskz_div_round_ps_rn_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; 
CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8) + ret <16 x float> %res +} +define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_maskz_div_round_ps_rd_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 9) + ret <16 x float> %res +} +define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_maskz_div_round_ps_ru_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 
7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 10) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_maskz_div_round_ps_rz_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 11) + ret <16 x float> %res +} + + +define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 
x float> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_maskz_div_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_mask_div_round_ps_rn_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 
8) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_mask_div_round_ps_rd_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 9) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_mask_div_round_ps_ru_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 10) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_mask_div_round_ps_rz_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], 
label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 11) + ret <16 x float> %res +} + + +define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_mask_div_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x 
float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_div_round_ps_rn_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_div_round_ps_rd_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 9) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_div_round_ps_ru_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp 
ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 10) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_div_round_ps_rz_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 11) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_div_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4) + ret <16 x float> %res +} 
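+ +; In the *_round_* tests above, the trailing i32 immediate selects the +; rounding mode: 8 = rn_sae, 9 = rd_sae, 10 = ru_sae, 11 = rz_sae, and +; 4 = CUR_DIRECTION. The compress-store and expand-load tests that follow +; exercise intrinsics that are auto-upgraded to @llvm.masked.compressstore +; and @llvm.masked.expandload; their CHECK lines verify that MSan replays +; each access on the shadow address (the application address xor'd with +; 87960930222080, i.e. 0x500000000000) using the same mask vector. +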
+declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) + +define void @test_mask_compress_store_pd_512(ptr %addr, <8 x double> %data, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_compress_store_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[ADDR:%.*]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: call void @llvm.masked.compressstore.v8i64(<8 x i64> [[TMP3]], ptr [[TMP8]], <8 x i1> [[TMP5]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i1> [[TMP4]] to i8 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: call void @llvm.masked.compressstore.v8f64(<8 x double> [[DATA:%.*]], ptr [[ADDR]], <8 x i1> [[TMP5]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.compress.store.pd.512(ptr %addr, <8 x double> %data, i8 %mask) + ret void +} + +declare void @llvm.x86.avx512.mask.compress.store.pd.512(ptr %addr, <8 x double> %data, i8 %mask) + +define void @test_compress_store_pd_512(ptr %addr, <8 x double> %data) #0 { +; +; CHECK-LABEL: @test_compress_store_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ADDR:%.*]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 87960930222080 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; CHECK-NEXT: call void @llvm.masked.compressstore.v8i64(<8 x i64> [[TMP2]], ptr [[TMP5]], <8 x i1> splat (i1 true)) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: call void @llvm.masked.compressstore.v8f64(<8 x double> [[DATA:%.*]], ptr [[ADDR]], <8 x i1> splat (i1 true)) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.compress.store.pd.512(ptr %addr, <8 x double> %data, i8 -1) + ret void +} + +define void @test_mask_compress_store_ps_512(ptr %addr, <16 x float> %data, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_compress_store_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 
8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[ADDR:%.*]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: call void @llvm.masked.compressstore.v16i32(<16 x i32> [[TMP3]], ptr [[TMP8]], <16 x i1> [[TMP5]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i1> [[TMP4]] to i16 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: call void @llvm.masked.compressstore.v16f32(<16 x float> [[DATA:%.*]], ptr [[ADDR]], <16 x i1> [[TMP5]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.compress.store.ps.512(ptr %addr, <16 x float> %data, i16 %mask) + ret void +} + +declare void @llvm.x86.avx512.mask.compress.store.ps.512(ptr %addr, <16 x float> %data, i16 %mask) + +define void @test_compress_store_ps_512(ptr %addr, <16 x float> %data) #0 { +; +; CHECK-LABEL: @test_compress_store_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ADDR:%.*]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 87960930222080 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; CHECK-NEXT: call void @llvm.masked.compressstore.v16i32(<16 x i32> [[TMP2]], ptr [[TMP5]], <16 x i1> splat (i1 true)) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: call void @llvm.masked.compressstore.v16f32(<16 x float> [[DATA:%.*]], ptr [[ADDR]], <16 x i1> splat (i1 true)) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.compress.store.ps.512(ptr %addr, <16 x float> %data, i16 -1) + ret void +} + +define void @test_mask_compress_store_q_512(ptr %addr, <8 x i64> %data, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_compress_store_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[ADDR:%.*]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: call void @llvm.masked.compressstore.v8i64(<8 x i64> [[TMP3]], ptr [[TMP8]], <8 x i1> [[TMP5]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i1> [[TMP4]] 
to i8 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: call void @llvm.masked.compressstore.v8i64(<8 x i64> [[DATA:%.*]], ptr [[ADDR]], <8 x i1> [[TMP5]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.compress.store.q.512(ptr %addr, <8 x i64> %data, i8 %mask) + ret void +} + +declare void @llvm.x86.avx512.mask.compress.store.q.512(ptr %addr, <8 x i64> %data, i8 %mask) + +define void @test_compress_store_q_512(ptr %addr, <8 x i64> %data) #0 { +; +; CHECK-LABEL: @test_compress_store_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ADDR:%.*]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 87960930222080 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; CHECK-NEXT: call void @llvm.masked.compressstore.v8i64(<8 x i64> [[TMP2]], ptr [[TMP5]], <8 x i1> splat (i1 true)) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: call void @llvm.masked.compressstore.v8i64(<8 x i64> [[DATA:%.*]], ptr [[ADDR]], <8 x i1> splat (i1 true)) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.compress.store.q.512(ptr %addr, <8 x i64> %data, i8 -1) + ret void +} + +define void @test_mask_compress_store_d_512(ptr %addr, <16 x i32> %data, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_compress_store_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[ADDR:%.*]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: call void @llvm.masked.compressstore.v16i32(<16 x i32> [[TMP3]], ptr [[TMP8]], <16 x i1> [[TMP5]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i1> [[TMP4]] to i16 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: call void @llvm.masked.compressstore.v16i32(<16 x i32> [[DATA:%.*]], ptr [[ADDR]], <16 x i1> [[TMP5]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.compress.store.d.512(ptr %addr, <16 x i32> %data, i16 %mask) + ret void +} + +declare void 
@llvm.x86.avx512.mask.compress.store.d.512(ptr %addr, <16 x i32> %data, i16 %mask) + +define void @test_compress_store_d_512(ptr %addr, <16 x i32> %data) #0 { +; +; CHECK-LABEL: @test_compress_store_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ADDR:%.*]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 87960930222080 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; CHECK-NEXT: call void @llvm.masked.compressstore.v16i32(<16 x i32> [[TMP2]], ptr [[TMP5]], <16 x i1> splat (i1 true)) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: call void @llvm.masked.compressstore.v16i32(<16 x i32> [[DATA:%.*]], ptr [[ADDR]], <16 x i1> splat (i1 true)) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.compress.store.d.512(ptr %addr, <16 x i32> %data, i16 -1) + ret void +} + +define <8 x double> @test_mask_expand_load_pd_512(ptr %addr, <8 x double> %data, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_expand_load_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[ADDR:%.*]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSMASKEDEXPLOAD:%.*]] = call <8 x i64> @llvm.masked.expandload.v8i64(ptr [[TMP8]], <8 x i1> [[TMP5]], <8 x i64> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i1> [[TMP4]] to i8 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = call <8 x double> @llvm.masked.expandload.v8f64(ptr [[ADDR]], <8 x i1> [[TMP5]], <8 x double> [[DATA:%.*]]) +; CHECK-NEXT: store <8 x i64> [[_MSMASKEDEXPLOAD]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP12]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(ptr %addr, <8 x double> %data, i8 %mask) + ret <8 x double> %res +} + +define <8 x double> @test_maskz_expand_load_pd_512(ptr %addr, i8 %mask) #0 { +; +; CHECK-LABEL: @test_maskz_expand_load_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[TMP1]] to <8 x 
i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ADDR:%.*]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSMASKEDEXPLOAD:%.*]] = call <8 x i64> @llvm.masked.expandload.v8i64(ptr [[TMP7]], <8 x i1> [[TMP4]], <8 x i64> zeroinitializer) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i1> [[TMP3]] to i8 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = call <8 x double> @llvm.masked.expandload.v8f64(ptr [[ADDR]], <8 x i1> [[TMP4]], <8 x double> zeroinitializer) +; CHECK-NEXT: store <8 x i64> [[_MSMASKEDEXPLOAD]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP11]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(ptr %addr, <8 x double> zeroinitializer, i8 %mask) + ret <8 x double> %res +} + +declare <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(ptr %addr, <8 x double> %data, i8 %mask) + +define <8 x double> @test_expand_load_pd_512(ptr %addr, <8 x double> %data) #0 { +; +; CHECK-LABEL: @test_expand_load_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ADDR:%.*]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 87960930222080 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; CHECK-NEXT: [[_MSMASKEDEXPLOAD:%.*]] = call <8 x i64> @llvm.masked.expandload.v8i64(ptr [[TMP5]], <8 x i1> splat (i1 true), <8 x i64> [[TMP2]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.masked.expandload.v8f64(ptr [[ADDR]], <8 x i1> splat (i1 true), <8 x double> [[DATA:%.*]]) +; CHECK-NEXT: store <8 x i64> [[_MSMASKEDEXPLOAD]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP8]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(ptr %addr, <8 x double> %data, i8 -1) + ret <8 x double> %res +} + +; Make sure we don't crash if you pass 0 to the mask. 
+define <8 x double> @test_zero_mask_expand_load_pd_512(ptr %addr, <8 x double> %data, i8 %mask) #0 { +; CHECK-LABEL: @test_zero_mask_expand_load_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ADDR:%.*]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 87960930222080 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; CHECK-NEXT: [[_MSMASKEDEXPLOAD:%.*]] = call <8 x i64> @llvm.masked.expandload.v8i64(ptr [[TMP5]], <8 x i1> zeroinitializer, <8 x i64> [[TMP2]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.masked.expandload.v8f64(ptr [[ADDR]], <8 x i1> zeroinitializer, <8 x double> [[DATA:%.*]]) +; CHECK-NEXT: store <8 x i64> [[_MSMASKEDEXPLOAD]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP8]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(ptr %addr, <8 x double> %data, i8 0) + ret <8 x double> %res +} + +define <16 x float> @test_mask_expand_load_ps_512(ptr %addr, <16 x float> %data, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_expand_load_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[ADDR:%.*]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSMASKEDEXPLOAD:%.*]] = call <16 x i32> @llvm.masked.expandload.v16i32(ptr [[TMP8]], <16 x i1> [[TMP5]], <16 x i32> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i1> [[TMP4]] to i16 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = call <16 x float> @llvm.masked.expandload.v16f32(ptr [[ADDR]], <16 x i1> [[TMP5]], <16 x float> [[DATA:%.*]]) +; CHECK-NEXT: store <16 x i32> [[_MSMASKEDEXPLOAD]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP12]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(ptr %addr, <16 x float> %data, i16 %mask) + ret <16 x float> %res +} + +define <16 x float> @test_maskz_expand_load_ps_512(ptr %addr, i16 %mask) #0 { +; +; CHECK-LABEL: @test_maskz_expand_load_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = 
load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ADDR:%.*]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSMASKEDEXPLOAD:%.*]] = call <16 x i32> @llvm.masked.expandload.v16i32(ptr [[TMP7]], <16 x i1> [[TMP4]], <16 x i32> zeroinitializer) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i1> [[TMP3]] to i16 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = call <16 x float> @llvm.masked.expandload.v16f32(ptr [[ADDR]], <16 x i1> [[TMP4]], <16 x float> zeroinitializer) +; CHECK-NEXT: store <16 x i32> [[_MSMASKEDEXPLOAD]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP11]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(ptr %addr, <16 x float> zeroinitializer, i16 %mask) + ret <16 x float> %res +} + +declare <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(ptr %addr, <16 x float> %data, i16 %mask) + +define <16 x float> @test_expand_load_ps_512(ptr %addr, <16 x float> %data) #0 { +; +; CHECK-LABEL: @test_expand_load_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ADDR:%.*]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 87960930222080 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; CHECK-NEXT: [[_MSMASKEDEXPLOAD:%.*]] = call <16 x i32> @llvm.masked.expandload.v16i32(ptr [[TMP5]], <16 x i1> splat (i1 true), <16 x i32> [[TMP2]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.masked.expandload.v16f32(ptr [[ADDR]], <16 x i1> splat (i1 true), <16 x float> [[DATA:%.*]]) +; CHECK-NEXT: store <16 x i32> [[_MSMASKEDEXPLOAD]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP8]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(ptr %addr, <16 x float> %data, i16 -1) + ret <16 x float> %res +} + +define <8 x i64> @test_mask_expand_load_q_512(ptr %addr, <8 x i64> %data, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_expand_load_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 
[[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[ADDR:%.*]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSMASKEDEXPLOAD:%.*]] = call <8 x i64> @llvm.masked.expandload.v8i64(ptr [[TMP8]], <8 x i1> [[TMP5]], <8 x i64> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i1> [[TMP4]] to i8 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = call <8 x i64> @llvm.masked.expandload.v8i64(ptr [[ADDR]], <8 x i1> [[TMP5]], <8 x i64> [[DATA:%.*]]) +; CHECK-NEXT: store <8 x i64> [[_MSMASKEDEXPLOAD]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP12]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(ptr %addr, <8 x i64> %data, i8 %mask) + ret <8 x i64> %res +} + +define <8 x i64> @test_maskz_expand_load_q_512(ptr %addr, i8 %mask) #0 { +; +; CHECK-LABEL: @test_maskz_expand_load_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ADDR:%.*]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSMASKEDEXPLOAD:%.*]] = call <8 x i64> @llvm.masked.expandload.v8i64(ptr [[TMP7]], <8 x i1> [[TMP4]], <8 x i64> zeroinitializer) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i1> [[TMP3]] to i8 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i64> @llvm.masked.expandload.v8i64(ptr [[ADDR]], <8 x i1> [[TMP4]], <8 x i64> zeroinitializer) +; CHECK-NEXT: store <8 x i64> [[_MSMASKEDEXPLOAD]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP11]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(ptr %addr, <8 x i64> zeroinitializer, i8 %mask) + ret <8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(ptr %addr, <8 x i64> %data, i8 %mask) + +define <8 x i64> @test_expand_load_q_512(ptr %addr, <8 x i64> %data) #0 { +; +; CHECK-LABEL: @test_expand_load_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ADDR:%.*]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 87960930222080 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; CHECK-NEXT: [[_MSMASKEDEXPLOAD:%.*]] = call 
<8 x i64> @llvm.masked.expandload.v8i64(ptr [[TMP5]], <8 x i1> splat (i1 true), <8 x i64> [[TMP2]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i64> @llvm.masked.expandload.v8i64(ptr [[ADDR]], <8 x i1> splat (i1 true), <8 x i64> [[DATA:%.*]]) +; CHECK-NEXT: store <8 x i64> [[_MSMASKEDEXPLOAD]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP8]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(ptr %addr, <8 x i64> %data, i8 -1) + ret <8 x i64> %res +} + +define <16 x i32> @test_mask_expand_load_d_512(ptr %addr, <16 x i32> %data, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_expand_load_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[ADDR:%.*]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSMASKEDEXPLOAD:%.*]] = call <16 x i32> @llvm.masked.expandload.v16i32(ptr [[TMP8]], <16 x i1> [[TMP5]], <16 x i32> [[TMP3]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i1> [[TMP4]] to i16 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[TMP12:%.*]] = call <16 x i32> @llvm.masked.expandload.v16i32(ptr [[ADDR]], <16 x i1> [[TMP5]], <16 x i32> [[DATA:%.*]]) +; CHECK-NEXT: store <16 x i32> [[_MSMASKEDEXPLOAD]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP12]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(ptr %addr, <16 x i32> %data, i16 %mask) + ret <16 x i32> %res +} + +define <16 x i32> @test_maskz_expand_load_d_512(ptr %addr, i16 %mask) #0 { +; +; CHECK-LABEL: @test_maskz_expand_load_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ADDR:%.*]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSMASKEDEXPLOAD:%.*]] = call <16 x i32> @llvm.masked.expandload.v16i32(ptr [[TMP7]], <16 x i1> [[TMP4]], <16 x i32> zeroinitializer) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i1> [[TMP3]] to i16 +; 
CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.masked.expandload.v16i32(ptr [[ADDR]], <16 x i1> [[TMP4]], <16 x i32> zeroinitializer) +; CHECK-NEXT: store <16 x i32> [[_MSMASKEDEXPLOAD]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP11]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(ptr %addr, <16 x i32> zeroinitializer, i16 %mask) + ret <16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(ptr %addr, <16 x i32> %data, i16 %mask) + +define <16 x i32> @test_expand_load_d_512(ptr %addr, <16 x i32> %data) #0 { +; +; CHECK-LABEL: @test_expand_load_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ADDR:%.*]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 87960930222080 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; CHECK-NEXT: [[_MSMASKEDEXPLOAD:%.*]] = call <16 x i32> @llvm.masked.expandload.v16i32(ptr [[TMP5]], <16 x i1> splat (i1 true), <16 x i32> [[TMP2]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i32> @llvm.masked.expandload.v16i32(ptr [[ADDR]], <16 x i1> splat (i1 true), <16 x i32> [[DATA:%.*]]) +; CHECK-NEXT: store <16 x i32> [[_MSMASKEDEXPLOAD]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP8]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(ptr %addr, <16 x i32> %data, i16 -1) + ret <16 x i32> %res +} + +define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_maskz_min_round_ps_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; 
CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_maskz_min_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_mask_min_round_ps_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr 
@__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_mask_min_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; 
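+; Note: with merge masking the false arm is %src, so the select shadow below also folds in the %src shadow (param TLS offset 128) and the xor of the min result with %src wherever the mask bit itself is poisoned.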
CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_min_round_ps_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_min_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x 
float> [[TMP7]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4) + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) + +define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_maskz_max_round_ps_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_maskz_max_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call 
void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_mask_max_round_ps_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret 
<16 x float> [[TMP18]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mm512_mask_max_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_max_round_ps_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; 
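+; Note: like the min tests above, both <16 x float> operands are checked strictly; any poisoned bit in %a0 or %a1 reaches this warning call rather than being propagated through the max.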
CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8) + ret <16 x float> %res +} + +define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_max_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4) + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) + +define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) #0 { +; CHECK-LABEL: @test_sqrt_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x double> @llvm.sqrt.v8f64(<8 x double> [[A0:%.*]]) +; CHECK-NEXT: store <8 x i64> [[TMP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP2]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> undef, i8 -1, i32 4) + ret <8 x double> %res +} +define <8 x double> @test_mask_sqrt_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_sqrt_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x double> @llvm.sqrt.v8f64(<8 x double> [[A0:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP1]], <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x double> [[TMP4]] to <8 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x double> [[PASSTHRU:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP8]], 
[[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP1]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP12]], <8 x i64> [[TMP7]] +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP6]], <8 x double> [[TMP4]], <8 x double> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP13]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> %passthru, i8 %mask, i32 4) + ret <8 x double> %res +} +define <8 x double> @test_maskz_sqrt_pd_512(<8 x double> %a0, i8 %mask) #0 { +; +; CHECK-LABEL: @test_maskz_sqrt_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x double> @llvm.sqrt.v8f64(<8 x double> [[A0:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP1]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x double> [[TMP3]] to <8 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP1]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i64> [[TMP10]], <8 x i64> [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP5]], <8 x double> [[TMP3]], <8 x double> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP11]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 %mask, i32 4) + ret <8 x double> %res +} +define <8 x double> @test_sqrt_round_pd_512(<8 x double> %a0) #0 { +; CHECK-LABEL: @test_sqrt_round_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> [[A0:%.*]], i32 11) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP5]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> undef, i8 -1, i32 11) + ret <8 x double> %res +} +define <8 x double> @test_mask_sqrt_round_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_sqrt_round_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void 
@llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> [[A0:%.*]], i32 11) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> zeroinitializer, <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x double> [[TMP7]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[PASSTHRU:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP15]], <8 x i64> [[TMP10]] +; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP9]], <8 x double> [[TMP7]], <8 x double> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP16]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> %passthru, i8 %mask, i32 11) + ret <8 x double> %res +} +define <8 x double> @test_maskz_sqrt_round_pd_512(<8 x double> %a0, i8 %mask) #0 { +; +; CHECK-LABEL: @test_maskz_sqrt_round_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> [[A0:%.*]], i32 11) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x double> [[TMP6]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i64> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP13]], <8 x i64> [[TMP9]] +; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP8]], <8 x double> [[TMP6]], <8 x double> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP14]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 %mask, i32 11) + ret <8 x double> %res +} +declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone + +define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) #0 { +; 
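+; Note: with CUR_DIRECTION rounding (i32 4) the masked sqrt becomes the generic @llvm.sqrt.v16f32 and the argument shadow is forwarded bit-for-bit; the *_round_* variants below keep @llvm.x86.avx512.sqrt.ps.512 (i32 11) and get a strict shadow check instead.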
CHECK-LABEL: @test_sqrt_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[A0:%.*]]) +; CHECK-NEXT: store <16 x i32> [[TMP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP2]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> undef, i16 -1, i32 4) + ret <16 x float> %res +} +define <16 x float> @test_mask_sqrt_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_sqrt_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[A0:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP1]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[TMP4]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP1]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP12]], <16 x i32> [[TMP7]] +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP6]], <16 x float> [[TMP4]], <16 x float> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP13]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> %passthru, i16 %mask, i32 4) + ret <16 x float> %res +} +define <16 x float> @test_maskz_sqrt_ps_512(<16 x float> %a0, i16 %mask) #0 { +; +; CHECK-LABEL: @test_maskz_sqrt_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[A0:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP1]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x float> [[TMP3]] to <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP1]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i32> [[TMP10]], <16 x i32> [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP5]], <16 x float> [[TMP3]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP11]] +; + %res = call 
<16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4) + ret <16 x float> %res +} +define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) #0 { +; CHECK-LABEL: @test_sqrt_round_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> [[A0:%.*]], i32 11) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP5]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 11) + ret <16 x float> %res +} +define <16 x float> @test_mask_sqrt_round_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_sqrt_round_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> [[A0:%.*]], i32 11) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> zeroinitializer, <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x float> [[TMP7]] to <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP15]], <16 x i32> [[TMP10]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP9]], <16 x float> [[TMP7]], <16 x float> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> %passthru, i16 %mask, i32 11) + ret <16 x float> %res +} +define <16 x float> @test_maskz_sqrt_round_ps_512(<16 x float> %a0, i16 %mask) #0 { +; +; CHECK-LABEL: @test_maskz_sqrt_round_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void 
@llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> [[A0:%.*]], i32 11) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x float> [[TMP6]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP13]], <16 x i32> [[TMP9]] +; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP8]], <16 x float> [[TMP6]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP14]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 %mask, i32 11) + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone + +declare <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) + +define <16 x i32>@test_int_x86_avx512_prolv_d_512_old(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_prolv_d_512_old( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <16 x i32> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[X0:%.*]], <16 x i32> [[X0]], <16 x i32> [[X1]]) +; CHECK-NEXT: store <16 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP7]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) + ret <16 x i32> %res +} + +define <16 x i32>@test_int_x86_avx512_mask_prolv_d_512_old(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_prolv_d_512_old( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void 
@llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> [[X1:%.*]]) +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[X0:%.*]], <16 x i32> [[X0]], <16 x i32> [[X1]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP8]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP9]], [[X2:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[TMP8]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP9]], <16 x i32> [[X2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP16]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) + ret <16 x i32> %res +} + +define <16 x i32>@test_int_x86_avx512_maskz_prolv_d_512_old(<16 x i32> %x0, <16 x i32> %x1, i16 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_maskz_prolv_d_512_old( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> [[X1:%.*]]) +; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i32> [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[X0:%.*]], <16 x i32> [[X0]], <16 x i32> [[X1]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <16 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP7]] +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP14]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP8]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP15]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3) + ret <16 x i32> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) + +define <8 x i64>@test_int_x86_avx512_prolv_q_512_old(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) #0 { 
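+; Note: prolv/prorv become @llvm.fshl/@llvm.fshr funnel shifts; the shadow is rotated by the same amounts and then or'ed with sext(icmp ne shift-shadow, 0), so a lane with a poisoned rotate count becomes fully poisoned.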
+; CHECK-LABEL: @test_int_x86_avx512_prolv_q_512_old( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i64> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[X0:%.*]], <8 x i64> [[X0]], <8 x i64> [[X1]]) +; CHECK-NEXT: store <8 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP7]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) + ret <8 x i64> %res +} + +define <8 x i64>@test_int_x86_avx512_mask_prolv_q_512_old(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_prolv_q_512_old( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> [[X1:%.*]]) +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[X0:%.*]], <8 x i64> [[X0]], <8 x i64> [[X1]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP8]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP9]], [[X2:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[TMP8]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP15]], <8 x i64> [[TMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP9]], <8 x i64> [[X2]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP16]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) + ret <8 x i64> %res +} + +define <8 x i64>@test_int_x86_avx512_maskz_prolv_q_512_old(<8 x i64> %x0, <8 x i64> %x1, i8 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_maskz_prolv_q_512_old( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to 
ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> [[X1:%.*]]) +; CHECK-NEXT: [[TMP7:%.*]] = or <8 x i64> [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[X0:%.*]], <8 x i64> [[X0]], <8 x i64> [[X1]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP7]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <8 x i64> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP7]] +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP14]], <8 x i64> [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP8]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP15]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3) + ret <8 x i64> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) + +define <16 x i32>@test_int_x86_avx512_prorv_d_512_old(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_prorv_d_512_old( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <16 x i32> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[X0:%.*]], <16 x i32> [[X0]], <16 x i32> [[X1]]) +; CHECK-NEXT: store <16 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP7]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) + ret <16 x i32> %res +} + +define <16 x i32>@test_int_x86_avx512_mask_prorv_d_512_old(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_prorv_d_512_old( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] 
= call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[X0:%.*]], <16 x i32> [[X0]], <16 x i32> [[X1]])
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP8]], <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP9]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[TMP8]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP9]], <16 x i32> [[X2]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP16]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
+ ret <16 x i32> %res
+}
+
+define <16 x i32>@test_int_x86_avx512_maskz_prorv_d_512_old(<16 x i32> %x0, <16 x i32> %x1, i16 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_prorv_d_512_old(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i32> [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[X0:%.*]], <16 x i32> [[X0]], <16 x i32> [[X1]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = xor <16 x i32> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP7]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP14]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP15:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP8]], <16 x i32> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP15]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
+ ret <16 x i32> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_prorv_q_512_old(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_prorv_q_512_old(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i64> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[X0:%.*]], <8 x i64> [[X0]], <8 x i64> [[X1]])
+; CHECK-NEXT: store <8 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP7]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_mask_prorv_q_512_old(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_prorv_q_512_old(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[X0:%.*]], <8 x i64> [[X0]], <8 x i64> [[X1]])
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP8]], <8 x i64> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP9]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[TMP8]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP15]], <8 x i64> [[TMP12]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP9]], <8 x i64> [[X2]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP16]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+ ret <8 x i64> %res
+}
+
+define <8 x i64>@test_int_x86_avx512_maskz_prorv_q_512_old(<8 x i64> %x0, <8 x i64> %x1, i8 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_prorv_q_512_old(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i64>
+; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP7:%.*]] = or <8 x i64> [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[X0:%.*]], <8 x i64> [[X0]], <8 x i64> [[X1]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP7]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = xor <8 x i64> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP7]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP14]], <8 x i64> [[TMP11]]
+; CHECK-NEXT: [[TMP15:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP8]], <8 x i64> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP15]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
+ ret <8 x i64> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32>, i32, <16 x i32>, i16)
+
+define { <16 x i32>, <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_prol_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_prol_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> splat (i32 3))
+; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[X0:%.*]], <16 x i32> [[X0]], <16 x i32> splat (i32 3))
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP5]], <16 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[TMP6]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP5]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP12]], <16 x i32> [[TMP9]]
+; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP6]], <16 x i32> [[X2]]
+; CHECK-NEXT: [[TMP14:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> splat (i32 4))
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[X0]], <16 x i32> [[X0]], <16 x i32> splat (i32 4))
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i16 [[X3]] to <16 x i1>
+; CHECK-NEXT: [[TMP19:%.*]] = select <16 x i1> [[TMP18]], <16 x i32> [[TMP15]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP20:%.*]] = xor <16 x i32> [[TMP16]], zeroinitializer
+; CHECK-NEXT: [[TMP21:%.*]] = or <16 x i32> [[TMP20]], [[TMP15]]
+; CHECK-NEXT: [[TMP22:%.*]] = or <16 x i32> [[TMP21]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <16 x i1> [[TMP17]], <16 x i32> [[TMP22]], <16 x i32> [[TMP19]]
+; CHECK-NEXT: [[TMP23:%.*]] = select <16 x i1> [[TMP18]], <16 x i32> [[TMP16]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP24:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> splat (i32 5))
+; CHECK-NEXT: [[TMP25:%.*]] = or <16 x i32> [[TMP24]], zeroinitializer
+; CHECK-NEXT: [[TMP26:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[X0]], <16 x i32> [[X0]], <16 x i32> splat (i32 5))
+; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } { <16 x i32> splat (i32 -1), <16 x i32> splat (i32 -1), <16 x i32> splat (i32 -1) }, <16 x i32> [[_MSPROP_SELECT]], 0
+; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> [[TMP13]], 0
+; CHECK-NEXT: [[TMP28:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP27]], <16 x i32> [[_MSPROP_SELECT1]], 1
+; CHECK-NEXT: [[RES4:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[RES3]], <16 x i32> [[TMP23]], 1
+; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP28]], <16 x i32> [[TMP25]], 2
+; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[RES4]], <16 x i32> [[TMP26]], 2
+; CHECK-NEXT: store { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP29]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret { <16 x i32>, <16 x i32>, <16 x i32> } [[RES5]]
+;
+ %res0 = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 4, <16 x i32> zeroinitializer, i16 %x3)
+ %res2 = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 5, <16 x i32> %x2, i16 -1)
+ %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0
+ %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res3, <16 x i32> %res1, 1
+ %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res4, <16 x i32> %res2, 2
+ ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64>, i32, <8 x i64>, i8)
+
+define { <8 x i64>, <8 x i64>, <8 x i64> } @test_int_x86_avx512_mask_prol_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_prol_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> splat (i64 3))
+; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i64> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[X0:%.*]], <8 x i64> [[X0]], <8 x i64> splat (i64 3))
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP5]], <8 x i64> [[TMP3]]
+; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP6]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP5]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP12]], <8 x i64> [[TMP9]]
+; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP6]], <8 x i64> [[X2]]
+; CHECK-NEXT: [[TMP14:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> splat (i64 4))
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[X0]], <8 x i64> [[X0]], <8 x i64> splat (i64 4))
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[X3]] to <8 x i1>
+; CHECK-NEXT: [[TMP19:%.*]] = select <8 x i1> [[TMP18]], <8 x i64> [[TMP15]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP20:%.*]] = xor <8 x i64> [[TMP16]], zeroinitializer
+; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i64> [[TMP20]], [[TMP15]]
+; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i64> [[TMP21]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <8 x i1> [[TMP17]], <8 x i64> [[TMP22]], <8 x i64> [[TMP19]]
+; CHECK-NEXT: [[TMP23:%.*]] = select <8 x i1> [[TMP18]], <8 x i64> [[TMP16]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP24:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> splat (i64 5))
+; CHECK-NEXT: [[TMP25:%.*]] = or <8 x i64> [[TMP24]], zeroinitializer
+; CHECK-NEXT: [[TMP26:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[X0]], <8 x i64> [[X0]], <8 x i64> splat (i64 5))
+; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } { <8 x i64> splat (i64 -1), <8 x i64> splat (i64 -1), <8 x i64> splat (i64 -1) }, <8 x i64> [[_MSPROP_SELECT]], 0
+; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> [[TMP13]], 0
+; CHECK-NEXT: [[TMP28:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[TMP27]], <8 x i64> [[_MSPROP_SELECT1]], 1
+; CHECK-NEXT: [[RES4:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[RES3]], <8 x i64> [[TMP23]], 1
+; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[TMP28]], <8 x i64> [[TMP25]], 2
+; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[RES4]], <8 x i64> [[TMP26]], 2
+; CHECK-NEXT: store { <8 x i64>, <8 x i64>, <8 x i64> } [[TMP29]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret { <8 x i64>, <8 x i64>, <8 x i64> } [[RES5]]
+;
+ %res0 = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 4, <8 x i64> zeroinitializer, i8 %x3)
+ %res2 = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 5, <8 x i64> %x2, i8 -1)
+ %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0
+ %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res3, <8 x i64> %res1, 1
+ %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res4, <8 x i64> %res2, 2
+ ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32>, i32, <16 x i32>, i16)
+
+define { <16 x i32>, <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_pror_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_pror_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> splat (i32 3))
+; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[X0:%.*]], <16 x i32> [[X0]], <16 x i32> splat (i32 3))
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP5]], <16 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[TMP6]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP5]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP12]], <16 x i32> [[TMP9]]
+; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP6]], <16 x i32> [[X2]]
+; CHECK-NEXT: [[TMP14:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> splat (i32 4))
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[X0]], <16 x i32> [[X0]], <16 x i32> splat (i32 4))
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i16 [[X3]] to <16 x i1>
+; CHECK-NEXT: [[TMP19:%.*]] = select <16 x i1> [[TMP18]], <16 x i32> [[TMP15]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP20:%.*]] = xor <16 x i32> [[TMP16]], zeroinitializer
+; CHECK-NEXT: [[TMP21:%.*]] = or <16 x i32> [[TMP20]], [[TMP15]]
+; CHECK-NEXT: [[TMP22:%.*]] = or <16 x i32> [[TMP21]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <16 x i1> [[TMP17]], <16 x i32> [[TMP22]], <16 x i32> [[TMP19]]
+; CHECK-NEXT: [[TMP23:%.*]] = select <16 x i1> [[TMP18]], <16 x i32> [[TMP16]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP24:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[TMP1]], <16 x i32> [[TMP1]], <16 x i32> splat (i32 5))
+; CHECK-NEXT: [[TMP25:%.*]] = or <16 x i32> [[TMP24]], zeroinitializer
+; CHECK-NEXT: [[TMP26:%.*]] = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> [[X0]], <16 x i32> [[X0]], <16 x i32> splat (i32 5))
+; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } { <16 x i32> splat (i32 -1), <16 x i32> splat (i32 -1), <16 x i32> splat (i32 -1) }, <16 x i32> [[_MSPROP_SELECT]], 0
+; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> [[TMP13]], 0
+; CHECK-NEXT: [[TMP28:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP27]], <16 x i32> [[_MSPROP_SELECT1]], 1
+; CHECK-NEXT: [[RES4:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[RES3]], <16 x i32> [[TMP23]], 1
+; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP28]], <16 x i32> [[TMP25]], 2
+; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } [[RES4]], <16 x i32> [[TMP26]], 2
+; CHECK-NEXT: store { <16 x i32>, <16 x i32>, <16 x i32> } [[TMP29]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret { <16 x i32>, <16 x i32>, <16 x i32> } [[RES5]]
+;
+ %res0 = call <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32> %x0, i32 4, <16 x i32> zeroinitializer, i16 %x3)
+ %res2 = call <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32> %x0, i32 5, <16 x i32> %x2, i16 -1)
+ %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0
+ %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res3, <16 x i32> %res1, 1
+ %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res4, <16 x i32> %res2, 2
+ ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64>, i32, <8 x i64>, i8)
+
+define { <8 x i64>, <8 x i64>, <8 x i64> } @test_int_x86_avx512_mask_pror_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_pror_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> splat (i64 3))
+; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i64> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[X0:%.*]], <8 x i64> [[X0]], <8 x i64> splat (i64 3))
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP5]], <8 x i64> [[TMP3]]
+; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP6]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP5]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP12]], <8 x i64> [[TMP9]]
+; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP6]], <8 x i64> [[X2]]
+; CHECK-NEXT: [[TMP14:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> splat (i64 4))
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[X0]], <8 x i64> [[X0]], <8 x i64> splat (i64 4))
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[X3]] to <8 x i1>
+; CHECK-NEXT: [[TMP19:%.*]] = select <8 x i1> [[TMP18]], <8 x i64> [[TMP15]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP20:%.*]] = xor <8 x i64> [[TMP16]], zeroinitializer
+; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i64> [[TMP20]], [[TMP15]]
+; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i64> [[TMP21]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select <8 x i1> [[TMP17]], <8 x i64> [[TMP22]], <8 x i64> [[TMP19]]
+; CHECK-NEXT: [[TMP23:%.*]] = select <8 x i1> [[TMP18]], <8 x i64> [[TMP16]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP24:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP1]], <8 x i64> splat (i64 5))
+; CHECK-NEXT: [[TMP25:%.*]] = or <8 x i64> [[TMP24]], zeroinitializer
+; CHECK-NEXT: [[TMP26:%.*]] = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> [[X0]], <8 x i64> [[X0]], <8 x i64> splat (i64 5))
+; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } { <8 x i64> splat (i64 -1), <8 x i64> splat (i64 -1), <8 x i64> splat (i64 -1) }, <8 x i64> [[_MSPROP_SELECT]], 0
+; CHECK-NEXT: [[RES3:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> [[TMP13]], 0
+; CHECK-NEXT: [[TMP28:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[TMP27]], <8 x i64> [[_MSPROP_SELECT1]], 1
+; CHECK-NEXT: [[RES4:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[RES3]], <8 x i64> [[TMP23]], 1
+; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[TMP28]], <8 x i64> [[TMP25]], 2
+; CHECK-NEXT: [[RES5:%.*]] = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } [[RES4]], <8 x i64> [[TMP26]], 2
+; CHECK-NEXT: store { <8 x i64>, <8 x i64>, <8 x i64> } [[TMP29]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret { <8 x i64>, <8 x i64>, <8 x i64> } [[RES5]]
+;
+ %res0 = call <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64> %x0, i32 4, <8 x i64> zeroinitializer, i8 %x3)
+ %res2 = call <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64> %x0, i32 5, <8 x i64> %x2, i8 -1)
+ %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0
+ %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res3, <8 x i64> %res1, 1
+ %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res4, <8 x i64> %res2, 2
+ ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
+
+define <2 x double>@test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_vfmadd_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[X0:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[X2:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP3:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]]
+; CHECK-NEXT: [[_MSPROP4:%.*]] = or i64 [[_MSPROP3]], [[_MSPROP2]]
+; CHECK-NEXT: [[TMP8:%.*]] = call double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <8 x i1> [[TMP9]], i64 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP10]], i64 0
+; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[_MSPROP4]], i64 [[_MSPROP]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast double [[TMP8]] to i64
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast double [[TMP5]] to i64
+; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[_MSPROP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP16]], [[_MSPROP]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP5]], i64 [[TMP17]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP11]], double [[TMP8]], double [[TMP5]]
+; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> [[X0]], double [[TMP18]], i64 0
+; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[X0]], i64 0
+; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[X2]], i64 0
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[_MSPROP7]], 0
+; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i64 [[_MSPROP8]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP19]]
+; CHECK-NEXT: [[_MSCMP20:%.*]] = icmp ne i64 [[_MSPROP9]], 0
+; CHECK-NEXT: [[_MSOR21:%.*]] = or i1 [[_MSOR]], [[_MSCMP20]]
+; CHECK-NEXT: br i1 [[_MSOR21]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]]
+; CHECK: 23:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 24:
+; CHECK-NEXT: [[TMP25:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP20]], double [[TMP21]], double [[TMP22]], i32 11)
+; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <2 x i64> [[TMP1]], i64 0, i64 0
+; CHECK-NEXT: [[TMP26:%.*]] = insertelement <2 x double> [[X0]], double [[TMP25]], i64 0
+; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x double> [[X0]], i64 0
+; CHECK-NEXT: [[_MSPROP12:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x double> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x double> [[X2]], i64 0
+; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i64 [[_MSPROP11]], 0
+; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i64 [[_MSPROP12]], 0
+; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSCMP22]], [[_MSCMP23]]
+; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i64 [[_MSPROP13]], 0
+; CHECK-NEXT: [[_MSOR26:%.*]] = or i1 [[_MSOR24]], [[_MSCMP25]]
+; CHECK-NEXT: br i1 [[_MSOR26]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
+; CHECK: 30:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 31:
+; CHECK-NEXT: [[TMP32:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP27]], double [[TMP28]], double [[TMP29]], i32 10)
+; CHECK-NEXT: [[TMP33:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP34:%.*]] = bitcast i8 [[X3]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP33]], i64 0
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP34]], i64 0
+; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i64 0, i64 [[_MSPROP11]]
+; CHECK-NEXT: [[TMP37:%.*]] = bitcast double [[TMP32]] to i64
+; CHECK-NEXT: [[TMP38:%.*]] = bitcast double [[TMP27]] to i64
+; CHECK-NEXT: [[TMP39:%.*]] = xor i64 [[TMP37]], [[TMP38]]
+; CHECK-NEXT: [[TMP40:%.*]] = or i64 [[TMP39]], 0
+; CHECK-NEXT: [[TMP41:%.*]] = or i64 [[TMP40]], [[_MSPROP11]]
+; CHECK-NEXT: [[_MSPROP_SELECT15:%.*]] = select i1 [[_MSPROP14]], i64 [[TMP41]], i64 [[TMP36]]
+; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], double [[TMP32]], double [[TMP27]]
+; CHECK-NEXT: [[_MSPROP16:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[_MSPROP_SELECT15]], i64 0
+; CHECK-NEXT: [[TMP43:%.*]] = insertelement <2 x double> [[X0]], double [[TMP42]], i64 0
+; CHECK-NEXT: [[_MSPROP17:%.*]] = or <2 x i64> [[_MSPROP6]], [[_MSPROP10]]
+; CHECK-NEXT: [[RES3:%.*]] = fadd <2 x double> [[TMP19]], [[TMP26]]
+; CHECK-NEXT: [[_MSPROP18:%.*]] = or <2 x i64> [[_MSPROP16]], [[_MSPROP17]]
+; CHECK-NEXT: [[RES4:%.*]] = fadd <2 x double> [[TMP43]], [[RES3]]
+; CHECK-NEXT: store <2 x i64> [[_MSPROP18]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES4]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 11)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 10)
+ %res3 = fadd <2 x double> %res, %res1
+ %res4 = fadd <2 x double> %res2, %res3
+ ret <2 x double> %res4
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
+
+define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask_vfmadd_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[X2:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP3:%.*]] = or i32 [[_MSPROP]], [[_MSPROP1]]
+; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP3]], [[_MSPROP2]]
+; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.fma.f32(float [[TMP5]], float [[TMP6]], float [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <8 x i1> [[TMP9]], i64 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP10]], i64 0
+; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[_MSPROP4]], i32 [[_MSPROP]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast float [[TMP8]] to i32
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast float [[TMP5]] to i32
+; CHECK-NEXT: [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP15]], [[_MSPROP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], [[_MSPROP]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP5]], i32 [[TMP17]], i32 [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP11]], float [[TMP8]], float [[TMP5]]
+; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x float> [[X0]], float [[TMP18]], i64 0
+; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[X0]], i64 0
+; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[X2]], i64 0
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[_MSPROP7]], 0
+; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i32 [[_MSPROP8]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP19]]
+; CHECK-NEXT: [[_MSCMP20:%.*]] = icmp ne i32 [[_MSPROP9]], 0
+; CHECK-NEXT: [[_MSOR21:%.*]] = or i1 [[_MSOR]], [[_MSCMP20]]
+; CHECK-NEXT: br i1 [[_MSOR21]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]]
+; CHECK: 23:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 24:
+; CHECK-NEXT: [[TMP25:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP20]], float [[TMP21]], float [[TMP22]], i32 11)
+; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <4 x i32> [[TMP1]], i32 0, i64 0
+; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x float> [[X0]], float [[TMP25]], i64 0
+; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x float> [[X0]], i64 0
+; CHECK-NEXT: [[_MSPROP12:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x float> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x float> [[X2]], i64 0
+; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i32 [[_MSPROP11]], 0
+; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i32 [[_MSPROP12]], 0
+; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSCMP22]], [[_MSCMP23]]
+; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i32 [[_MSPROP13]], 0
+; CHECK-NEXT: [[_MSOR26:%.*]] = or i1 [[_MSOR24]], [[_MSCMP25]]
+; CHECK-NEXT: br i1 [[_MSOR26]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
+; CHECK: 30:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 31:
+; CHECK-NEXT: [[TMP32:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP27]], float [[TMP28]], float [[TMP29]], i32 10)
+; CHECK-NEXT: [[TMP33:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP34:%.*]] = bitcast i8 [[X3]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP33]], i64 0
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP34]], i64 0
+; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 0, i32 [[_MSPROP11]]
+; CHECK-NEXT: [[TMP37:%.*]] = bitcast float [[TMP32]] to i32
+; CHECK-NEXT: [[TMP38:%.*]] = bitcast float [[TMP27]] to i32
+; CHECK-NEXT: [[TMP39:%.*]] = xor i32 [[TMP37]], [[TMP38]]
+; CHECK-NEXT: [[TMP40:%.*]] = or i32 [[TMP39]], 0
+; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP40]], [[_MSPROP11]]
+; CHECK-NEXT: [[_MSPROP_SELECT15:%.*]] = select i1 [[_MSPROP14]], i32 [[TMP41]], i32 [[TMP36]]
+; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], float [[TMP32]], float [[TMP27]]
+; CHECK-NEXT: [[_MSPROP16:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[_MSPROP_SELECT15]], i64 0
+; CHECK-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[X0]], float [[TMP42]], i64 0
+; CHECK-NEXT: [[_MSPROP17:%.*]] = or <4 x i32> [[_MSPROP6]], [[_MSPROP10]]
+; CHECK-NEXT: [[RES3:%.*]] = fadd <4 x float> [[TMP19]], [[TMP26]]
+; CHECK-NEXT: [[_MSPROP18:%.*]] = or <4 x i32> [[_MSPROP16]], [[_MSPROP17]]
+; CHECK-NEXT: [[RES4:%.*]] = fadd <4 x float> [[TMP43]], [[RES3]]
+; CHECK-NEXT: store <4 x i32> [[_MSPROP18]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES4]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 11)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 10)
+ %res3 = fadd <4 x float> %res, %res1
+ %res4 = fadd <4 x float> %res2, %res3
+ ret <4 x float> %res4
+}
+
+declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
+
+define <2 x double>@test_int_x86_avx512_maskz_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vfmadd_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[X0:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[X2:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP3:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]]
+; CHECK-NEXT: [[_MSPROP4:%.*]] = or i64 [[_MSPROP3]], [[_MSPROP2]]
+; CHECK-NEXT: [[TMP8:%.*]] = call double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <8 x i1> [[TMP9]], i64 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP10]], i64 0
+; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[_MSPROP4]], i64 0
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast double [[TMP8]] to i64
+; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP14]], [[_MSPROP4]]
+; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], 0
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP5]], i64 [[TMP16]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP11]], double [[TMP8]], double 0.000000e+00
+; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x double> [[X0]], double [[TMP17]], i64 0
+; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[X0]], i64 0
+; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[X2]], i64 0
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[_MSPROP7]], 0
+; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i64 [[_MSPROP8]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP14]]
+; CHECK-NEXT: [[_MSCMP15:%.*]] = icmp ne i64 [[_MSPROP9]], 0
+; CHECK-NEXT: [[_MSOR16:%.*]] = or i1 [[_MSOR]], [[_MSCMP15]]
+; CHECK-NEXT: br i1 [[_MSOR16]], label [[TMP22:%.*]], label [[TMP23:%.*]], !prof [[PROF1]]
+; CHECK: 22:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 23:
+; CHECK-NEXT: [[TMP24:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP19]], double [[TMP20]], double [[TMP21]], i32 11)
+; CHECK-NEXT: [[TMP25:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP26:%.*]] = bitcast i8 [[X3]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <8 x i1> [[TMP25]], i64 0
+; CHECK-NEXT: [[TMP27:%.*]] = extractelement <8 x i1> [[TMP26]], i64 0
+; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i64 0, i64 0
+; CHECK-NEXT: [[TMP29:%.*]] = bitcast double [[TMP24]] to i64
+; CHECK-NEXT: [[TMP30:%.*]] = xor i64 [[TMP29]], 0
+; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP30]], 0
+; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[TMP31]], 0
+; CHECK-NEXT: [[_MSPROP_SELECT11:%.*]] = select i1 [[_MSPROP10]], i64 [[TMP32]], i64 [[TMP28]]
+; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP27]], double [[TMP24]], double 0.000000e+00
+; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[_MSPROP_SELECT11]], i64 0
+; CHECK-NEXT: [[TMP34:%.*]] = insertelement <2 x double> [[X0]], double [[TMP33]], i64 0
+; CHECK-NEXT: [[_MSPROP13:%.*]] = or <2 x i64> [[_MSPROP6]], [[_MSPROP12]]
+; CHECK-NEXT: [[RES2:%.*]] = fadd <2 x double> [[TMP18]], [[TMP34]]
+; CHECK-NEXT: store <2 x i64> [[_MSPROP13]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES2]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
+ %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 11)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
+
+define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vfmadd_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[X2:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP3:%.*]] = or i32 [[_MSPROP]], [[_MSPROP1]]
+; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP3]], [[_MSPROP2]]
+; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.fma.f32(float [[TMP5]], float [[TMP6]], float [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <8 x i1> [[TMP9]], i64 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP10]], i64 0
+; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[_MSPROP4]], i32 0
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast float [[TMP8]] to i32
+; CHECK-NEXT: [[TMP14:%.*]] = xor i32 [[TMP13]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = or i32 [[TMP14]], [[_MSPROP4]]
+; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP15]], 0
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP5]], i32 [[TMP16]], i32 [[TMP12]]
+; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP11]], float [[TMP8]], float 0.000000e+00
+; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[X0]], float [[TMP17]], i64 0
+; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[X0]], i64 0
+; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[X2]], i64 0
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[_MSPROP7]], 0
+; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i32 [[_MSPROP8]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP14]]
+; CHECK-NEXT: [[_MSCMP15:%.*]] = icmp ne i32 [[_MSPROP9]], 0
+; CHECK-NEXT: [[_MSOR16:%.*]] = or i1 [[_MSOR]], [[_MSCMP15]]
+; CHECK-NEXT: br i1 [[_MSOR16]], label [[TMP22:%.*]], label [[TMP23:%.*]], !prof [[PROF1]]
+; CHECK: 22:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 23:
+; CHECK-NEXT: [[TMP24:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP19]], float [[TMP20]], float [[TMP21]], i32 11)
+; CHECK-NEXT: [[TMP25:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP26:%.*]] = bitcast i8 [[X3]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <8 x i1> [[TMP25]], i64 0
+; CHECK-NEXT: [[TMP27:%.*]] = extractelement <8 x i1> [[TMP26]], i64 0
+; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 0, i32 0
+; CHECK-NEXT: [[TMP29:%.*]] = bitcast float [[TMP24]] to i32
+; CHECK-NEXT: [[TMP30:%.*]] = xor i32 [[TMP29]], 0
+; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP30]], 0
+; CHECK-NEXT: [[TMP32:%.*]] = or i32 [[TMP31]], 0
+; CHECK-NEXT: [[_MSPROP_SELECT11:%.*]] = select i1 [[_MSPROP10]], i32 [[TMP32]], i32 [[TMP28]]
+; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP27]], float [[TMP24]], float 0.000000e+00
+; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[_MSPROP_SELECT11]], i64 0
+; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[X0]], float [[TMP33]], i64 0
+; CHECK-NEXT: [[_MSPROP13:%.*]] = or <4 x i32> [[_MSPROP6]], [[_MSPROP12]]
+; CHECK-NEXT: [[RES2:%.*]] = fadd <4 x float> [[TMP18]], [[TMP34]]
+; CHECK-NEXT: store <4 x i32> [[_MSPROP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[TMP18]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
+ %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 11)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res
+}
+declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
+
+define <2 x double>@test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmadd_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[X0:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[X2:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP3:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]]
+; CHECK-NEXT: [[_MSPROP4:%.*]] = or i64 [[_MSPROP3]], [[_MSPROP2]]
+; CHECK-NEXT: [[TMP8:%.*]] = call double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <8 x i1> [[TMP9]], i64 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP10]], i64 0
+; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[_MSPROP4]], i64 [[_MSPROP2]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast double [[TMP8]] to i64
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast double [[TMP7]] to i64
+; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[_MSPROP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP16]], [[_MSPROP2]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP5]], i64 [[TMP17]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP11]], double [[TMP8]], double [[TMP7]]
+; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> [[X2]], double [[TMP18]], i64 0
+; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[X0]], i64 0
+; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[X2]], i64 0
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[_MSPROP7]], 0
+; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i64 [[_MSPROP8]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP19]]
+; CHECK-NEXT: [[_MSCMP20:%.*]] = icmp ne i64 [[_MSPROP9]], 0
+; CHECK-NEXT: [[_MSOR21:%.*]] = or i1 [[_MSOR]], [[_MSCMP20]]
+; CHECK-NEXT: br i1 [[_MSOR21]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]]
+; CHECK: 23:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 24:
+; CHECK-NEXT: [[TMP25:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP20]], double [[TMP21]], double [[TMP22]], i32 11)
+; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <2 x i64> [[TMP3]], i64 0, i64 0
+; CHECK-NEXT: [[TMP26:%.*]] = insertelement <2 x double> [[X2]], double [[TMP25]], i64 0
+; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x double> [[X0]], i64 0
+; CHECK-NEXT: [[_MSPROP12:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x double> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x double> [[X2]], i64 0
+; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i64 [[_MSPROP11]], 0
+; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i64 [[_MSPROP12]], 0
+; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSCMP22]], [[_MSCMP23]]
+; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i64 [[_MSPROP13]], 0
+; CHECK-NEXT: [[_MSOR26:%.*]] = or i1 [[_MSOR24]], [[_MSCMP25]]
+; CHECK-NEXT: br i1 [[_MSOR26]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
+; CHECK: 30:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 31:
+; CHECK-NEXT: [[TMP32:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP27]], double [[TMP28]], double [[TMP29]], i32 10)
+; CHECK-NEXT: [[TMP33:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP34:%.*]] = bitcast i8 [[X3]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP33]], i64 0
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP34]], i64 0
+; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i64 0, i64 [[_MSPROP13]]
+; CHECK-NEXT: [[TMP37:%.*]] = bitcast double [[TMP32]] to i64
+; CHECK-NEXT: [[TMP38:%.*]] = bitcast double [[TMP29]] to i64
+; CHECK-NEXT: [[TMP39:%.*]] = xor i64 [[TMP37]], [[TMP38]]
+; CHECK-NEXT: [[TMP40:%.*]] = or i64 [[TMP39]], 0
+; CHECK-NEXT: [[TMP41:%.*]] = or i64 [[TMP40]], [[_MSPROP13]]
+; CHECK-NEXT: [[_MSPROP_SELECT15:%.*]] = select i1 [[_MSPROP14]], i64 [[TMP41]], i64 [[TMP36]]
+; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], double [[TMP32]], double [[TMP29]]
+; CHECK-NEXT: [[_MSPROP16:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[_MSPROP_SELECT15]], i64 0
+; CHECK-NEXT: [[TMP43:%.*]] = insertelement <2 x double> [[X2]], double [[TMP42]], i64 0
+; CHECK-NEXT: [[_MSPROP17:%.*]] = or <2 x i64> [[_MSPROP6]], [[_MSPROP10]]
+; CHECK-NEXT: [[RES3:%.*]] = fadd <2 x double> [[TMP19]], [[TMP26]]
+; CHECK-NEXT: [[_MSPROP18:%.*]] = or <2 x i64> [[_MSPROP16]], [[_MSPROP17]]
+; CHECK-NEXT: [[RES4:%.*]] = fadd <2 x double> [[TMP43]], [[RES3]]
+; CHECK-NEXT: store <2 x i64> [[_MSPROP18]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES4]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 11)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 10)
+ %res3 = fadd <2 x double> %res, %res1
+ %res4 = fadd <2 x double> %res2, %res3
+ ret <2 x double> %res4
+}
+
+declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
+
+define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ) #0 {
+;
+; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmadd_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[X2:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP3:%.*]] = or i32 [[_MSPROP]], [[_MSPROP1]]
+; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP3]], [[_MSPROP2]]
+; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.fma.f32(float [[TMP5]], float [[TMP6]], float [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <8 x i1> [[TMP9]], i64 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP10]], i64 0
+; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[_MSPROP4]], i32 [[_MSPROP2]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast float [[TMP8]] to i32
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast float [[TMP7]] to i32
+; CHECK-NEXT: [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP15]], [[_MSPROP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], [[_MSPROP2]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP5]], i32 [[TMP17]], i32 [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP11]], float [[TMP8]], float [[TMP7]]
+; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x float> [[X2]], float [[TMP18]], i64 0
+; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[X0]], i64 0
+; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[X2]], i64 0
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[_MSPROP7]], 0
+; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i32 [[_MSPROP8]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP19]]
+; CHECK-NEXT: [[_MSCMP20:%.*]] = icmp ne i32 [[_MSPROP9]], 0
+; CHECK-NEXT: [[_MSOR21:%.*]] = or i1 [[_MSOR]], [[_MSCMP20]]
+; CHECK-NEXT: br i1 [[_MSOR21]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]]
+; CHECK: 23:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 24:
+; CHECK-NEXT: [[TMP25:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP20]], float [[TMP21]], float [[TMP22]], i32 11)
+; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <4 x i32> [[TMP3]], i32 0, i64 0
+; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x float> [[X2]], float [[TMP25]], i64 0
+; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x float> [[X0]], i64 0
+; CHECK-NEXT: [[_MSPROP12:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x float> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x float> [[X2]], i64 0
+; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i32 [[_MSPROP11]], 0
+; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i32 [[_MSPROP12]], 0
+; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSCMP22]], [[_MSCMP23]]
+; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i32 [[_MSPROP13]], 0
+; CHECK-NEXT: [[_MSOR26:%.*]] = or i1 [[_MSOR24]], [[_MSCMP25]]
+; CHECK-NEXT: br i1 [[_MSOR26]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
+; CHECK: 30:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 31:
+; CHECK-NEXT: [[TMP32:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP27]], float [[TMP28]], float [[TMP29]], i32 10)
+; CHECK-NEXT: [[TMP33:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP34:%.*]] = bitcast i8 [[X3]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP33]], i64 0
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP34]], i64 0
+; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 0, i32 [[_MSPROP13]]
+; CHECK-NEXT: [[TMP37:%.*]] = bitcast float [[TMP32]] to i32
+; CHECK-NEXT: [[TMP38:%.*]] = bitcast float [[TMP29]] to i32
+; CHECK-NEXT: [[TMP39:%.*]] = xor i32 [[TMP37]], [[TMP38]]
+; CHECK-NEXT: [[TMP40:%.*]] = or i32 [[TMP39]], 0
+; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP40]], [[_MSPROP13]]
+; CHECK-NEXT: [[_MSPROP_SELECT15:%.*]] = select i1 [[_MSPROP14]], i32 [[TMP41]], i32 [[TMP36]]
+; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], float [[TMP32]], float [[TMP29]]
+; CHECK-NEXT: [[_MSPROP16:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[_MSPROP_SELECT15]], i64 0
+; CHECK-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[X2]], float [[TMP42]], i64 0
+; CHECK-NEXT: [[_MSPROP17:%.*]] = or <4 x i32> [[_MSPROP6]], [[_MSPROP10]]
+; CHECK-NEXT: [[RES3:%.*]] = fadd <4 x float> [[TMP19]], [[TMP26]]
+; CHECK-NEXT: [[_MSPROP18:%.*]] = or <4 x i32> [[_MSPROP16]], [[_MSPROP17]]
+; CHECK-NEXT: [[RES4:%.*]] = fadd <4 x float> [[TMP43]], [[RES3]]
+; CHECK-NEXT: store <4 x i32> [[_MSPROP18]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES4]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 11)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 10)
+ %res3 = fadd <4 x float> %res, %res1
+ %res4 = fadd <4 x float> %res2, %res3
+ ret <4 x float> %res4
+}
+
+define void @fmadd_ss_mask_memfold(ptr %a, ptr %b, i8 %c) #0 {
+;
+; CHECK-LABEL: @fmadd_ss_mask_memfold(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[A_VAL:%.*]] = load float, ptr [[A:%.*]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
+; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0
+; CHECK-NEXT: [[AV0:%.*]] = insertelement <4 x float> undef, float [[A_VAL]], i32 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1
+; CHECK-NEXT: [[AV1:%.*]] = insertelement <4 x float> [[AV0]], float 0.000000e+00, i32 1
+; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2
+; CHECK-NEXT: [[AV2:%.*]] = insertelement <4 x float> [[AV1]], float 0.000000e+00, i32 2
+; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3
+; CHECK-NEXT: [[AV:%.*]] = insertelement <4 x float> [[AV2]], float 0.000000e+00, i32 3
+; CHECK-NEXT: [[_MSCMP17:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP17]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[B_VAL:%.*]] = load float, ptr [[B:%.*]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080
+; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
+; CHECK-NEXT: [[_MSLD4:%.*]] = load i32, ptr [[TMP13]], align 4
+; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD4]], i32 0
+; CHECK-NEXT: [[BV0:%.*]] = insertelement <4 x float> undef, float [[B_VAL]], i32 0
+; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <4 x i32> [[_MSPROP5]], i32 0, i32 1
+; CHECK-NEXT: [[BV1:%.*]] = insertelement <4 x float> [[BV0]], float 0.000000e+00, i32 1
+; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <4 x i32> [[_MSPROP6]], i32 0, i32 2
+; CHECK-NEXT: [[BV2:%.*]] = insertelement <4 x float> [[BV1]], float 0.000000e+00, i32 2
+; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <4 x i32> [[_MSPROP7]], i32 0, i32 3
+; CHECK-NEXT: [[BV:%.*]] = insertelement <4 x float> [[BV2]], float 0.000000e+00, i32 3
+; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[_MSPROP3]], i64 0
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[AV]], i64 0
+; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <4 x i32> [[_MSPROP8]], i64 0
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[BV]], i64 0
+; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <4 x i32> [[_MSPROP3]], i64 0
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[AV]], i64 0
+; CHECK-NEXT: [[_MSPROP12:%.*]] = or i32 [[_MSPROP9]], [[_MSPROP10]]
+; CHECK-NEXT: [[_MSPROP13:%.*]] = or i32 [[_MSPROP12]], [[_MSPROP11]]
+; CHECK-NEXT: [[TMP17:%.*]] = call float @llvm.fma.f32(float [[TMP14]], float [[TMP15]], float [[TMP16]])
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8 [[C:%.*]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP19]], i64 0
+; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[_MSPROP13]], i32 [[_MSPROP9]]
+; CHECK-NEXT: [[TMP22:%.*]] = bitcast float [[TMP17]] to i32
+; CHECK-NEXT: [[TMP23:%.*]] = bitcast float [[TMP14]] to i32
+; CHECK-NEXT: [[TMP24:%.*]] = xor i32 [[TMP22]], [[TMP23]]
+; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[TMP24]], [[_MSPROP13]]
+; CHECK-NEXT: [[TMP26:%.*]] = or i32 [[TMP25]], [[_MSPROP9]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP14]], i32 [[TMP26]], i32 [[TMP21]]
+; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP20]], float [[TMP17]], float [[TMP14]]
+; CHECK-NEXT: [[_MSPROP15:%.*]] = insertelement <4 x i32> [[_MSPROP3]], i32 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x float> [[AV]], float [[TMP27]], i64 0
+; CHECK-NEXT: [[_MSPROP16:%.*]] = extractelement <4 x i32> [[_MSPROP15]], i32 0
+; CHECK-NEXT: [[SR:%.*]] = extractelement <4 x float> [[TMP28]], i32 0
+; CHECK-NEXT: [[_MSCMP18:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP18]], label [[TMP29:%.*]], label [[TMP30:%.*]], !prof [[PROF1]]
+; CHECK: 29:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 30:
+; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP32:%.*]] = xor i64 [[TMP31]], 87960930222080
+; CHECK-NEXT: [[TMP33:%.*]] = inttoptr i64 [[TMP32]] to ptr
+; CHECK-NEXT: store i32 [[_MSPROP16]], ptr [[TMP33]], align 4
+; CHECK-NEXT: store float [[SR]], ptr [[A]], align 4
+; CHECK-NEXT: ret void
+;
+ %a.val = load float, ptr %a
+ %av0 = insertelement <4 x float> undef, float %a.val, i32 0
+ %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
+ %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
+ %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
+
+ %b.val = load float, ptr %b
+ %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
+ %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
+ %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
+ %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
+
+ %vr = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av, i8 %c, i32 4)
+
+ %sr = extractelement <4 x float> %vr, i32 0
+ store float %sr, ptr %a
+ ret void
+}
+
+define void @fmadd_ss_maskz_memfold(ptr %a, ptr %b, i8 %c) #0 {
+;
+; CHECK-LABEL: @fmadd_ss_maskz_memfold(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[A_VAL:%.*]] = load float, ptr [[A:%.*]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
+; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0
+; CHECK-NEXT: [[AV0:%.*]] = insertelement <4 x float> undef, float [[A_VAL]], i32 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1
+; CHECK-NEXT: [[AV1:%.*]] = insertelement <4 x float> [[AV0]], float 0.000000e+00, i32 1
+; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2
+; CHECK-NEXT: [[AV2:%.*]] = insertelement <4 x float> [[AV1]], float 0.000000e+00, i32 2
+; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3
+; CHECK-NEXT: [[AV:%.*]] = insertelement <4 x float> [[AV2]], float 0.000000e+00, i32 3
+; CHECK-NEXT: [[_MSCMP17:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP17]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[B_VAL:%.*]] = load float, ptr [[B:%.*]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080
+; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
+; CHECK-NEXT: [[_MSLD4:%.*]] = load i32, ptr [[TMP13]], align 4
+; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD4]], i32 0
+; CHECK-NEXT: [[BV0:%.*]] = insertelement <4 x float> undef, float [[B_VAL]], i32 0
+; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <4 x i32> [[_MSPROP5]], i32 0, i32 1
+; CHECK-NEXT: [[BV1:%.*]] = insertelement <4 x float> [[BV0]], float 0.000000e+00, i32 1
+; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <4 x i32> [[_MSPROP6]], i32 0, i32 2
+; CHECK-NEXT: [[BV2:%.*]] = insertelement <4 x float> [[BV1]], float 0.000000e+00, i32 2
+; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <4 x i32> [[_MSPROP7]], i32 0, i32 3
+; CHECK-NEXT: [[BV:%.*]] = insertelement <4 x float> [[BV2]], float 0.000000e+00, i32 3
+; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[_MSPROP3]], i64 0
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[AV]], i64 0
+; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <4 x i32> [[_MSPROP8]], i64 0
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[BV]], i64 0
+; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <4 x i32> [[_MSPROP3]], i64 0
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[AV]], i64 0
+; CHECK-NEXT: [[_MSPROP12:%.*]] = or i32 [[_MSPROP9]], [[_MSPROP10]]
+; CHECK-NEXT: [[_MSPROP13:%.*]] = or i32 [[_MSPROP12]], [[_MSPROP11]]
+; CHECK-NEXT: [[TMP17:%.*]] = call float @llvm.fma.f32(float [[TMP14]], float [[TMP15]], float [[TMP16]])
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8 [[C:%.*]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP19]], i64 0
+; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[_MSPROP13]], i32 0
+; CHECK-NEXT: [[TMP22:%.*]] = bitcast float [[TMP17]] to i32
+; CHECK-NEXT: [[TMP23:%.*]] = xor i32 [[TMP22]], 0
+; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP23]], [[_MSPROP13]]
+; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[TMP24]], 0
+;
CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP14]], i32 [[TMP25]], i32 [[TMP21]] +; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP20]], float [[TMP17]], float 0.000000e+00 +; CHECK-NEXT: [[_MSPROP15:%.*]] = insertelement <4 x i32> [[_MSPROP3]], i32 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x float> [[AV]], float [[TMP26]], i64 0 +; CHECK-NEXT: [[_MSPROP16:%.*]] = extractelement <4 x i32> [[_MSPROP15]], i32 0 +; CHECK-NEXT: [[SR:%.*]] = extractelement <4 x float> [[TMP27]], i32 0 +; CHECK-NEXT: [[_MSCMP18:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP18]], label [[TMP28:%.*]], label [[TMP29:%.*]], !prof [[PROF1]] +; CHECK: 28: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 29: +; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], 87960930222080 +; CHECK-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP31]] to ptr +; CHECK-NEXT: store i32 [[_MSPROP16]], ptr [[TMP32]], align 4 +; CHECK-NEXT: store float [[SR]], ptr [[A]], align 4 +; CHECK-NEXT: ret void +; + %a.val = load float, ptr %a + %av0 = insertelement <4 x float> undef, float %a.val, i32 0 + %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1 + %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2 + %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3 + + %b.val = load float, ptr %b + %bv0 = insertelement <4 x float> undef, float %b.val, i32 0 + %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1 + %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2 + %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3 + + %vr = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av, i8 %c, i32 4) + + %sr = extractelement <4 x float> %vr, i32 0 + store float %sr, ptr %a + ret void +} + +define void @fmadd_sd_mask_memfold(ptr %a, ptr %b, i8 %c) #0 { +; +; CHECK-LABEL: @fmadd_sd_mask_memfold( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[A_VAL:%.*]] = load double, ptr [[A:%.*]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[AV0:%.*]] = insertelement <2 x double> undef, double [[A_VAL]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1 +; CHECK-NEXT: [[AV:%.*]] = insertelement <2 x double> [[AV0]], double 0.000000e+00, i32 1 +; CHECK-NEXT: [[_MSCMP13:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP13]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() 
#[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[B_VAL:%.*]] = load double, ptr [[B:%.*]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080 +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr +; CHECK-NEXT: [[_MSLD2:%.*]] = load i64, ptr [[TMP13]], align 8 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD2]], i32 0 +; CHECK-NEXT: [[BV0:%.*]] = insertelement <2 x double> undef, double [[B_VAL]], i32 0 +; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <2 x i64> [[_MSPROP3]], i64 0, i32 1 +; CHECK-NEXT: [[BV:%.*]] = insertelement <2 x double> [[BV0]], double 0.000000e+00, i32 1 +; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <2 x i64> [[_MSPROP1]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[AV]], i64 0 +; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <2 x i64> [[_MSPROP4]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[BV]], i64 0 +; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <2 x i64> [[_MSPROP1]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[AV]], i64 0 +; CHECK-NEXT: [[_MSPROP8:%.*]] = or i64 [[_MSPROP5]], [[_MSPROP6]] +; CHECK-NEXT: [[_MSPROP9:%.*]] = or i64 [[_MSPROP8]], [[_MSPROP7]] +; CHECK-NEXT: [[TMP17:%.*]] = call double @llvm.fma.f64(double [[TMP14]], double [[TMP15]], double [[TMP16]]) +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8 [[C:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP19]], i64 0 +; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i64 [[_MSPROP9]], i64 [[_MSPROP5]] +; CHECK-NEXT: [[TMP22:%.*]] = bitcast double [[TMP17]] to i64 +; CHECK-NEXT: [[TMP23:%.*]] = bitcast double [[TMP14]] to i64 +; CHECK-NEXT: [[TMP24:%.*]] = xor i64 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP24]], [[_MSPROP9]] +; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP25]], [[_MSPROP5]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP10]], i64 [[TMP26]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP20]], double [[TMP17]], double [[TMP14]] +; CHECK-NEXT: [[_MSPROP11:%.*]] = insertelement <2 x i64> [[_MSPROP1]], i64 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x double> [[AV]], double [[TMP27]], i64 0 +; CHECK-NEXT: [[_MSPROP12:%.*]] = extractelement <2 x i64> [[_MSPROP11]], i32 0 +; CHECK-NEXT: [[SR:%.*]] = extractelement <2 x double> [[TMP28]], i32 0 +; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP14]], label [[TMP29:%.*]], label [[TMP30:%.*]], !prof [[PROF1]] +; CHECK: 29: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 30: +; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP32:%.*]] = xor i64 [[TMP31]], 87960930222080 +; CHECK-NEXT: [[TMP33:%.*]] = inttoptr i64 [[TMP32]] to ptr +; CHECK-NEXT: store i64 [[_MSPROP12]], ptr [[TMP33]], align 8 +; CHECK-NEXT: store double [[SR]], ptr [[A]], align 8 +; CHECK-NEXT: ret void +; + %a.val = load double, ptr %a + %av0 = insertelement <2 x double> undef, double %a.val, i32 0 + %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1 + + %b.val = load double, ptr %b + %bv0 = insertelement <2 x double> undef, double %b.val, i32 0 + %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1 
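+  ; The masked vfmadd.sd call below blends element 0 of %av with the FMA result
+  ; under bit 0 of %c; the CHECK lines above show MSan applying the same blend
+  ; to the shadows (_MSPROP_SELECT) and storing the selected shadow to the
+  ; shadow of %a before the store of %sr.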
+ + %vr = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av, i8 %c, i32 4) + + %sr = extractelement <2 x double> %vr, i32 0 + store double %sr, ptr %a + ret void +} + +define void @fmadd_sd_maskz_memfold(ptr %a, ptr %b, i8 %c) #0 { +; +; CHECK-LABEL: @fmadd_sd_maskz_memfold( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[A_VAL:%.*]] = load double, ptr [[A:%.*]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[AV0:%.*]] = insertelement <2 x double> undef, double [[A_VAL]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1 +; CHECK-NEXT: [[AV:%.*]] = insertelement <2 x double> [[AV0]], double 0.000000e+00, i32 1 +; CHECK-NEXT: [[_MSCMP13:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP13]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[B_VAL:%.*]] = load double, ptr [[B:%.*]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080 +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr +; CHECK-NEXT: [[_MSLD2:%.*]] = load i64, ptr [[TMP13]], align 8 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD2]], i32 0 +; CHECK-NEXT: [[BV0:%.*]] = insertelement <2 x double> undef, double [[B_VAL]], i32 0 +; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <2 x i64> [[_MSPROP3]], i64 0, i32 1 +; CHECK-NEXT: [[BV:%.*]] = insertelement <2 x double> [[BV0]], double 0.000000e+00, i32 1 +; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <2 x i64> [[_MSPROP1]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[AV]], i64 0 +; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <2 x i64> [[_MSPROP4]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[BV]], i64 0 +; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <2 x i64> [[_MSPROP1]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[AV]], i64 0 +; CHECK-NEXT: [[_MSPROP8:%.*]] = or i64 [[_MSPROP5]], [[_MSPROP6]] +; CHECK-NEXT: [[_MSPROP9:%.*]] = or i64 [[_MSPROP8]], [[_MSPROP7]] +; CHECK-NEXT: [[TMP17:%.*]] = call double @llvm.fma.f64(double [[TMP14]], double [[TMP15]], double [[TMP16]]) +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8 [[C:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP19]], i64 0 +; 
CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i64 [[_MSPROP9]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = bitcast double [[TMP17]] to i64 +; CHECK-NEXT: [[TMP23:%.*]] = xor i64 [[TMP22]], 0 +; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP23]], [[_MSPROP9]] +; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP24]], 0 +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP10]], i64 [[TMP25]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP20]], double [[TMP17]], double 0.000000e+00 +; CHECK-NEXT: [[_MSPROP11:%.*]] = insertelement <2 x i64> [[_MSPROP1]], i64 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <2 x double> [[AV]], double [[TMP26]], i64 0 +; CHECK-NEXT: [[_MSPROP12:%.*]] = extractelement <2 x i64> [[_MSPROP11]], i32 0 +; CHECK-NEXT: [[SR:%.*]] = extractelement <2 x double> [[TMP27]], i32 0 +; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP14]], label [[TMP28:%.*]], label [[TMP29:%.*]], !prof [[PROF1]] +; CHECK: 28: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 29: +; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], 87960930222080 +; CHECK-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP31]] to ptr +; CHECK-NEXT: store i64 [[_MSPROP12]], ptr [[TMP32]], align 8 +; CHECK-NEXT: store double [[SR]], ptr [[A]], align 8 +; CHECK-NEXT: ret void +; + %a.val = load double, ptr %a + %av0 = insertelement <2 x double> undef, double %a.val, i32 0 + %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1 + + %b.val = load double, ptr %b + %bv0 = insertelement <2 x double> undef, double %b.val, i32 0 + %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1 + + %vr = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av, i8 %c, i32 4) + + %sr = extractelement <2 x double> %vr, i32 0 + store double %sr, ptr %a + ret void +} + +declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) + +define <2 x double>@test_int_x86_avx512_mask3_vfmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmsub_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = fneg <2 x double> [[X2:%.*]] +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[X0:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP5]], i64 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]] +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i64 [[_MSPROP3]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP9:%.*]] = call double @llvm.fma.f64(double [[TMP6]], double [[TMP7]], double 
[[TMP8]]) +; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[X2]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP11]], i64 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[TMP12]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[_MSPROP4]], i64 [[_MSPROP5]] +; CHECK-NEXT: [[TMP15:%.*]] = bitcast double [[TMP9]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = bitcast double [[TMP10]] to i64 +; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP17]], [[_MSPROP4]] +; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP18]], [[_MSPROP5]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i64 [[TMP19]], i64 [[TMP14]] +; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP13]], double [[TMP9]], double [[TMP10]] +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x double> [[X2]], double [[TMP20]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = fneg <2 x double> [[X2]] +; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x double> [[X0]], i64 0 +; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x double> [[X1]], i64 0 +; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x double> [[TMP22]], i64 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[_MSPROP8]], 0 +; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i64 [[_MSPROP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP22]] +; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i64 [[_MSPROP10]], 0 +; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSOR]], [[_MSCMP23]] +; CHECK-NEXT: br i1 [[_MSOR24]], label [[TMP26:%.*]], label [[TMP27:%.*]], !prof [[PROF1]] +; CHECK: 26: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 27: +; CHECK-NEXT: [[TMP28:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP23]], double [[TMP24]], double [[TMP25]], i32 11) +; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x double> [[X2]], i64 0 +; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <2 x i64> [[TMP1]], i64 0, i64 0 +; CHECK-NEXT: [[TMP30:%.*]] = insertelement <2 x double> [[X2]], double [[TMP28]], i64 0 +; CHECK-NEXT: [[TMP31:%.*]] = fneg <2 x double> [[X2]] +; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x double> [[X0]], i64 0 +; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x double> [[X1]], i64 0 +; CHECK-NEXT: [[_MSPROP15:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[TMP31]], i64 0 +; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i64 [[_MSPROP13]], 0 +; CHECK-NEXT: [[_MSCMP26:%.*]] = icmp ne i64 [[_MSPROP14]], 0 +; CHECK-NEXT: [[_MSOR27:%.*]] = or i1 [[_MSCMP25]], [[_MSCMP26]] +; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i64 [[_MSPROP15]], 0 +; CHECK-NEXT: [[_MSOR29:%.*]] = or i1 [[_MSOR27]], [[_MSCMP28]] +; CHECK-NEXT: br i1 [[_MSOR29]], label [[TMP35:%.*]], 
label [[TMP36:%.*]], !prof [[PROF1]] +; CHECK: 35: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 36: +; CHECK-NEXT: [[TMP37:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP32]], double [[TMP33]], double [[TMP34]], i32 10) +; CHECK-NEXT: [[_MSPROP16:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x double> [[X2]], i64 0 +; CHECK-NEXT: [[TMP39:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP40:%.*]] = bitcast i8 [[X3]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP17:%.*]] = extractelement <8 x i1> [[TMP39]], i64 0 +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <8 x i1> [[TMP40]], i64 0 +; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i64 0, i64 [[_MSPROP16]] +; CHECK-NEXT: [[TMP43:%.*]] = bitcast double [[TMP37]] to i64 +; CHECK-NEXT: [[TMP44:%.*]] = bitcast double [[TMP38]] to i64 +; CHECK-NEXT: [[TMP45:%.*]] = xor i64 [[TMP43]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = or i64 [[TMP45]], 0 +; CHECK-NEXT: [[TMP47:%.*]] = or i64 [[TMP46]], [[_MSPROP16]] +; CHECK-NEXT: [[_MSPROP_SELECT18:%.*]] = select i1 [[_MSPROP17]], i64 [[TMP47]], i64 [[TMP42]] +; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP41]], double [[TMP37]], double [[TMP38]] +; CHECK-NEXT: [[_MSPROP19:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[_MSPROP_SELECT18]], i64 0 +; CHECK-NEXT: [[TMP49:%.*]] = insertelement <2 x double> [[X2]], double [[TMP48]], i64 0 +; CHECK-NEXT: [[_MSPROP20:%.*]] = or <2 x i64> [[_MSPROP7]], [[_MSPROP12]] +; CHECK-NEXT: [[RES3:%.*]] = fadd <2 x double> [[TMP21]], [[TMP30]] +; CHECK-NEXT: [[_MSPROP21:%.*]] = or <2 x i64> [[_MSPROP19]], [[_MSPROP20]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <2 x double> [[TMP49]], [[RES3]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP21]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES4]] +; + %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4) + %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 11) + %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 10) + %res3 = fadd <2 x double> %res, %res1 + %res4 = fadd <2 x double> %res2, %res3 + ret <2 x double> %res4 +} + +declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) + +define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmsub_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = fneg <4 x float> [[X2:%.*]] +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0 +; 
CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP5]], i64 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = or i32 [[_MSPROP]], [[_MSPROP1]] +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP3]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP9:%.*]] = call float @llvm.fma.f32(float [[TMP6]], float [[TMP7]], float [[TMP8]]) +; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[X2]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP11]], i64 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[TMP12]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[_MSPROP4]], i32 [[_MSPROP5]] +; CHECK-NEXT: [[TMP15:%.*]] = bitcast float [[TMP9]] to i32 +; CHECK-NEXT: [[TMP16:%.*]] = bitcast float [[TMP10]] to i32 +; CHECK-NEXT: [[TMP17:%.*]] = xor i32 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = or i32 [[TMP17]], [[_MSPROP4]] +; CHECK-NEXT: [[TMP19:%.*]] = or i32 [[TMP18]], [[_MSPROP5]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i32 [[TMP19]], i32 [[TMP14]] +; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP13]], float [[TMP9]], float [[TMP10]] +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x float> [[X2]], float [[TMP20]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = fneg <4 x float> [[X2]] +; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x float> [[X0]], i64 0 +; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x float> [[X1]], i64 0 +; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x float> [[TMP22]], i64 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[_MSPROP8]], 0 +; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i32 [[_MSPROP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP22]] +; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i32 [[_MSPROP10]], 0 +; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSOR]], [[_MSCMP23]] +; CHECK-NEXT: br i1 [[_MSOR24]], label [[TMP26:%.*]], label [[TMP27:%.*]], !prof [[PROF1]] +; CHECK: 26: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 27: +; CHECK-NEXT: [[TMP28:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP23]], float [[TMP24]], float [[TMP25]], i32 11) +; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x float> [[X2]], i64 0 +; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <4 x i32> [[TMP1]], i32 0, i64 0 +; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x float> [[X2]], float [[TMP28]], i64 0 +; CHECK-NEXT: [[TMP31:%.*]] = fneg <4 x float> [[X2]] +; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x float> [[X0]], i64 0 +; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x float> [[X1]], i64 0 +; CHECK-NEXT: [[_MSPROP15:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x float> [[TMP31]], i64 0 +; CHECK-NEXT: 
[[_MSCMP25:%.*]] = icmp ne i32 [[_MSPROP13]], 0 +; CHECK-NEXT: [[_MSCMP26:%.*]] = icmp ne i32 [[_MSPROP14]], 0 +; CHECK-NEXT: [[_MSOR27:%.*]] = or i1 [[_MSCMP25]], [[_MSCMP26]] +; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i32 [[_MSPROP15]], 0 +; CHECK-NEXT: [[_MSOR29:%.*]] = or i1 [[_MSOR27]], [[_MSCMP28]] +; CHECK-NEXT: br i1 [[_MSOR29]], label [[TMP35:%.*]], label [[TMP36:%.*]], !prof [[PROF1]] +; CHECK: 35: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 36: +; CHECK-NEXT: [[TMP37:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP32]], float [[TMP33]], float [[TMP34]], i32 10) +; CHECK-NEXT: [[_MSPROP16:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <4 x float> [[X2]], i64 0 +; CHECK-NEXT: [[TMP39:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP40:%.*]] = bitcast i8 [[X3]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP17:%.*]] = extractelement <8 x i1> [[TMP39]], i64 0 +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <8 x i1> [[TMP40]], i64 0 +; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 0, i32 [[_MSPROP16]] +; CHECK-NEXT: [[TMP43:%.*]] = bitcast float [[TMP37]] to i32 +; CHECK-NEXT: [[TMP44:%.*]] = bitcast float [[TMP38]] to i32 +; CHECK-NEXT: [[TMP45:%.*]] = xor i32 [[TMP43]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = or i32 [[TMP45]], 0 +; CHECK-NEXT: [[TMP47:%.*]] = or i32 [[TMP46]], [[_MSPROP16]] +; CHECK-NEXT: [[_MSPROP_SELECT18:%.*]] = select i1 [[_MSPROP17]], i32 [[TMP47]], i32 [[TMP42]] +; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP41]], float [[TMP37]], float [[TMP38]] +; CHECK-NEXT: [[_MSPROP19:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[_MSPROP_SELECT18]], i64 0 +; CHECK-NEXT: [[TMP49:%.*]] = insertelement <4 x float> [[X2]], float [[TMP48]], i64 0 +; CHECK-NEXT: [[_MSPROP20:%.*]] = or <4 x i32> [[_MSPROP7]], [[_MSPROP12]] +; CHECK-NEXT: [[RES3:%.*]] = fadd <4 x float> [[TMP21]], [[TMP30]] +; CHECK-NEXT: [[_MSPROP21:%.*]] = or <4 x i32> [[_MSPROP19]], [[_MSPROP20]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <4 x float> [[TMP49]], [[RES3]] +; CHECK-NEXT: store <4 x i32> [[_MSPROP21]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES4]] +; + %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4) + %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 11) + %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 10) + %res3 = fadd <4 x float> %res, %res1 + %res4 = fadd <4 x float> %res2, %res3 + ret <4 x float> %res4 +} + +declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) + +define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask3_vfnmsub_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: 
[[TMP5:%.*]] = fneg <2 x double> [[X0:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = fneg <2 x double> [[X2:%.*]] +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i64 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP6]], i64 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]] +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i64 [[_MSPROP3]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP10:%.*]] = call double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP9]]) +; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[X2]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP12]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP13]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i64 [[_MSPROP4]], i64 [[_MSPROP5]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast double [[TMP10]] to i64 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast double [[TMP11]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP18]], [[_MSPROP4]] +; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP19]], [[_MSPROP5]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i64 [[TMP20]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP14]], double [[TMP10]], double [[TMP11]] +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x double> [[X2]], double [[TMP21]], i64 0 +; CHECK-NEXT: [[TMP23:%.*]] = fneg <2 x double> [[X0]] +; CHECK-NEXT: [[TMP24:%.*]] = fneg <2 x double> [[X2]] +; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x double> [[TMP23]], i64 0 +; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x double> [[X1]], i64 0 +; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x double> [[TMP24]], i64 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[_MSPROP8]], 0 +; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i64 [[_MSPROP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP22]] +; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i64 [[_MSPROP10]], 0 +; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSOR]], [[_MSCMP23]] +; CHECK-NEXT: br i1 [[_MSOR24]], label [[TMP28:%.*]], label [[TMP29:%.*]], !prof [[PROF1]] +; CHECK: 28: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 29: +; CHECK-NEXT: [[TMP30:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP25]], double [[TMP26]], double [[TMP27]], i32 11) +; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x double> [[X2]], i64 0 +; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <2 x i64> [[TMP2]], i64 0, i64 0 +; CHECK-NEXT: [[TMP32:%.*]] = insertelement <2 x double> [[X2]], double [[TMP30]], i64 0 +; CHECK-NEXT: [[TMP33:%.*]] = fneg 
<2 x double> [[X0]] +; CHECK-NEXT: [[TMP34:%.*]] = fneg <2 x double> [[X2]] +; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x double> [[TMP33]], i64 0 +; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <2 x double> [[X1]], i64 0 +; CHECK-NEXT: [[_MSPROP15:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x double> [[TMP34]], i64 0 +; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i64 [[_MSPROP13]], 0 +; CHECK-NEXT: [[_MSCMP26:%.*]] = icmp ne i64 [[_MSPROP14]], 0 +; CHECK-NEXT: [[_MSOR27:%.*]] = or i1 [[_MSCMP25]], [[_MSCMP26]] +; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i64 [[_MSPROP15]], 0 +; CHECK-NEXT: [[_MSOR29:%.*]] = or i1 [[_MSOR27]], [[_MSCMP28]] +; CHECK-NEXT: br i1 [[_MSOR29]], label [[TMP38:%.*]], label [[TMP39:%.*]], !prof [[PROF1]] +; CHECK: 38: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 39: +; CHECK-NEXT: [[TMP40:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP35]], double [[TMP36]], double [[TMP37]], i32 10) +; CHECK-NEXT: [[_MSPROP16:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <2 x double> [[X2]], i64 0 +; CHECK-NEXT: [[TMP42:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP43:%.*]] = bitcast i8 [[X3]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP17:%.*]] = extractelement <8 x i1> [[TMP42]], i64 0 +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <8 x i1> [[TMP43]], i64 0 +; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i64 0, i64 [[_MSPROP16]] +; CHECK-NEXT: [[TMP46:%.*]] = bitcast double [[TMP40]] to i64 +; CHECK-NEXT: [[TMP47:%.*]] = bitcast double [[TMP41]] to i64 +; CHECK-NEXT: [[TMP48:%.*]] = xor i64 [[TMP46]], [[TMP47]] +; CHECK-NEXT: [[TMP49:%.*]] = or i64 [[TMP48]], 0 +; CHECK-NEXT: [[TMP50:%.*]] = or i64 [[TMP49]], [[_MSPROP16]] +; CHECK-NEXT: [[_MSPROP_SELECT18:%.*]] = select i1 [[_MSPROP17]], i64 [[TMP50]], i64 [[TMP45]] +; CHECK-NEXT: [[TMP51:%.*]] = select i1 [[TMP44]], double [[TMP40]], double [[TMP41]] +; CHECK-NEXT: [[_MSPROP19:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[_MSPROP_SELECT18]], i64 0 +; CHECK-NEXT: [[TMP52:%.*]] = insertelement <2 x double> [[X2]], double [[TMP51]], i64 0 +; CHECK-NEXT: [[_MSPROP20:%.*]] = or <2 x i64> [[_MSPROP7]], [[_MSPROP12]] +; CHECK-NEXT: [[RES3:%.*]] = fadd <2 x double> [[TMP22]], [[TMP32]] +; CHECK-NEXT: [[_MSPROP21:%.*]] = or <2 x i64> [[_MSPROP19]], [[_MSPROP20]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <2 x double> [[TMP52]], [[RES3]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP21]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES4]] +; + %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4) + %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 11) + %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 10) + %res3 = fadd <2 x double> %res, %res1 + %res4 = fadd <2 x double> %res2, %res3 + ret <2 x double> %res4 +} + +declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) + +define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ) #0 { +; +; CHECK-LABEL: 
@test_int_x86_avx512_mask3_vfnmsub_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = fneg <4 x float> [[X0:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = fneg <4 x float> [[X2:%.*]] +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP5]], i64 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[TMP6]], i64 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = or i32 [[_MSPROP]], [[_MSPROP1]] +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP3]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP10:%.*]] = call float @llvm.fma.f32(float [[TMP7]], float [[TMP8]], float [[TMP9]]) +; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[X2]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP12]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP13]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[_MSPROP4]], i32 [[_MSPROP5]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast float [[TMP10]] to i32 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast float [[TMP11]] to i32 +; CHECK-NEXT: [[TMP18:%.*]] = xor i32 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = or i32 [[TMP18]], [[_MSPROP4]] +; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP19]], [[_MSPROP5]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i32 [[TMP20]], i32 [[TMP15]] +; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP14]], float [[TMP10]], float [[TMP11]] +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x float> [[X2]], float [[TMP21]], i64 0 +; CHECK-NEXT: [[TMP23:%.*]] = fneg <4 x float> [[X0]] +; CHECK-NEXT: [[TMP24:%.*]] = fneg <4 x float> [[X2]] +; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x float> [[TMP23]], i64 0 +; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x float> [[X1]], i64 0 +; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x float> [[TMP24]], i64 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[_MSPROP8]], 0 +; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i32 [[_MSPROP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP22]] +; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i32 [[_MSPROP10]], 0 +; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSOR]], [[_MSCMP23]] +; CHECK-NEXT: br i1 [[_MSOR24]], label [[TMP28:%.*]], label [[TMP29:%.*]], !prof [[PROF1]] +; CHECK: 28: +; CHECK-NEXT: call void 
@__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 29: +; CHECK-NEXT: [[TMP30:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP25]], float [[TMP26]], float [[TMP27]], i32 11) +; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x float> [[X2]], i64 0 +; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i64 0 +; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x float> [[X2]], float [[TMP30]], i64 0 +; CHECK-NEXT: [[TMP33:%.*]] = fneg <4 x float> [[X0]] +; CHECK-NEXT: [[TMP34:%.*]] = fneg <4 x float> [[X2]] +; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x float> [[TMP33]], i64 0 +; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x float> [[X1]], i64 0 +; CHECK-NEXT: [[_MSPROP15:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x float> [[TMP34]], i64 0 +; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i32 [[_MSPROP13]], 0 +; CHECK-NEXT: [[_MSCMP26:%.*]] = icmp ne i32 [[_MSPROP14]], 0 +; CHECK-NEXT: [[_MSOR27:%.*]] = or i1 [[_MSCMP25]], [[_MSCMP26]] +; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i32 [[_MSPROP15]], 0 +; CHECK-NEXT: [[_MSOR29:%.*]] = or i1 [[_MSOR27]], [[_MSCMP28]] +; CHECK-NEXT: br i1 [[_MSOR29]], label [[TMP38:%.*]], label [[TMP39:%.*]], !prof [[PROF1]] +; CHECK: 38: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 39: +; CHECK-NEXT: [[TMP40:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP35]], float [[TMP36]], float [[TMP37]], i32 10) +; CHECK-NEXT: [[_MSPROP16:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x float> [[X2]], i64 0 +; CHECK-NEXT: [[TMP42:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP43:%.*]] = bitcast i8 [[X3]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP17:%.*]] = extractelement <8 x i1> [[TMP42]], i64 0 +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <8 x i1> [[TMP43]], i64 0 +; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 0, i32 [[_MSPROP16]] +; CHECK-NEXT: [[TMP46:%.*]] = bitcast float [[TMP40]] to i32 +; CHECK-NEXT: [[TMP47:%.*]] = bitcast float [[TMP41]] to i32 +; CHECK-NEXT: [[TMP48:%.*]] = xor i32 [[TMP46]], [[TMP47]] +; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP48]], 0 +; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP49]], [[_MSPROP16]] +; CHECK-NEXT: [[_MSPROP_SELECT18:%.*]] = select i1 [[_MSPROP17]], i32 [[TMP50]], i32 [[TMP45]] +; CHECK-NEXT: [[TMP51:%.*]] = select i1 [[TMP44]], float [[TMP40]], float [[TMP41]] +; CHECK-NEXT: [[_MSPROP19:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[_MSPROP_SELECT18]], i64 0 +; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x float> [[X2]], float [[TMP51]], i64 0 +; CHECK-NEXT: [[_MSPROP20:%.*]] = or <4 x i32> [[_MSPROP7]], [[_MSPROP12]] +; CHECK-NEXT: [[RES3:%.*]] = fadd <4 x float> [[TMP22]], [[TMP32]] +; CHECK-NEXT: [[_MSPROP21:%.*]] = or <4 x i32> [[_MSPROP19]], [[_MSPROP20]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <4 x float> [[TMP52]], [[RES3]] +; CHECK-NEXT: store <4 x i32> [[_MSPROP21]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES4]] +; + %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4) + %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> 
%x2, i8 -1, i32 11) + %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 10) + %res3 = fadd <4 x float> %res, %res1 + %res4 = fadd <4 x float> %res2, %res3 + ret <4 x float> %res4 +} + +define <4 x float> @test_int_x86_avx512_mask3_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, ptr %ptr_b, i8 %x3, i32 %x4) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmadd_ss_rm( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[Q:%.*]] = load float, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[_MSPROP]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[VECINIT_I]], i64 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP1]], [[_MSPROP2]] +; CHECK-NEXT: [[_MSPROP5:%.*]] = or i32 [[_MSPROP4]], [[_MSPROP3]] +; CHECK-NEXT: [[TMP13:%.*]] = call float @llvm.fma.f32(float [[TMP10]], float [[TMP11]], float [[TMP12]]) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP14]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP15]], i64 0 +; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[_MSPROP5]], i32 [[_MSPROP3]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast float [[TMP13]] to i32 +; CHECK-NEXT: [[TMP19:%.*]] = bitcast float [[TMP12]] to i32 +; CHECK-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = or i32 [[TMP20]], [[_MSPROP5]] +; CHECK-NEXT: [[TMP22:%.*]] = or i32 [[TMP21]], [[_MSPROP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i32 [[TMP22]], i32 [[TMP17]] +; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP16]], float [[TMP13]], float [[TMP12]] +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[X1]], float [[TMP23]], i64 0 +; CHECK-NEXT: store <4 x i32> [[_MSPROP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x 
float> [[TMP24]] +; + %q = load float, ptr %ptr_b + %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 + %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %vecinit.i, <4 x float> %x1, i8 %x3, i32 4) + ret <4 x float> %res +} + +define <4 x float> @test_int_x86_avx512_mask_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, ptr %ptr_b, i8 %x3, i32 %x4) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_vfmadd_ss_rm( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[Q:%.*]] = load float, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[_MSPROP]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[VECINIT_I]], i64 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP1]], [[_MSPROP2]] +; CHECK-NEXT: [[_MSPROP5:%.*]] = or i32 [[_MSPROP4]], [[_MSPROP3]] +; CHECK-NEXT: [[TMP13:%.*]] = call float @llvm.fma.f32(float [[TMP10]], float [[TMP11]], float [[TMP12]]) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP14]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP15]], i64 0 +; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[_MSPROP5]], i32 [[_MSPROP1]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast float [[TMP13]] to i32 +; CHECK-NEXT: [[TMP19:%.*]] = bitcast float [[TMP10]] to i32 +; CHECK-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = or i32 [[TMP20]], [[_MSPROP5]] +; CHECK-NEXT: [[TMP22:%.*]] = or i32 [[TMP21]], [[_MSPROP1]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i32 [[TMP22]], i32 [[TMP17]] +; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP16]], float [[TMP13]], float [[TMP10]] +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[X0]], float [[TMP23]], i64 0 +; CHECK-NEXT: store <4 x i32> [[_MSPROP7]], ptr @__msan_retval_tls, align 8 +; 
CHECK-NEXT: ret <4 x float> [[TMP24]] +; + %q = load float, ptr %ptr_b + %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 + %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %vecinit.i, <4 x float> %x1, i8 %x3, i32 4) + ret <4 x float> %res +} + + +define <4 x float> @test_int_x86_avx512_maskz_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, ptr %ptr_b, i8 %x3, i32 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_vfmadd_ss_rm( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[Q:%.*]] = load float, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = extractelement <4 x i32> [[_MSPROP]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[VECINIT_I]], i64 0 +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP1]], [[_MSPROP2]] +; CHECK-NEXT: [[_MSPROP5:%.*]] = or i32 [[_MSPROP4]], [[_MSPROP3]] +; CHECK-NEXT: [[TMP12:%.*]] = call float @llvm.fma.f32(float [[TMP9]], float [[TMP10]], float [[TMP11]]) +; CHECK-NEXT: [[TMP13:%.*]] = select i1 false, i32 [[_MSPROP5]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast float [[TMP12]] to i32 +; CHECK-NEXT: [[TMP15:%.*]] = xor i32 [[TMP14]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP15]], [[_MSPROP5]] +; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], 0 +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 false, i32 [[TMP17]], i32 [[TMP13]] +; CHECK-NEXT: [[TMP18:%.*]] = select i1 false, float [[TMP12]], float 0.000000e+00 +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x float> [[X0]], float [[TMP18]], i64 0 +; CHECK-NEXT: store <4 x i32> [[_MSPROP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[TMP19]] +; + %q = load float, ptr %ptr_b + %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 + %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %vecinit.i, i8 0, i32 4) + ret <4 x float> %res +} + +declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8) + +define <8 x i32> @test_int_x86_avx512_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1) #0 { +; CHECK-LABEL: 
@test_int_x86_avx512_pmov_qd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = trunc <8 x i64> [[X0:%.*]] to <8 x i32> +; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[TMP2]] +; + %res = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1) + ret <8 x i32> %res +} + +define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i64> [[X0:%.*]] to <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[_MSPROP]], <8 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i32> [[TMP4]], [[X1:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i32> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i32> [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[TMP10]], <8 x i32> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[TMP4]], <8 x i32> [[X1]] +; CHECK-NEXT: store <8 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[TMP11]] +; + %res = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) + ret <8 x i32> %res +} + +define <8 x i32>@test_int_x86_avx512_maskz_pmov_qd_512(<8 x i64> %x0, i8 %x2) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_maskz_pmov_qd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i64> [[X0:%.*]] to <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[_MSPROP]], <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i32> [[TMP7]], [[_MSPROP]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i32> [[TMP9]], <8 x i32> [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[TMP3]], <8 x i32> zeroinitializer +; CHECK-NEXT: store <8 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[TMP10]] +; + %res = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2) + ret <8 x i32> %res +} + +declare <16 x float> 
@llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32) + +define <16 x float> @test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_dq2ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[CVT:%.*]] = sitofp <16 x i32> [[X0:%.*]] to <16 x float> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP1]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x float> [[CVT]] to <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[X1:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP1]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i32> [[TMP11]], <16 x i32> [[TMP6]] +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP5]], <16 x float> [[CVT]], <16 x float> [[X1]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP16:%.*]] = call <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32> [[X0]], i32 8) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[_MSPROP_SELECT]], zeroinitializer +; CHECK-NEXT: [[RES2:%.*]] = fadd <16 x float> [[TMP12]], [[TMP16]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES2]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4) + %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 8) + %res2 = fadd <16 x float> %res, %res1 + ret <16 x float> %res2 +} + +declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32) + +define <16 x float> @test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) #0 { +; +; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_udq2ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[CVT:%.*]] = uitofp <16 x i32> [[X0:%.*]] to <16 x float> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP1]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x float> [[CVT]] to <16 x i32> +; 
CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[X1:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP1]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i32> [[TMP11]], <16 x i32> [[TMP6]] +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP5]], <16 x float> [[CVT]], <16 x float> [[X1]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP16:%.*]] = call <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32> [[X0]], i32 8) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[_MSPROP_SELECT]], zeroinitializer +; CHECK-NEXT: [[RES2:%.*]] = fadd <16 x float> [[TMP12]], [[TMP16]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES2]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4) + %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 8) + %res2 = fadd <16 x float> %res, %res1 + ret <16 x float> %res2 +} + +define <8 x double> @test_mask_compress_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_compress_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i1> [[TMP4]] to i8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> [[DATA:%.*]], <8 x double> [[PASSTHRU:%.*]], <8 x i1> [[TMP5]]) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP11]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask) + ret <8 x double> %res +} + +define <8 x double> @test_maskz_compress_pd_512(<8 x double> %data, i8 %mask) #0 { +; +; CHECK-LABEL: @test_maskz_compress_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), 
align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i1> [[TMP3]] to i8 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> [[DATA:%.*]], <8 x double> zeroinitializer, <8 x i1> [[TMP4]]) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP9]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask) + ret <8 x double> %res +} + +define <8 x double> @test_compress_pd_512(<8 x double> %data) #0 { +; CHECK-LABEL: @test_compress_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> [[DATA:%.*]], <8 x double> undef, <8 x i1> splat (i1 true)) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP2]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> undef, i8 -1) + ret <8 x double> %res +} + +declare <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask) + +define <16 x float> @test_mask_compress_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_compress_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i1> [[TMP4]] to i16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> [[DATA:%.*]], <16 x float> [[PASSTHRU:%.*]], <16 x 
i1> [[TMP5]]) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP11]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask) + ret <16 x float> %res +} + +define <16 x float> @test_maskz_compress_ps_512(<16 x float> %data, i16 %mask) #0 { +; +; CHECK-LABEL: @test_maskz_compress_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i1> [[TMP3]] to i16 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> [[DATA:%.*]], <16 x float> zeroinitializer, <16 x i1> [[TMP4]]) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP9]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask) + ret <16 x float> %res +} + +define <16 x float> @test_compress_ps_512(<16 x float> %data) #0 { +; CHECK-LABEL: @test_compress_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> [[DATA:%.*]], <16 x float> undef, <16 x i1> splat (i1 true)) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP2]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> undef, i16 -1) + ret <16 x float> %res +} + +declare <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask) + +define <8 x i64> @test_mask_compress_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_compress_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], 
[[_MSCMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i1> [[TMP4]] to i8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> [[PASSTHRU:%.*]], <8 x i1> [[TMP5]]) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP11]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) + ret <8 x i64> %res +} + +define <8 x i64> @test_maskz_compress_q_512(<8 x i64> %data, i8 %mask) #0 { +; +; CHECK-LABEL: @test_maskz_compress_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i1> [[TMP3]] to i8 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> zeroinitializer, <8 x i1> [[TMP4]]) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP9]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask) + ret <8 x i64> %res +} + +define <8 x i64> @test_compress_q_512(<8 x i64> %data) #0 { +; CHECK-LABEL: @test_compress_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> undef, <8 x i1> splat (i1 true)) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP2]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1) + ret <8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask) + +define <16 x i32> @test_mask_compress_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_compress_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void 
@llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i1> [[TMP4]] to i16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> [[PASSTHRU:%.*]], <16 x i1> [[TMP5]]) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP11]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) + ret <16 x i32> %res +} + +define <16 x i32> @test_maskz_compress_d_512(<16 x i32> %data, i16 %mask) #0 { +; +; CHECK-LABEL: @test_maskz_compress_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i1> [[TMP3]] to i16 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> zeroinitializer, <16 x i1> [[TMP4]]) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP9]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask) + ret <16 x i32> %res +} + +define <16 x i32> @test_compress_d_512(<16 x i32> %data) #0 { +; CHECK-LABEL: @test_compress_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> undef, <16 x i1> splat (i1 true)) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP2]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1) + ret <16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask) + +define <8 x double> 
@test_expand_pd_512(<8 x double> %data) #0 { +; CHECK-LABEL: @test_expand_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> [[DATA:%.*]], <8 x double> undef, <8 x i1> splat (i1 true)) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP2]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> undef, i8 -1) + ret <8 x double> %res +} + +define <8 x double> @test_mask_expand_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_expand_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i1> [[TMP4]] to i8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> [[DATA:%.*]], <8 x double> [[PASSTHRU:%.*]], <8 x i1> [[TMP5]]) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP11]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask) + ret <8 x double> %res +} + +define <8 x double> @test_maskz_expand_pd_512(<8 x double> %data, i8 %mask) #0 { +; +; CHECK-LABEL: @test_maskz_expand_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i1> [[TMP3]] to i8 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> 
@llvm.x86.avx512.mask.expand.v8f64(<8 x double> [[DATA:%.*]], <8 x double> zeroinitializer, <8 x i1> [[TMP4]]) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP9]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask) + ret <8 x double> %res +} + +declare <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask) + +define <16 x float> @test_expand_ps_512(<16 x float> %data) #0 { +; CHECK-LABEL: @test_expand_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> [[DATA:%.*]], <16 x float> undef, <16 x i1> splat (i1 true)) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP2]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> undef, i16 -1) + ret <16 x float> %res +} + +define <16 x float> @test_mask_expand_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_expand_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i1> [[TMP4]] to i16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> [[DATA:%.*]], <16 x float> [[PASSTHRU:%.*]], <16 x i1> [[TMP5]]) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP11]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask) + ret <16 x float> %res +} + +define <16 x float> @test_maskz_expand_ps_512(<16 x float> %data, i16 %mask) #0 { +; +; CHECK-LABEL: @test_maskz_expand_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: 
[[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i1> [[TMP3]] to i16 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> [[DATA:%.*]], <16 x float> zeroinitializer, <16 x i1> [[TMP4]]) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP9]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask) + ret <16 x float> %res +} + +declare <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask) + +define <8 x i64> @test_expand_q_512(<8 x i64> %data) #0 { +; CHECK-LABEL: @test_expand_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> undef, <8 x i1> splat (i1 true)) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP2]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1) + ret <8 x i64> %res +} + +define <8 x i64> @test_mask_expand_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) #0 { +; +; CHECK-LABEL: @test_mask_expand_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i1> [[TMP4]] to i8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> [[PASSTHRU:%.*]], <8 x i1> [[TMP5]]) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP11]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) + ret <8 x i64> %res +} + +define <8 x i64> @test_maskz_expand_q_512(<8 x i64> %data, i8 %mask) #0 { +; +; 
CHECK-LABEL: @test_maskz_expand_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i1> [[TMP3]] to i8 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> zeroinitializer, <8 x i1> [[TMP4]]) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP9]] +; + %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask) + ret <8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask) + +define <16 x i32> @test_expand_d_512(<16 x i32> %data) #0 { +; CHECK-LABEL: @test_expand_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> undef, <16 x i1> splat (i1 true)) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP2]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1) + ret <16 x i32> %res +} + +define <16 x i32> @test_mask_expand_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) #0 { +; +; CHECK-LABEL: @test_mask_expand_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i1> [[TMP4]] to i16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> 
@llvm.x86.avx512.mask.expand.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> [[PASSTHRU:%.*]], <16 x i1> [[TMP5]]) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP11]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) + ret <16 x i32> %res +} + +define <16 x i32> @test_maskz_expand_d_512(<16 x i32> %data, i16 %mask) #0 { +; +; CHECK-LABEL: @test_maskz_expand_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i1> [[TMP3]] to i16 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> zeroinitializer, <16 x i1> [[TMP4]]) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP9]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask) + ret <16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask) + +define <16 x float> @test_cmp_512(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d, ptr %p) #0 { +; +; CHECK-LABEL: @test_cmp_512( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 256) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP0]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 1, <16 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne 
i512 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR4]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[TMP14:%.*]] = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> [[C:%.*]], <16 x float> [[D:%.*]], i32 1, <16 x i1> splat (i1 true), i32 4) +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP5]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 15: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[TMP17:%.*]] = load <16 x float>, ptr [[P:%.*]], align 64 +; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = xor i64 [[TMP18]], 87960930222080 +; CHECK-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP19]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP20]], align 64 +; CHECK-NEXT: [[TMP21:%.*]] = xor <16 x i1> [[TMP9]], [[TMP14]] +; CHECK-NEXT: [[TMP22:%.*]] = select <16 x i1> [[TMP21]], <16 x i32> zeroinitializer, <16 x i32> [[_MSLD]] +; CHECK-NEXT: [[TMP23:%.*]] = bitcast <16 x float> [[TMP17]] to <16 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = xor <16 x i32> zeroinitializer, [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = or <16 x i32> [[TMP24]], zeroinitializer +; CHECK-NEXT: [[TMP26:%.*]] = or <16 x i32> [[TMP25]], [[_MSLD]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> zeroinitializer, <16 x i32> [[TMP26]], <16 x i32> [[TMP22]] +; CHECK-NEXT: [[TMP27:%.*]] = select <16 x i1> [[TMP21]], <16 x float> zeroinitializer, <16 x float> [[TMP17]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP27]] +; + entry: + %0 = tail call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 1, i32 8) + %1 = tail call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %c, <16 x float> %d, i32 1, i32 4) + %2 = load <16 x float>, ptr %p + %3 = xor <16 x i1> %0, %1 + %4 = select <16 x i1> %3, <16 x float> zeroinitializer, <16 x float> %2 + ret <16 x float> %4 +} + +declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32) + +attributes #0 = { sanitize_memory } From 84db875d250bd64a1408347e3b4419d3ca94423b Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Sat, 25 Jan 2025 03:15:37 +0000 Subject: [PATCH 2/4] Add avx512-intrinsics.ll --- .../MemorySanitizer/avx512-intrinsics.ll | 13714 ++++++++++++++++ 1 file changed, 13714 insertions(+) create mode 100644 llvm/test/Instrumentation/MemorySanitizer/avx512-intrinsics.ll diff --git a/llvm/test/Instrumentation/MemorySanitizer/avx512-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/avx512-intrinsics.ll new file mode 100644 index 00000000000000..11f72ce39b0b8f --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/avx512-intrinsics.ll @@ -0,0 +1,13714 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -S -mtriple=x86_64-linux-gnu -mattr=+avx512f -passes=msan 2>&1 | FileCheck %s +; +; Forked from llvm/test/CodeGen/X86/avx512-intrinsics.ll + +define <8 x double> @test_mask_compress_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) #0 { +; CHECK-LABEL: 
@test_mask_compress_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i1> [[TMP4]] to i8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1:![0-9]+]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> [[DATA:%.*]], <8 x double> [[PASSTHRU:%.*]], <8 x i1> [[TMP5]]) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP11]] +; + %1 = bitcast i8 %mask to <8 x i1> + %2 = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> %data, <8 x double> %passthru, <8 x i1> %1) + ret <8 x double> %2 +} + +define <8 x double> @test_maskz_compress_pd_512(<8 x double> %data, i8 %mask) #0 { +; CHECK-LABEL: @test_maskz_compress_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i1> [[TMP3]] to i8 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> [[DATA:%.*]], <8 x double> zeroinitializer, <8 x i1> [[TMP4]]) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP9]] +; + %1 = bitcast i8 %mask to <8 x i1> + %2 = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> %data, <8 x double> zeroinitializer, <8 x i1> %1) + ret <8 x double> %2 +} + +define <8 x double> @test_compress_pd_512(<8 x double> %data) #0 { +; CHECK-LABEL: @test_compress_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x double> 
@llvm.x86.avx512.mask.compress.v8f64(<8 x double> [[DATA:%.*]], <8 x double> undef, <8 x i1> splat (i1 true)) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP2]] +; + %1 = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> %data, <8 x double> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + ret <8 x double> %1 +} + +define <16 x float> @test_mask_compress_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) #0 { +; CHECK-LABEL: @test_mask_compress_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i1> [[TMP4]] to i16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> [[DATA:%.*]], <16 x float> [[PASSTHRU:%.*]], <16 x i1> [[TMP5]]) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP11]] +; + %1 = bitcast i16 %mask to <16 x i1> + %2 = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> %data, <16 x float> %passthru, <16 x i1> %1) + ret <16 x float> %2 +} + +define <16 x float> @test_maskz_compress_ps_512(<16 x float> %data, i16 %mask) #0 { +; CHECK-LABEL: @test_maskz_compress_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i1> [[TMP3]] to i16 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> [[DATA:%.*]], <16 x float> zeroinitializer, <16 x i1> [[TMP4]]) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP9]] +; + %1 = 
bitcast i16 %mask to <16 x i1> + %2 = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> %data, <16 x float> zeroinitializer, <16 x i1> %1) + ret <16 x float> %2 +} + +define <16 x float> @test_compress_ps_512(<16 x float> %data) #0 { +; CHECK-LABEL: @test_compress_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> [[DATA:%.*]], <16 x float> undef, <16 x i1> splat (i1 true)) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP2]] +; + %1 = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> %data, <16 x float> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + ret <16 x float> %1 +} + +define <8 x i64> @test_mask_compress_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_compress_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i1> [[TMP4]] to i8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> [[PASSTHRU:%.*]], <8 x i1> [[TMP5]]) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP11]] +; + %1 = bitcast i8 %mask to <8 x i1> + %2 = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> %data, <8 x i64> %passthru, <8 x i1> %1) + ret <8 x i64> %2 +} + +define <8 x i64> @test_maskz_compress_q_512(<8 x i64> %data, i8 %mask) #0 { +; CHECK-LABEL: @test_maskz_compress_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i1> [[TMP3]] to i8 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label 
[[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> zeroinitializer, <8 x i1> [[TMP4]]) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP9]] +; + %1 = bitcast i8 %mask to <8 x i1> + %2 = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> %data, <8 x i64> zeroinitializer, <8 x i1> %1) + ret <8 x i64> %2 +} + +define <8 x i64> @test_compress_q_512(<8 x i64> %data) #0 { +; CHECK-LABEL: @test_compress_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> undef, <8 x i1> splat (i1 true)) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP2]] +; + %1 = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> %data, <8 x i64> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + ret <8 x i64> %1 +} + +define <16 x i32> @test_mask_compress_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) #0 { +; CHECK-LABEL: @test_mask_compress_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i1> [[TMP4]] to i16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> [[PASSTHRU:%.*]], <16 x i1> [[TMP5]]) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP11]] +; + %1 = bitcast i16 %mask to <16 x i1> + %2 = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %data, <16 x i32> %passthru, <16 x i1> %1) + ret <16 x i32> %2 +} + +define <16 x i32> @test_maskz_compress_d_512(<16 x i32> %data, i16 %mask) #0 { +; CHECK-LABEL: @test_maskz_compress_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 
+ +define <16 x i32> @test_mask_compress_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) #0 { +; CHECK-LABEL: @test_mask_compress_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i1> [[TMP4]] to i16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> [[PASSTHRU:%.*]], <16 x i1> [[TMP5]]) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP11]] +; + %1 = bitcast i16 %mask to <16 x i1> + %2 = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %data, <16 x i32> %passthru, <16 x i1> %1) + ret <16 x i32> %2 +} + +define <16 x i32> @test_maskz_compress_d_512(<16 x i32> %data, i16 %mask) #0 { +; CHECK-LABEL: @test_maskz_compress_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i1> [[TMP3]] to i16 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> zeroinitializer, <16 x i1> [[TMP4]]) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP9]] +; + %1 = bitcast i16 %mask to <16 x i1> + %2 = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %data, <16 x i32> zeroinitializer, <16 x i1> %1) + ret <16 x i32> %2 +} + +define <16 x i32> @test_compress_d_512(<16 x i32> %data) #0 { +; CHECK-LABEL: @test_compress_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> undef, <16 x i1> splat (i1 true)) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP2]] +; + %1 = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %data, <16 x i32> undef, <16 x i1> splat (i1 true)) + ret <16 x i32> %1 +} + +define <8 x double> @test_expand_pd_512(<8 x double> %data) #0 { +; CHECK-LABEL: @test_expand_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> [[DATA:%.*]], <8 x double> undef, <8 x i1> splat (i1 true)) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP2]] +; + %1 = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> %data, <8 x double> undef, <8 x i1> splat (i1 true)) + ret <8 x double> %1 +}
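+ +; NOTE (reviewer annotation, not produced by update_test_checks.py): the expand +; intrinsics (test_expand_pd_512 above and the masked variants below) get the same +; strict treatment as compress: any uninitialized bit in any operand triggers a +; report, even in lanes the mask would discard, and the returned vector is treated +; as fully initialized.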
+ +define <8 x double> @test_mask_expand_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_expand_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i1> [[TMP4]] to i8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> [[DATA:%.*]], <8 x double> [[PASSTHRU:%.*]], <8 x i1> [[TMP5]]) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP11]] +; + %1 = bitcast i8 %mask to <8 x i1> + %2 = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> %data, <8 x double> %passthru, <8 x i1> %1) + ret <8 x double> %2 +} + +define <8 x double> @test_maskz_expand_pd_512(<8 x double> %data, i8 %mask) #0 { +; CHECK-LABEL: @test_maskz_expand_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i1> [[TMP3]] to i8 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> [[DATA:%.*]], <8 x double> zeroinitializer, <8 x i1> [[TMP4]]) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP9]] +; + %1 = bitcast i8 %mask to <8 x i1> + %2 = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> %data, <8 x double> zeroinitializer, <8 x i1> %1) + ret <8 x double> %2 +} + +define <16 x float> @test_expand_ps_512(<16 x float> %data) #0 { +; CHECK-LABEL: @test_expand_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> [[DATA:%.*]], <16 x float> undef, <16 x i1> splat (i1 true)) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP2]] +; + %1 = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> %data, <16 x float> undef, <16 x i1> splat (i1 true)) + ret <16 x float> %1 +}
+ +define <16 x float> @test_mask_expand_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) #0 { +; CHECK-LABEL: @test_mask_expand_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i1> [[TMP4]] to i16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> [[DATA:%.*]], <16 x float> [[PASSTHRU:%.*]], <16 x i1> [[TMP5]]) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP11]] +; + %1 = bitcast i16 %mask to <16 x i1> + %2 = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> %data, <16 x float> %passthru, <16 x i1> %1) + ret <16 x float> %2 +} + +define <16 x float> @test_maskz_expand_ps_512(<16 x float> %data, i16 %mask) #0 { +; CHECK-LABEL: @test_maskz_expand_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i1> [[TMP3]] to i16 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> [[DATA:%.*]], <16 x float> zeroinitializer, <16 x i1> [[TMP4]]) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP9]] +; + %1 = bitcast i16 %mask to <16 x i1> + %2 = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> %data, <16 x float> zeroinitializer, <16 x i1> %1) + ret <16 x float> %2 +} + +define <8 x i64> @test_expand_q_512(<8 x i64> %data) #0 { +; CHECK-LABEL: @test_expand_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> undef, <8 x i1> splat (i1 true)) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP2]] +; + %1 = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> %data, <8 x i64> undef, <8 x i1> splat (i1 true)) + ret <8 x i64> %1 +} + +define <8 x i64> @test_mask_expand_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_expand_q_512( +; CHECK-NEXT:
[[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i1> [[TMP4]] to i8 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> [[PASSTHRU:%.*]], <8 x i1> [[TMP5]]) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP11]] +; + %1 = bitcast i8 %mask to <8 x i1> + %2 = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> %data, <8 x i64> %passthru, <8 x i1> %1) + ret <8 x i64> %2 +} + +define <8 x i64> @test_maskz_expand_q_512(<8 x i64> %data, i8 %mask) #0 { +; CHECK-LABEL: @test_maskz_expand_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[TMP1]] to <8 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i1> [[TMP3]] to i8 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> zeroinitializer, <8 x i1> [[TMP4]]) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP9]] +; + %1 = bitcast i8 %mask to <8 x i1> + %2 = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> %data, <8 x i64> zeroinitializer, <8 x i1> %1) + ret <8 x i64> %2 +} + +define <16 x i32> @test_expand_d_512(<16 x i32> %data) #0 { +; CHECK-LABEL: @test_expand_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> undef, <16 x i1> splat (i1 true)) +; CHECK-NEXT: store <16 x i32> 
zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP2]] +; + %1 = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %data, <16 x i32> undef, <16 x i1> splat (i1 true)) + ret <16 x i32> %1 +} + +define <16 x i32> @test_mask_expand_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) #0 { +; CHECK-LABEL: @test_mask_expand_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i1> [[TMP4]] to i16 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP8]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> [[PASSTHRU:%.*]], <16 x i1> [[TMP5]]) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP11]] +; + %1 = bitcast i16 %mask to <16 x i1> + %2 = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %data, <16 x i32> %passthru, <16 x i1> %1) + ret <16 x i32> %2 +} + +define <16 x i32> @test_maskz_expand_d_512(<16 x i32> %data, i16 %mask) #0 { +; CHECK-LABEL: @test_maskz_expand_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i1> [[TMP3]] to i16 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> zeroinitializer, <16 x i1> [[TMP4]]) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP9]] +; + %1 = bitcast i16 %mask to <16 x i1> + %2 = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %data, <16 x i32> zeroinitializer, <16 x i1> %1) + ret <16 x i32> %2 +} + +define <16 x float>
@test_rcp_ps_512(<16 x float> %a0) #0 { +; CHECK-LABEL: @test_rcp_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> [[A0:%.*]], <16 x float> zeroinitializer, i16 -1) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES]] +; + %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1] + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone + +define <8 x double> @test_rcp_pd_512(<8 x double> %a0) #0 { +; CHECK-LABEL: @test_rcp_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> [[A0:%.*]], <8 x double> zeroinitializer, i8 -1) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[RES]] +; + %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1] + ret <8 x double> %res +} +declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone + +declare <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32, i32) + +define <2 x double> @test_rndscale_sd(<2 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @test_rndscale_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 11, i32 4) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> undef, i8 -1, i32 11, i32 4) + ret <2 x double>%res +} + +define <2 x double> @test_rndscale_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) 
#0 { +; CHECK-LABEL: @test_rndscale_sd_mask( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 11, i32 4) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 11, i32 4) + ret <2 x double>%res +} + +define <2 x double> @test_rndscale_sd_mask_load(<2 x double> %a, ptr %bptr, <2 x double> %c, i8 %mask) #0 { +; CHECK-LABEL: @test_rndscale_sd_mask_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[B:%.*]] = load <2 x double>, ptr [[BPTR:%.*]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[BPTR]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP9]], align 16 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 
[[TMP12]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSOR]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> [[A:%.*]], <2 x double> [[B]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 11, i32 4) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %b = load <2 x double>, ptr %bptr + %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 11, i32 4) + ret <2 x double>%res +} + +define <2 x double> @test_rndscale_sd_maskz(<2 x double> %a, <2 x double> %b, i8 %mask) #0 { +; CHECK-LABEL: @test_rndscale_sd_maskz( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 11, i32 4) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> zeroinitializer, i8 %mask, i32 11, i32 4) + ret <2 x double>%res +} + +declare <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32, i32) + +define <4 x float> @test_rndscale_ss(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: @test_rndscale_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, 
i32 11, i32 4) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> undef, i8 -1, i32 11, i32 4) + ret <4 x float>%res +} + +define <4 x float> @test_rndscale_ss_load(<4 x float> %a, ptr %bptr) #0 { +; CHECK-LABEL: @test_rndscale_ss_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load <4 x float>, ptr [[BPTR:%.*]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[BPTR]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> [[A:%.*]], <4 x float> [[B]], <4 x float> undef, i8 -1, i32 11, i32 4) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %b = load <4 x float>, ptr %bptr + %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> undef, i8 -1, i32 11, i32 4) + ret <4 x float>%res +} + +define <4 x float> @test_rndscale_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) #0 { +; CHECK-LABEL: @test_rndscale_ss_mask( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] 
+; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 11, i32 4) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask, i32 11, i32 4) + ret <4 x float>%res +} + +define <4 x float> @test_rndscale_ss_maskz(<4 x float> %a, <4 x float> %b, i8 %mask) #0 { +; CHECK-LABEL: @test_rndscale_ss_maskz( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 11, i32 4) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask, i32 11, i32 4) + ret <4 x float>%res +} + +declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32) + +define <8 x double> @test7(<8 x double> %a) #0 { +; CHECK-LABEL: @test7( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> [[A:%.*]], i32 11, <8 x double> [[A]], i8 -1, i32 4) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[RES]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4) + ret <8 x double>%res +} + +declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32) + +define <16 x float> @test8(<16 x float> %a) #0 { +; CHECK-LABEL: @test8( +; 
CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> [[A:%.*]], i32 11, <16 x float> [[A]], i16 -1, i32 4) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4) + ret <16 x float>%res +} + +define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) #0 { +; CHECK-LABEL: @test_rsqrt_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> [[A0:%.*]], <16 x float> zeroinitializer, i16 -1) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES]] +; + %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1] + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone + +define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) #0 { +; CHECK-LABEL: @test_sqrt_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x double> @llvm.sqrt.v8f64(<8 x double> [[A0:%.*]]) +; CHECK-NEXT: store <8 x i64> [[TMP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP2]] +; + %1 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a0) + ret <8 x double> %1 +} + +define <8 x double> @test_mask_sqrt_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_sqrt_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x double> @llvm.sqrt.v8f64(<8 x double> [[A0:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP1]], <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x 
double> [[TMP4]] to <8 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x double> [[PASSTHRU:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP1]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP12]], <8 x i64> [[TMP7]] +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP6]], <8 x double> [[TMP4]], <8 x double> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP13]] +; + %1 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a0) + %2 = bitcast i8 %mask to <8 x i1> + %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru + ret <8 x double> %3 +} + +define <8 x double> @test_maskz_sqrt_pd_512(<8 x double> %a0, i8 %mask) #0 { +; CHECK-LABEL: @test_maskz_sqrt_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x double> @llvm.sqrt.v8f64(<8 x double> [[A0:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP1]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x double> [[TMP3]] to <8 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP1]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i64> [[TMP10]], <8 x i64> [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP5]], <8 x double> [[TMP3]], <8 x double> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP11]] +; + %1 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a0) + %2 = bitcast i8 %mask to <8 x i1> + %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer + ret <8 x double> %3 +} +declare <8 x double> @llvm.sqrt.v8f64(<8 x double>) + +define <8 x double> @test_sqrt_round_pd_512(<8 x double> %a0) #0 { +; CHECK-LABEL: @test_sqrt_round_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> [[A0:%.*]], i32 11) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP5]] +; + %1 = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, i32 11) + ret <8 x double> %1 +} + +define <8 x double> @test_mask_sqrt_round_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_sqrt_round_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load 
i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> [[A0:%.*]], i32 11) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> zeroinitializer, <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x double> [[TMP7]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[PASSTHRU:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP15]], <8 x i64> [[TMP10]] +; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP9]], <8 x double> [[TMP7]], <8 x double> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP16]] +; + %1 = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, i32 11) + %2 = bitcast i8 %mask to <8 x i1> + %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru + ret <8 x double> %3 +} + +define <8 x double> @test_maskz_sqrt_round_pd_512(<8 x double> %a0, i8 %mask) #0 { +; CHECK-LABEL: @test_maskz_sqrt_round_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> [[A0:%.*]], i32 11) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x double> [[TMP6]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i64> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP13]], <8 x i64> [[TMP9]] +; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP8]], <8 x double> [[TMP6]], <8 x double> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP14]] +; + %1 = 
call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, i32 11) + %2 = bitcast i8 %mask to <8 x i1> + %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer + ret <8 x double> %3 +} +declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>, i32) nounwind readnone + +define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) #0 { +; CHECK-LABEL: @test_sqrt_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[A0:%.*]]) +; CHECK-NEXT: store <16 x i32> [[TMP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP2]] +; + %1 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a0) + ret <16 x float> %1 +} + +define <16 x float> @test_mask_sqrt_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) #0 { +; CHECK-LABEL: @test_mask_sqrt_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[A0:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP1]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[TMP4]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP1]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP12]], <16 x i32> [[TMP7]] +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP6]], <16 x float> [[TMP4]], <16 x float> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP13]] +; + %1 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a0) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru + ret <16 x float> %3 +} + +define <16 x float> @test_maskz_sqrt_ps_512(<16 x float> %a0, i16 %mask) #0 { +; CHECK-LABEL: @test_maskz_sqrt_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[A0:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP1]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x float> [[TMP3]] to <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP7]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP1]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer +; 
CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i32> [[TMP10]], <16 x i32> [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP5]], <16 x float> [[TMP3]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP11]] +; + %1 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a0) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + ret <16 x float> %3 +} +declare <16 x float> @llvm.sqrt.v16f32(<16 x float>) + +define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) #0 { +; CHECK-LABEL: @test_sqrt_round_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> [[A0:%.*]], i32 11) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP5]] +; + %1 = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, i32 11) + ret <16 x float> %1 +} + +define <16 x float> @test_mask_sqrt_round_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) #0 { +; CHECK-LABEL: @test_mask_sqrt_round_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> [[A0:%.*]], i32 11) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> zeroinitializer, <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x float> [[TMP7]] to <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP15]], <16 x i32> [[TMP10]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP9]], <16 x float> [[TMP7]], <16 x float> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %1 = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, i32 11) + %2 = bitcast i16 %mask to <16 x i1> + 
%3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru + ret <16 x float> %3 +} + +define <16 x float> @test_maskz_sqrt_round_ps_512(<16 x float> %a0, i16 %mask) #0 { +; CHECK-LABEL: @test_maskz_sqrt_round_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> [[A0:%.*]], i32 11) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x float> [[TMP6]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP13]], <16 x i32> [[TMP9]] +; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP8]], <16 x float> [[TMP6]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP14]] +; + %1 = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, i32 11) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + ret <16 x float> %3 +} +declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>, i32) nounwind readnone + +define <8 x double> @test_getexp_pd_512(<8 x double> %a0) #0 { +; CHECK-LABEL: @test_getexp_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> [[A0:%.*]], <8 x double> zeroinitializer, i8 -1, i32 4) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[RES]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) + ret <8 x double> %res +} +define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) #0 { +; CHECK-LABEL: @test_getexp_round_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; 
CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> [[A0:%.*]], <8 x double> zeroinitializer, i8 -1, i32 12) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[RES]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 12) + ret <8 x double> %res +} +declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone + +define <16 x float> @test_getexp_ps_512(<16 x float> %a0) #0 { +; CHECK-LABEL: @test_getexp_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> [[A0:%.*]], <16 x float> zeroinitializer, i16 -1, i32 4) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) + ret <16 x float> %res +} + +define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) #0 { +; CHECK-LABEL: @test_getexp_round_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> [[A0:%.*]], <16 x float> zeroinitializer, i16 -1, i32 8) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8) + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone + +declare <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone + +define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_sqrt_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast 
<4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES0:%.*]] = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i128 [[TMP12]], 0 +; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]] +; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR12:%.*]] = or i1 [[_MSOR10]], [[_MSCMP11]] +; CHECK-NEXT: br i1 [[_MSOR12]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> [[A2]], i8 [[MASK]], i32 9) +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP13:%.*]] = icmp ne i128 [[TMP15]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0 +; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSCMP13]], [[_MSCMP14]] +; CHECK-NEXT: [[_MSCMP16:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR17:%.*]] = or i1 [[_MSOR15]], [[_MSCMP16]] +; CHECK-NEXT: br i1 [[_MSOR17]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]] +; CHECK: 17: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 18: +; CHECK-NEXT: [[RES2:%.*]] = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> zeroinitializer, i8 [[MASK]], i32 10) +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP18:%.*]] = icmp ne i128 [[TMP19]], 0 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i128 [[TMP20]], 0 +; CHECK-NEXT: [[_MSOR20:%.*]] = or i1 [[_MSCMP18]], [[_MSCMP19]] +; CHECK-NEXT: br i1 [[_MSOR20]], label [[TMP21:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] +; CHECK: 21: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 22: +; CHECK-NEXT: [[RES3:%.*]] = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> zeroinitializer, i8 -1, i32 11) +; CHECK-NEXT: [[RES_1:%.*]] = fadd <4 x float> [[RES0]], [[RES1]] +; CHECK-NEXT: [[RES_2:%.*]] = fadd <4 
x float> [[RES2]], [[RES3]] +; CHECK-NEXT: [[RES:%.*]] = fadd <4 x float> [[RES_1]], [[RES_2]] +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res0 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4) + %res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 9) + %res2 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 10) + %res3 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 11) + + %res.1 = fadd <4 x float> %res0, %res1 + %res.2 = fadd <4 x float> %res2, %res3 + %res = fadd <4 x float> %res.1, %res.2 + ret <4 x float> %res +} + +declare <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone + +define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_sqrt_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES0:%.*]] = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i128 [[TMP12]], 0 +; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]] +; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR12:%.*]] = or i1 [[_MSOR10]], [[_MSCMP11]] +; CHECK-NEXT: br i1 [[_MSOR12]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> 
@llvm.x86.avx512.mask.sqrt.sd(<2 x double> [[A0]], <2 x double> [[A1]], <2 x double> [[A2]], i8 [[MASK]], i32 9) +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP13:%.*]] = icmp ne i128 [[TMP15]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0 +; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSCMP13]], [[_MSCMP14]] +; CHECK-NEXT: [[_MSCMP16:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR17:%.*]] = or i1 [[_MSOR15]], [[_MSCMP16]] +; CHECK-NEXT: br i1 [[_MSOR17]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]] +; CHECK: 17: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 18: +; CHECK-NEXT: [[RES2:%.*]] = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> [[A0]], <2 x double> [[A1]], <2 x double> zeroinitializer, i8 [[MASK]], i32 10) +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP18:%.*]] = icmp ne i128 [[TMP19]], 0 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i128 [[TMP20]], 0 +; CHECK-NEXT: [[_MSOR20:%.*]] = or i1 [[_MSCMP18]], [[_MSCMP19]] +; CHECK-NEXT: br i1 [[_MSOR20]], label [[TMP21:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] +; CHECK: 21: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 22: +; CHECK-NEXT: [[RES3:%.*]] = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> [[A0]], <2 x double> [[A1]], <2 x double> zeroinitializer, i8 -1, i32 11) +; CHECK-NEXT: [[RES_1:%.*]] = fadd <2 x double> [[RES0]], [[RES1]] +; CHECK-NEXT: [[RES_2:%.*]] = fadd <2 x double> [[RES2]], [[RES3]] +; CHECK-NEXT: [[RES:%.*]] = fadd <2 x double> [[RES_1]], [[RES_2]] +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res0 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4) + %res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 9) + %res2 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 10) + %res3 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 11) + + %res.1 = fadd <2 x double> %res0, %res1 + %res.2 = fadd <2 x double> %res2, %res3 + %res = fadd <2 x double> %res.1, %res.2 + ret <2 x double> %res +} + +define i32 @test_x86_avx512_cvttsd2usi(<2 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_avx512_cvttsd2usi( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES0:%.*]] = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> [[A0:%.*]], i32 4) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: 
call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> [[A0]], i32 8) +; CHECK-NEXT: [[RES2:%.*]] = add i32 [[RES0]], [[RES1]] +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES2]] +; + %res0 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 4) ; + %res1 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 8) ; + %res2 = add i32 %res0, %res1 + ret i32 %res2 +} +declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32) nounwind readnone + +define i32 @test_x86_avx512_cvttsd2si(<2 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_avx512_cvttsd2si( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES0:%.*]] = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> [[A0:%.*]], i32 4) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> [[A0]], i32 8) +; CHECK-NEXT: [[RES2:%.*]] = add i32 [[RES0]], [[RES1]] +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES2]] +; + %res0 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 4) ; + %res1 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 8) ; + %res2 = add i32 %res0, %res1 + ret i32 %res2 +} +declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32) nounwind readnone + +define i32 @test_x86_avx512_cvttss2si(<4 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_avx512_cvttss2si( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES0:%.*]] = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> [[A0:%.*]], i32 8) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> [[A0]], i32 4) +; CHECK-NEXT: [[RES2:%.*]] = add i32 [[RES0]], [[RES1]] +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES2]] +; + %res0 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 8) ; + %res1 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 4) ; + %res2 = add i32 %res0, %res1 + ret i32 %res2 +} +declare i32 
@llvm.x86.avx512.cvttss2si(<4 x float>, i32) nounwind readnone + +define i32 @test_x86_avx512_cvttss2si_load(ptr %a0) #0 { +; CHECK-LABEL: @test_x86_avx512_cvttss2si_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]] +; CHECK: 2: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 3: +; CHECK-NEXT: [[A1:%.*]] = load <4 x float>, ptr [[A0:%.*]], align 16 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A0]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> [[A1]], i32 4) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %a1 = load <4 x float>, ptr %a0 + %res = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a1, i32 4) ; + ret i32 %res +} + +define i32 @test_x86_avx512_cvttss2usi(<4 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_avx512_cvttss2usi( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES0:%.*]] = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> [[A0:%.*]], i32 8) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> [[A0]], i32 4) +; CHECK-NEXT: [[RES2:%.*]] = add i32 [[RES0]], [[RES1]] +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES2]] +; + %res0 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 8) ; + %res1 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 4) ; + %res2 = add i32 %res0, %res1 + ret i32 %res2 +} +declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone + +define i32 @test_x86_avx512_cvtsd2usi32(<2 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_avx512_cvtsd2usi32( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; 
CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> [[A0:%.*]], i32 4) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> [[A0]], i32 11) +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[RES2:%.*]] = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> [[A0]], i32 9) +; CHECK-NEXT: [[RES3:%.*]] = add i32 [[RES]], [[RES1]] +; CHECK-NEXT: [[RES4:%.*]] = add i32 [[RES3]], [[RES2]] +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES4]] +; + %res = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 4) + %res1 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 11) + %res2 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 9) + %res3 = add i32 %res, %res1 + %res4 = add i32 %res3, %res2 + ret i32 %res4 +} +declare i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double>, i32) nounwind readnone + +define i32 @test_x86_avx512_cvtsd2si32(<2 x double> %a0) #0 { +; CHECK-LABEL: @test_x86_avx512_cvtsd2si32( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> [[A0:%.*]], i32 4) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> [[A0]], i32 11) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[RES2:%.*]] = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> [[A0]], i32 9) +; CHECK-NEXT: [[RES3:%.*]] = add i32 [[RES]], [[RES1]] +; CHECK-NEXT: [[RES4:%.*]] = add i32 [[RES3]], [[RES2]] +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES4]] +; + %res = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 4) + %res1 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 11) + %res2 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 9) + %res3 = add i32 
%res, %res1 + %res4 = add i32 %res3, %res2 + ret i32 %res4 +} +declare i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double>, i32) nounwind readnone + +define i32 @test_x86_avx512_cvtss2usi32(<4 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_avx512_cvtss2usi32( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> [[A0:%.*]], i32 4) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> [[A0]], i32 11) +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i32 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[RES2:%.*]] = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> [[A0]], i32 9) +; CHECK-NEXT: [[RES3:%.*]] = add i32 [[RES]], [[RES1]] +; CHECK-NEXT: [[RES4:%.*]] = add i32 [[RES3]], [[RES2]] +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES4]] +; + %res = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 4) + %res1 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 11) + %res2 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 9) + %res3 = add i32 %res, %res1 + %res4 = add i32 %res3, %res2 + ret i32 %res4 +} +declare i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float>, i32) nounwind readnone + +define i32 @test_x86_avx512_cvtss2si32(<4 x float> %a0) #0 { +; CHECK-LABEL: @test_x86_avx512_cvtss2si32( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> [[A0:%.*]], i32 4) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> [[A0]], i32 11) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; 
CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[RES2:%.*]] = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> [[A0]], i32 9) +; CHECK-NEXT: [[RES3:%.*]] = add i32 [[RES]], [[RES1]] +; CHECK-NEXT: [[RES4:%.*]] = add i32 [[RES3]], [[RES2]] +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES4]] +; + %res = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 4) + %res1 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 11) + %res2 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 9) + %res3 = add i32 %res, %res1 + %res4 = add i32 %res3, %res2 + ret i32 %res4 +} +declare i32 @llvm.x86.avx512.vcvtss2si32(<4 x float>, i32) nounwind readnone + +define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0, <16 x i16> %src, i16 %mask, ptr %dst) #0 { +; CHECK-LABEL: @test_x86_vcvtps2ph_256( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 104) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0:%.*]], i32 2, <16 x i16> zeroinitializer, i16 -1) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0]], i32 11, <16 x i16> zeroinitializer, i16 [[MASK:%.*]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i16> [[TMP3]] to i256 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP12]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i16 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]] +; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES3:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0]], i32 12, <16 x i16> [[SRC:%.*]], i16 [[MASK]]) +; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP8]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]] +; CHECK: 15: +; CHECK-NEXT: call void 
@__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 16: +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[DST:%.*]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP17]], 87960930222080 +; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr +; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr [[TMP19]], align 32 +; CHECK-NEXT: store <16 x i16> [[RES1]], ptr [[DST]], align 32 +; CHECK-NEXT: [[RES:%.*]] = add <16 x i16> [[RES2]], [[RES3]] +; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES]] +; + %res1 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1) + %res2 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 11, <16 x i16> zeroinitializer, i16 %mask) + %res3 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 12, <16 x i16> %src, i16 %mask) + store <16 x i16> %res1, ptr %dst + %res = add <16 x i16> %res2, %res3 + ret <16 x i16> %res +} + +declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly + +define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) #0 { +; CHECK-LABEL: @test_cmpps( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 2, <16 x i1> splat (i1 true), i32 8) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i1> [[RES]] to i16 +; CHECK-NEXT: store i16 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i16 [[TMP7]] +; + %res = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8) + %1 = bitcast <16 x i1> %res to i16 + ret i16 %1 +} +declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, <16 x i1>, i32) + +define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) #0 { +; CHECK-LABEL: @test_cmppd( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i1> 
@llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 4, <8 x i1> splat (i1 true), i32 4) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i1> [[RES]] to i8 +; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i8 [[TMP7]] +; + %res = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4) + %1 = bitcast <8 x i1> %res to i8 + ret i8 %1 +} +declare <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, <8 x i1>, i32) + + + ; fp min - max +define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) #0 { +; CHECK-LABEL: @test_vmaxpd( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 4) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP7]] +; + %1 = call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %a0, <8 x double> %a1, i32 4) + ret <8 x double> %1 +} +declare <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double>, <8 x double>, i32) + +define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) #0 { +; CHECK-LABEL: @test_vminpd( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 4) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP7]] +; + %1 = call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %a0, <8 x double> %a1, i32 4) + ret <8 x double> %1 +} +declare <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double>, <8 x double>, i32) + +define void @test_mask_store_ss(ptr %ptr, <4 x float> %data, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_store_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 
ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[TMP1]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = and i8 [[TMP1]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = or i8 [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = or i8 [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = and i8 [[MASK]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP8]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP9]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i1> [[TMP10]], <8 x i1> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP11]], <8 x i1> [[TMP11]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[PTR:%.*]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 87960930222080 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP2]], ptr [[TMP14]], i32 1, <4 x i1> [[EXTRACT]]) +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i1> [[_MSPROP]] to i4 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i4 [[TMP15]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP16:%.*]], label [[TMP17:%.*]], !prof [[PROF1]] +; CHECK: 16: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 17: +; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[DATA:%.*]], ptr [[PTR]], i32 1, <4 x i1> [[EXTRACT]]) +; CHECK-NEXT: ret void +; + %1 = and i8 %mask, 1 + %2 = bitcast i8 %1 to <8 x i1> + %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + call void @llvm.masked.store.v4f32.p0(<4 x float> %data, ptr %ptr, i32 1, <4 x i1> %extract) + ret void +} +declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>) #1 + + +declare <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float>, <16 x float>, i32) +declare <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float>, <16 x float>, i32) +declare <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double>, <8 x double>, i32) + +define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) #0 { +; CHECK-LABEL: @test_vsubps_rn( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) + ret <16 x float> %1 +} + +define <16 x 
float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) #0 { +; CHECK-LABEL: @test_vsubps_rd( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9) + ret <16 x float> %1 +} + +define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) #0 { +; CHECK-LABEL: @test_vsubps_ru( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10) + ret <16 x float> %1 +} + +define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) #0 { +; CHECK-LABEL: @test_vsubps_rz( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11) +; CHECK-NEXT: 
store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11) + ret <16 x float> %1 +} + +define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) #0 { +; CHECK-LABEL: @test_vmulps_rn( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) + ret <16 x float> %1 +} + +define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) #0 { +; CHECK-LABEL: @test_vmulps_rd( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9) + ret <16 x float> %1 +} + +define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) #0 { +; CHECK-LABEL: @test_vmulps_ru( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; 
CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10) + ret <16 x float> %1 +} + +define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) #0 { +; CHECK-LABEL: @test_vmulps_rz( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11) + ret <16 x float> %1 +} + +define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_vmulps_mask_rn( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], 
<16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + ret <16 x float> %3 +} + +define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_vmulps_mask_rd( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + ret <16 x float> %3 +} + +define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_vmulps_mask_ru( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 
[[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + ret <16 x float> %3 +} + +define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_vmulps_mask_rz( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 
+; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + ret <16 x float> %3 +} + +define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) #0 { +; CHECK-LABEL: @test_vmulps_mask_passthru_rn( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru + ret <16 x float> %3 +} + +define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) #0 { +; CHECK-LABEL: @test_vmulps_mask_passthru_rd( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] 
= bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru + ret <16 x float> %3 +} + +define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) #0 { +; CHECK-LABEL: @test_vmulps_mask_passthru_ru( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32> 
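+; NOTE: TMP15-TMP17 below are MemorySanitizer's pessimistic shadow for a
+; masked select: the two data operands are xor'ed (lanes where they differ
+; are only as defined as the mask bit choosing between them), then or'ed
+; with each operand's shadow. The multiply result's shadow contributes
+; zeroinitializer here because its operand shadows were already checked on
+; the _MSOR branch to __msan_warning_noreturn. _MSPROP_SELECT then uses
+; this pessimistic value wherever the mask's own shadow (TMP10) is set,
+; and TMP12 (a plain shadow select on the real mask bits) elsewhere.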
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru + ret <16 x float> %3 +} + +define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) #0 { +; CHECK-LABEL: @test_vmulps_mask_passthru_rz( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru + ret <16 x float> %3 +} + +define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) #0 { +; CHECK-LABEL: @test_vmulpd_mask_rn( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: 
[[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 8) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP16]] +; + %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 8) + %2 = bitcast i8 %mask to <8 x i1> + %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer + ret <8 x double> %3 +} + +define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) #0 { +; CHECK-LABEL: @test_vmulpd_mask_rd( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 9) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64> +; CHECK-NEXT: 
[[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP16]] +; + %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 9) + %2 = bitcast i8 %mask to <8 x i1> + %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer + ret <8 x double> %3 +} + +define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) #0 { +; CHECK-LABEL: @test_vmulpd_mask_ru( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 10) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP16]] +; + %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 10) + %2 = bitcast i8 %mask to <8 x i1> + %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer + ret <8 x double> %3 +} + +define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) #0 { +; CHECK-LABEL: @test_vmulpd_mask_rz( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void 
@llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 11) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP16]] +; + %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 11) + %2 = bitcast i8 %mask to <8 x i1> + %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer + ret <8 x double> %3 +} + +define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_maskz_add_round_ps_rn_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = 
select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_maskz_add_round_ps_rd_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_maskz_add_round_ps_ru_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 
[[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_maskz_add_round_ps_rz_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = 
select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_maskz_add_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_mask_add_round_ps_rn_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] 
to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_mask_add_round_ps_rd_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x 
i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_mask_add_round_ps_ru_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_mask_add_round_ps_rz_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = 
load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_mask_add_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 
x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_add_round_ps_rn_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) + ret <16 x float> %1 +} + +define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_add_round_ps_rd_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> 
[[A0:%.*]], <16 x float> [[A1:%.*]], i32 9) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9) + ret <16 x float> %1 +} + +define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_add_round_ps_ru_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10) + ret <16 x float> %1 +} + +define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_add_round_ps_rz_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11) + ret <16 x float> %1 +} + +define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_add_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = 
icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) + ret <16 x float> %1 +} +declare <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float>, <16 x float>, i32) + +define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_mask_sub_round_ps_rn_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_mask_sub_round_ps_rd_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; 
CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_mask_sub_round_ps_ru_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> 
@llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_mask_sub_round_ps_rz_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> 
[[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_mask_sub_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_sub_round_ps_rn_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label 
[[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) + ret <16 x float> %1 +} + +define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_sub_round_ps_rd_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9) + ret <16 x float> %1 +} + +define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_sub_round_ps_ru_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10) + ret <16 x float> %1 +} + +define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_sub_round_ps_rz_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void 
@llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11) + ret <16 x float> %1 +} + +define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_sub_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) + ret <16 x float> %1 +} + +define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_maskz_div_round_ps_rn_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to 
<16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_maskz_div_round_ps_rd_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_maskz_div_round_ps_ru_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr 
@__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_maskz_div_round_ps_rz_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> 
zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_maskz_div_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_mask_div_round_ps_rn_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 
add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_mask_div_round_ps_rd_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> 
[[A1:%.*]], i32 9) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_mask_div_round_ps_ru_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x 
float> %a0, <16 x float> %a1, i32 10) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_mask_div_round_ps_rz_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_mask_div_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; 
CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_div_round_ps_rn_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) + ret <16 x float> %1 +} + +define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_div_round_ps_rd_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: 
[[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9) + ret <16 x float> %1 +} + +define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_div_round_ps_ru_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10) + ret <16 x float> %1 +} + +define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_div_round_ps_rz_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11) + ret <16 x float> %1 +} + +define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_div_round_ps_current( +; CHECK-NEXT: 
[[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) + ret <16 x float> %1 +} +declare <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float>, <16 x float>, i32) + +define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_maskz_min_round_ps_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + ret <16 x float> %3 +} + 
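+; The min/max round tests below exercise the same shadow pattern as the
+; sub/div tests above: each operand shadow is checked up front (the
+; icmp/br/__msan_warning_noreturn sequence), so the intrinsic's own result
+; shadow is zeroinitializer, and only the final mask-select propagates
+; shadow. Roughly, for a masked op lowered as %r = select %m, %op, %src,
+; the checks encode:
+;   shadow(%r) = select shadow(%m),
+;                       or(xor(%op, %src), shadow(%op), shadow(%src)),
+;                       select(%m, shadow(%op), shadow(%src))
+; i.e. a lane of %r is poisoned if the selected value's shadow is poisoned,
+; or if the mask bit itself is uninitialized and the two candidate values
+; are not provably equal. The maskz variants are the %src = zeroinitializer
+; special case of the same pattern.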
+define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_maskz_min_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_mask_min_round_ps_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; 
CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_mask_min_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr 
@__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_min_round_ps_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) + ret <16 x float> %1 +} + +define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_min_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) + ret <16 x float> %1 +} +declare <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float>, <16 x float>, i32) + +define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_maskz_max_round_ps_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; 
CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_maskz_max_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = 
select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_mask_max_round_ps_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_mask_max_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr 
@__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src + ret <16 x float> %3 +} + +define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_max_round_ps_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8) + ret <16 x float> %1 +} + +define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_mm512_max_round_ps_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, 
ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4) + ret <16 x float> %1 +} +declare <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float>, <16 x float>, i32) + +declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone + +define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_add_ss_rn( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 8) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8) + ret <4 x float> %res +} + +define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_add_ss_rd( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 
ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 9) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 9) + ret <4 x float> %res +} + +define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_add_ss_ru( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 10) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 
%mask, i32 10) + ret <4 x float> %res +} + +define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_add_ss_rz( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 11) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 11) + ret <4 x float> %res +} + +define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_add_ss_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> 
@llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4) + ret <4 x float> %res +} + +define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) #0 { +; CHECK-LABEL: @test_maskz_add_ss_rn( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 8) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8) + ret <4 x float> %res +} + +define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_add_ss_rn( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 -1, i32 8) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8) + ret <4 x float> %res +} + +define <4 x float> @test_mask_add_ss_current_memfold(<4 x float> %a0, ptr %a1, <4 x float> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_add_ss_current_memfold( +; CHECK-NEXT: 
[[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[A1_VAL:%.*]] = load float, ptr [[A1:%.*]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[A1]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[A1V0:%.*]] = insertelement <4 x float> undef, float [[A1_VAL]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1 +; CHECK-NEXT: [[A1V1:%.*]] = insertelement <4 x float> [[A1V0]], float 0.000000e+00, i32 1 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2 +; CHECK-NEXT: [[A1V2:%.*]] = insertelement <4 x float> [[A1V1]], float 0.000000e+00, i32 2 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3 +; CHECK-NEXT: [[A1V:%.*]] = insertelement <4 x float> [[A1V2]], float 0.000000e+00, i32 3 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[_MSPROP3]] to i128 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i128 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP12]], 0 +; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR]], [[_MSCMP6]] +; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSOR7]], [[_MSCMP8]] +; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1V]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %a1.val = load float, ptr %a1 + %a1v0 = insertelement <4 x float> undef, float %a1.val, i32 0 + %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1 + %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2 + %a1v = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3 + %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> %a2, i8 %mask, i32 4) + ret <4 x float> %res +} + +define <4 x float> @test_maskz_add_ss_current_memfold(<4 x float> %a0, ptr %a1, i8 %mask) #0 { +; CHECK-LABEL: @test_maskz_add_ss_current_memfold( +; CHECK-NEXT: [[TMP1:%.*]] = 
load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[A1_VAL:%.*]] = load float, ptr [[A1:%.*]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A1]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[A1V0:%.*]] = insertelement <4 x float> undef, float [[A1_VAL]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1 +; CHECK-NEXT: [[A1V1:%.*]] = insertelement <4 x float> [[A1V0]], float 0.000000e+00, i32 1 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2 +; CHECK-NEXT: [[A1V2:%.*]] = insertelement <4 x float> [[A1V1]], float 0.000000e+00, i32 2 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3 +; CHECK-NEXT: [[A1V:%.*]] = insertelement <4 x float> [[A1V2]], float 0.000000e+00, i32 3 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[_MSPROP3]] to i128 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR]], [[_MSCMP6]] +; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1V]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %a1.val = load float, ptr %a1 + %a1v0 = insertelement <4 x float> undef, float %a1.val, i32 0 + %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1 + %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2 + %a1v = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3 + %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> zeroinitializer, i8 %mask, i32 4) + ret <4 x float> %res +} + +declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone + +define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_add_sd_rn( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; 
CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 8) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8) + ret <2 x double> %res +} + +define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_add_sd_rd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 9) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 9) + ret <2 x double> %res +} + 
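+; Note on the rounding-mode operand in the tests above and below: i32 4 is
+; _MM_FROUND_CUR_DIRECTION, while i32 8, 9, 10 and 11 are
+; _MM_FROUND_TO_NEAREST_INT, _MM_FROUND_TO_NEG_INF, _MM_FROUND_TO_POS_INF and
+; _MM_FROUND_TO_ZERO respectively, each combined with _MM_FROUND_NO_EXC (hence
+; the rn/rd/ru/rz suffixes, and "sae" for suppress-all-exceptions).
+; As the CHECK lines show, MemorySanitizer appears to fall back to its strict
+; default handling for these mask.add/mask.max round intrinsics: the shadow of
+; every operand is checked (the _MSCMP/_MSOR chain branching to
+; @__msan_warning_noreturn), and the return shadow is stored as zeroinitializer,
+; i.e. fully initialized.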
+define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_add_sd_ru( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 10) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 10) + ret <2 x double> %res +} + +define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_add_sd_rz( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> 
[[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 11) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 11) + ret <2 x double> %res +} + +define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_add_sd_current( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4) + ret <2 x double> %res +} + +define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8 %mask) #0 { +; CHECK-LABEL: @test_maskz_add_sd_rn( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> 
@llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 8) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8) + ret <2 x double> %res +} + +define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_add_sd_rn( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 -1, i32 8) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8) + ret <2 x double> %res +} + +define <2 x double> @test_mask_add_sd_current_memfold(<2 x double> %a0, ptr %a1, <2 x double> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_add_sd_current_memfold( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[A1_VAL:%.*]] = load double, ptr [[A1:%.*]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[A1]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP9]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[A1V0:%.*]] = insertelement <2 x double> undef, double [[A1_VAL]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1 +; CHECK-NEXT: [[A1V:%.*]] = insertelement <2 x double> [[A1V0]], double 0.000000e+00, i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: 
[[TMP11:%.*]] = bitcast <2 x i64> [[_MSPROP1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP12]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR]], [[_MSCMP4]] +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]] +; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1V]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %a1.val = load double, ptr %a1 + %a1v0 = insertelement <2 x double> undef, double %a1.val, i32 0 + %a1v = insertelement <2 x double> %a1v0, double 0.000000e+00, i32 1 + %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1v, <2 x double> %a2, i8 %mask, i32 4) + ret <2 x double> %res +} + +define <2 x double> @test_maskz_add_sd_current_memfold(<2 x double> %a0, ptr %a1, i8 %mask) #0 { +; CHECK-LABEL: @test_maskz_add_sd_current_memfold( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[A1_VAL:%.*]] = load double, ptr [[A1:%.*]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A1]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[A1V0:%.*]] = insertelement <2 x double> undef, double [[A1_VAL]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1 +; CHECK-NEXT: [[A1V:%.*]] = insertelement <2 x double> [[A1V0]], double 0.000000e+00, i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[_MSPROP1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> 
[[A1V]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %a1.val = load double, ptr %a1 + %a1v0 = insertelement <2 x double> undef, double %a1.val, i32 0 + %a1v = insertelement <2 x double> %a1v0, double 0.000000e+00, i32 1 + %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1v, <2 x double> zeroinitializer, i8 %mask, i32 4) + ret <2 x double> %res +} + +declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone + +define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_max_ss_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 8) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8) + ret <4 x float> %res +} + +define <4 x float> @test_maskz_max_ss_sae(<4 x float> %a0, <4 x float> %a1, i8 %mask) #0 { +; CHECK-LABEL: @test_maskz_max_ss_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; 
CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 8) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8) + ret <4 x float> %res +} + +define <4 x float> @test_max_ss_sae(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_max_ss_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 -1, i32 8) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8) + ret <4 x float> %res +} + +define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_max_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> 
@llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4) + ret <4 x float> %res +} + +define <4 x float> @test_maskz_max_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) #0 { +; CHECK-LABEL: @test_maskz_max_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 4) + ret <4 x float> %res +} + +define <4 x float> @test_max_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_max_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 -1, i32 4) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 4) + ret <4 x float> %res +} + +define <4 x float> @test_mask_max_ss_memfold(<4 x float> %a0, ptr %a1, <4 x float> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_max_ss_memfold( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr 
inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[A1_VAL:%.*]] = load float, ptr [[A1:%.*]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[A1]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[A1V0:%.*]] = insertelement <4 x float> undef, float [[A1_VAL]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1 +; CHECK-NEXT: [[A1V1:%.*]] = insertelement <4 x float> [[A1V0]], float 0.000000e+00, i32 1 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2 +; CHECK-NEXT: [[A1V2:%.*]] = insertelement <4 x float> [[A1V1]], float 0.000000e+00, i32 2 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3 +; CHECK-NEXT: [[A1V:%.*]] = insertelement <4 x float> [[A1V2]], float 0.000000e+00, i32 3 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[_MSPROP3]] to i128 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i128 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP12]], 0 +; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR]], [[_MSCMP6]] +; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSOR7]], [[_MSCMP8]] +; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1V]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %a1.val = load float, ptr %a1 + %a1v0 = insertelement <4 x float> undef, float %a1.val, i32 0 + %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1 + %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2 + %a1v = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3 + %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> %a2, i8 %mask, i32 4) + ret <4 x float> %res +} + +define <4 x float> @test_maskz_max_ss_memfold(<4 x float> %a0, ptr %a1, i8 %mask) #0 { +; CHECK-LABEL: @test_maskz_max_ss_memfold( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint 
(ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[A1_VAL:%.*]] = load float, ptr [[A1:%.*]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A1]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[A1V0:%.*]] = insertelement <4 x float> undef, float [[A1_VAL]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1 +; CHECK-NEXT: [[A1V1:%.*]] = insertelement <4 x float> [[A1V0]], float 0.000000e+00, i32 1 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2 +; CHECK-NEXT: [[A1V2:%.*]] = insertelement <4 x float> [[A1V1]], float 0.000000e+00, i32 2 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3 +; CHECK-NEXT: [[A1V:%.*]] = insertelement <4 x float> [[A1V2]], float 0.000000e+00, i32 3 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[_MSPROP3]] to i128 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR]], [[_MSCMP6]] +; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1V]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %a1.val = load float, ptr %a1 + %a1v0 = insertelement <4 x float> undef, float %a1.val, i32 0 + %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1 + %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2 + %a1v = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3 + %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> zeroinitializer, i8 %mask, i32 4) + ret <4 x float> %res +} +declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone + +define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_max_sd_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr 
inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 8) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8) + ret <2 x double> %res +} + +define <2 x double> @test_maskz_max_sd_sae(<2 x double> %a0, <2 x double> %a1, i8 %mask) #0 { +; CHECK-LABEL: @test_maskz_max_sd_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 8) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8) + ret <2 x double> %res +} + +define <2 x double> @test_max_sd_sae(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_max_sd_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; 
CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 -1, i32 8) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8) + ret <2 x double> %res +} + +define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_max_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4) + ret <2 x double> %res +} + +define <2 x double> @test_maskz_max_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) #0 { +; CHECK-LABEL: @test_maskz_max_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne 
i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 4) + ret <2 x double> %res +} + +define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_max_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 -1, i32 4) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4) + ret <2 x double> %res +} + +define <2 x double> @test_mask_max_sd_memfold(<2 x double> %a0, ptr %a1, <2 x double> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_max_sd_memfold( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[A1_VAL:%.*]] = load double, ptr [[A1:%.*]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[A1]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: 
[[_MSLD:%.*]] = load i64, ptr [[TMP9]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[A1V0:%.*]] = insertelement <2 x double> undef, double [[A1_VAL]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1 +; CHECK-NEXT: [[A1V:%.*]] = insertelement <2 x double> [[A1V0]], double 0.000000e+00, i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[_MSPROP1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP12]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR]], [[_MSCMP4]] +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]] +; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1V]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %a1.val = load double, ptr %a1 + %a1v0 = insertelement <2 x double> undef, double %a1.val, i32 0 + %a1v = insertelement <2 x double> %a1v0, double 0.000000e+00, i32 1 + %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1v, <2 x double> %a2, i8 %mask, i32 4) + ret <2 x double> %res +} + +define <2 x double> @test_maskz_max_sd_memfold(<2 x double> %a0, ptr %a1, i8 %mask) #0 { +; CHECK-LABEL: @test_maskz_max_sd_memfold( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[A1_VAL:%.*]] = load double, ptr [[A1:%.*]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A1]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[A1V0:%.*]] = insertelement <2 x double> undef, double [[A1_VAL]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1 +; CHECK-NEXT: [[A1V:%.*]] = insertelement <2 x double> [[A1V0]], double 0.000000e+00, i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[_MSPROP1]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp 
ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK: 11: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 12: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1V]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %a1.val = load double, ptr %a1 + %a1v0 = insertelement <2 x double> undef, double %a1.val, i32 0 + %a1v = insertelement <2 x double> %a1v0, double 0.000000e+00, i32 1 + %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1v, <2 x double> zeroinitializer, i8 %mask, i32 4) + ret <2 x double> %res +} + +define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) #0 { +; CHECK-LABEL: @test_x86_avx512_cvtsi2ss32( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> [[A:%.*]], i32 [[B:%.*]], i32 11) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 11) ; <<<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone + +define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b) #0 { +; CHECK-LABEL: @test_x86_avx512__mm_cvt_roundu32_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> [[A:%.*]], i32 [[B:%.*]], i32 9) +; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 9) ; <<<4 x float>> [#uses=1] + ret <4 x float> %res +} + +define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, ptr %ptr) #0 { +; 
CHECK-LABEL: @test_x86_avx512__mm_cvt_roundu32_ss_mem( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load i32, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP7]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSLD]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> [[A:%.*]], i32 [[B]], i32 9) +; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %b = load i32, ptr %ptr + %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 9) ; <<<4 x float>> [#uses=1] + ret <4 x float> %res +} + +define <4 x float> @test_x86_avx512__mm_cvtu32_ss(<4 x float> %a, i32 %b) #0 { +; CHECK-LABEL: @test_x86_avx512__mm_cvtu32_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> [[A:%.*]], i32 [[B:%.*]], i32 4) +; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1] + ret <4 x float> %res +} + +define <4 x float> @test_x86_avx512__mm_cvtu32_ss_mem(<4 x float> %a, ptr %ptr) #0 { +; CHECK-LABEL: @test_x86_avx512__mm_cvtu32_ss_mem( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[B:%.*]] = load i32, ptr [[PTR:%.*]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; 
CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP7]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSLD]], 0 +; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> [[A:%.*]], i32 [[B]], i32 4) +; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %b = load i32, ptr %ptr + %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1] + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float>, i32, i32) nounwind readnone + +declare <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>) + +define <16 x i32>@test_int_x86_avx512_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p) #0 { +; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[X2:%.*]] = load <16 x i32>, ptr [[X2P:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[X2P]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[_MSLD]] +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2]]) +; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP9]] +; + %x2 = load <16 x i32>, ptr %x2p + %1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) + ret <16 x i32> %1 +} + +define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, i16 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label 
[[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[X2:%.*]] = load <16 x i32>, ptr [[X2P:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[X2P]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[_MSLD]] +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], [[X1]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> [[X1]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP17]] +; + %x2 = load <16 x i32>, ptr %x2p + %1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) + %2 = bitcast i16 %x3 to <16 x i1> + %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1 + ret <16 x i32> %3 +} + +declare <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>) + +define <8 x double>@test_int_x86_avx512_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x double> [[X2:%.*]]) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP9]] +; + %1 = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) 
+ ret <8 x double> %1 +} + +define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x double> [[X2:%.*]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[X1]] to <8 x double> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x i64> zeroinitializer, <8 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x double> [[TMP10]] to <8 x i64> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x double> [[TMP11]] to <8 x i64> +; CHECK-NEXT: [[TMP17:%.*]] = xor <8 x i64> [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <8 x i64> [[TMP18]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP19]], <8 x i64> [[TMP14]] +; CHECK-NEXT: [[TMP20:%.*]] = select <8 x i1> [[TMP13]], <8 x double> [[TMP10]], <8 x double> [[TMP11]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP20]] +; + %1 = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) + %2 = bitcast <8 x i64> %x1 to <8 x double> + %3 = bitcast i8 %x3 to <8 x i1> + %4 = select <8 x i1> %3, <8 x double> %1, <8 x double> %2 + ret <8 x double> %4 +} + +declare <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>) + +define <16 x float>@test_int_x86_avx512_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; 
CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x float> [[X2:%.*]]) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP9]] +; + %1 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) + ret <16 x float> %1 +} + +define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x float> [[X2:%.*]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[X1]] to <16 x float> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> zeroinitializer, <16 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x float> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x float> [[TMP11]] to <16 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = xor <16 x i32> [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = or <16 x i32> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <16 x i32> [[TMP18]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP19]], <16 x i32> [[TMP14]] +; CHECK-NEXT: [[TMP20:%.*]] = select <16 x i1> [[TMP13]], <16 x float> 
[[TMP10]], <16 x float> [[TMP11]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP20]] +; + %1 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) + %2 = bitcast <16 x i32> %x1 to <16 x float> + %3 = bitcast i16 %x3 to <16 x i1> + %4 = select <16 x i1> %3, <16 x float> %1, <16 x float> %2 + ret <16 x float> %4 +} + +declare <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>) + +define <8 x i64>@test_int_x86_avx512_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]]) +; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP4]] +; + %1 = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) + ret <8 x i64> %1 +} + +define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP1]], <8 x i64> [[TMP2]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[X1]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[X1]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP12]] +; + %1 = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) + %2 = bitcast i8 %x3 to <8 x i1> + %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x1 + ret <8 x i64> %3 
+} + +define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, i16 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[X2:%.*]] = load <16 x i32>, ptr [[X2P:%.*]], align 64 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[X2P]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[_MSLD]] +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x i32> [[X2]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP17]] +; + %x2 = load <16 x i32>, ptr %x2p + %1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2) + %2 = bitcast i16 %x3 to <16 x i1> + %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer + ret <16 x i32> %3 +} + +define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, ptr %x2ptr, i8 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void 
@__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[X2S:%.*]] = load double, ptr [[X2PTR:%.*]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[X2PTR]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP9]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[X2INS:%.*]] = insertelement <8 x double> undef, double [[X2S]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer +; CHECK-NEXT: [[X2:%.*]] = shufflevector <8 x double> [[X2INS]], <8 x double> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[_MSPROP1]] to i512 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP12]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[TMP15:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X1:%.*]], <8 x i64> [[X0:%.*]], <8 x double> [[X2]]) +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x double> [[TMP15]] to <8 x i64> +; CHECK-NEXT: [[TMP20:%.*]] = xor <8 x i64> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i64> [[TMP20]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i64> [[TMP21]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP16]], <8 x i64> [[TMP22]], <8 x i64> [[TMP18]] +; CHECK-NEXT: [[TMP23:%.*]] = select <8 x i1> [[TMP17]], <8 x double> [[TMP15]], <8 x double> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP23]] +; + %x2s = load double, ptr %x2ptr + %x2ins = insertelement <8 x double> undef, double %x2s, i32 0 + %x2 = shufflevector <8 x double> %x2ins, <8 x double> undef, <8 x i32> zeroinitializer + %1 = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %x1, <8 x i64> %x0, <8 x double> %x2) + %2 = bitcast i8 %x3 to <8 x i1> + %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer + ret <8 x double> %3 +} + +define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: 
[[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x float> [[X2:%.*]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP17]], <16 x i32> [[TMP13]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP12]], <16 x float> [[TMP10]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x1, <16 x i32> %x0, <16 x float> %x2) + %2 = bitcast i16 %x3 to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + ret <16 x float> %3 +} + +define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X1:%.*]], <8 x i64> [[X0:%.*]], <8 x i64> [[X2:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP1]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 
x i64> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP12]] +; + %1 = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> %x2) + %2 = bitcast i8 %x3 to <8 x i1> + %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer + ret <8 x i64> %3 +} + +define <16 x i32>@test_int_x86_avx512_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_vpermt2var_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x i32> [[X2:%.*]]) +; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP4]] +; + %1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2) + ret <16 x i32> %1 +} + +define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x i32> [[X2:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP1]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[X1]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP1]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP1]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[X1]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP12]] +; + %1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x 
i32> %x2) + %2 = bitcast i16 %x3 to <16 x i1> + %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1 + ret <16 x i32> %3 +} + +declare <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) +define <8 x double>@test_int_x86_avx512_mask_scalef_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x double> [[X2:%.*]], i8 [[X3:%.*]], i32 11) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i512 [[TMP12]], 0 +; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]] +; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> [[X0]], <8 x double> [[X1]], <8 x double> [[X2]], i8 -1, i32 8) +; CHECK-NEXT: [[RES2:%.*]] = fadd <8 x double> [[RES]], [[RES1]] +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[RES2]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 11) + %res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 8) + %res2 = fadd <8 x double> %res, %res1 + ret <8 x double> %res2 +} + +declare <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) +define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16 x float> %x1, 
<16 x float> %x2, i16 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i16 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> [[X0:%.*]], <16 x float> [[X1:%.*]], <16 x float> [[X2:%.*]], i16 [[X3:%.*]], i32 10) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i512 [[TMP12]], 0 +; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]] +; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES1:%.*]] = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> [[X0]], <16 x float> [[X1]], <16 x float> [[X2]], i16 -1, i32 8) +; CHECK-NEXT: [[RES2:%.*]] = fadd <16 x float> [[RES]], [[RES1]] +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES2]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 10) + %res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 8) + %res2 = fadd <16 x float> %res, %res1 + ret <16 x float> %res2 +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, <16 x i8>, i8) + +define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qb_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint 
(ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[X0:%.*]], <16 x i8> [[X1:%.*]], i8 -1) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[X0]], <16 x i8> [[X1]], i8 [[X2:%.*]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0 +; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] +; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[X0]], <16 x i8> zeroinitializer, i8 [[X2]]) +; CHECK-NEXT: [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]] +; CHECK-NEXT: [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]] +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[RES4]] +; + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmov.qb.mem.512(ptr %ptr, <8 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmov_qb_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qb_mem_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; 
CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.qb.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.qb.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.pmov.qb.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmov.qb.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64>, <16 x i8>, i8) + +define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qb_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> [[X0:%.*]], <16 x i8> [[X1:%.*]], i8 -1) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> [[X0]], <16 x i8> [[X1]], i8 [[X2:%.*]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0 +; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] +; CHECK-NEXT: 
br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> [[X0]], <16 x i8> zeroinitializer, i8 [[X2]]) +; CHECK-NEXT: [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]] +; CHECK-NEXT: [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]] +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[RES4]] +; + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovs.qb.mem.512(ptr %ptr, <8 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qb_mem_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64>, <16 x i8>, i8) + +define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qb_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr 
(i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> [[X0:%.*]], <16 x i8> [[X1:%.*]], i8 -1) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> [[X0]], <16 x i8> [[X1]], i8 [[X2:%.*]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0 +; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] +; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> [[X0]], <16 x i8> zeroinitializer, i8 [[X2]]) +; CHECK-NEXT: [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]] +; CHECK-NEXT: [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]] +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[RES4]] +; + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovus.qb.mem.512(ptr %ptr, <8 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qb_mem_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] 
= icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2) + ret void +} + +declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8) + +define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qw_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES0:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[X0:%.*]], <8 x i16> [[X1:%.*]], i8 -1) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES1:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[X0]], <8 x i16> [[X1]], i8 [[X2:%.*]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0 +; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 
[[_MSCMP7]], [[_MSCMP8]] +; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[X0]], <8 x i16> zeroinitializer, i8 [[X2]]) +; CHECK-NEXT: [[RES3:%.*]] = add <8 x i16> [[RES0]], [[RES1]] +; CHECK-NEXT: [[RES4:%.*]] = add <8 x i16> [[RES3]], [[RES2]] +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[RES4]] +; + %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) + %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) + %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) + %res3 = add <8 x i16> %res0, %res1 + %res4 = add <8 x i16> %res3, %res2 + ret <8 x i16> %res4 +} + +declare void @llvm.x86.avx512.mask.pmov.qw.mem.512(ptr %ptr, <8 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmov_qw_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qw_mem_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.qw.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.qw.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.pmov.qw.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmov.qw.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2) + ret void +} + +declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64>, <8 x i16>, i8) + +define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qw_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] 
= load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES0:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> [[X0:%.*]], <8 x i16> [[X1:%.*]], i8 -1) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES1:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> [[X0]], <8 x i16> [[X1]], i8 [[X2:%.*]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0 +; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] +; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> [[X0]], <8 x i16> zeroinitializer, i8 [[X2]]) +; CHECK-NEXT: [[RES3:%.*]] = add <8 x i16> [[RES0]], [[RES1]] +; CHECK-NEXT: [[RES4:%.*]] = add <8 x i16> [[RES3]], [[RES2]] +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[RES4]] +; + %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) + %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) + %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) + %res3 = add <8 x i16> %res0, %res1 + %res4 = add <8 x i16> %res3, %res2 + ret <8 x i16> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovs.qw.mem.512(ptr %ptr, <8 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qw_mem_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: 
[[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2) + ret void +} + +declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64>, <8 x i16>, i8) + +define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qw_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES0:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> [[X0:%.*]], <8 x i16> [[X1:%.*]], i8 -1) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES1:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> [[X0]], <8 x i16> [[X1]], i8 [[X2:%.*]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0 +; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: 
[[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] +; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> [[X0]], <8 x i16> zeroinitializer, i8 [[X2]]) +; CHECK-NEXT: [[RES3:%.*]] = add <8 x i16> [[RES0]], [[RES1]] +; CHECK-NEXT: [[RES4:%.*]] = add <8 x i16> [[RES3]], [[RES2]] +; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i16> [[RES4]] +; + %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) + %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) + %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2) + %res3 = add <8 x i16> %res0, %res1 + %res4 = add <8 x i16> %res3, %res2 + ret <8 x i16> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovus.qw.mem.512(ptr %ptr, <8 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qw_mem_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2) + ret void +} + +define <8 x i32>@test_int_x86_avx512_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmov_qd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = trunc <8 x i64> [[X0:%.*]] to <8 x i32> +; CHECK-NEXT: store <8 x i32> 
[[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[TMP2]] +; + %1 = trunc <8 x i64> %x0 to <8 x i32> + ret <8 x i32> %1 +} + +define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i64> [[X0:%.*]] to <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[_MSPROP]], <8 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i32> [[TMP4]], [[X1:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i32> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i32> [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[TMP10]], <8 x i32> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[TMP4]], <8 x i32> [[X1]] +; CHECK-NEXT: store <8 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[TMP11]] +; + %1 = trunc <8 x i64> %x0 to <8 x i32> + %2 = bitcast i8 %x2 to <8 x i1> + %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x1 + ret <8 x i32> %3 +} + +define <8 x i32>@test_int_x86_avx512_maskz_pmov_qd_512(<8 x i64> %x0, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_pmov_qd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i64> [[X0:%.*]] to <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[_MSPROP]], <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i32> [[TMP7]], [[_MSPROP]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i32> [[TMP9]], <8 x i32> [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[TMP3]], <8 x i32> zeroinitializer +; CHECK-NEXT: store <8 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[TMP10]] +; + %1 = trunc <8 x i64> %x0 to <8 x i32> + %2 = bitcast i8 %x2 to <8 x i1> + %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer + ret <8 x i32> %3 +} + +declare void @llvm.x86.avx512.mask.pmov.qd.mem.512(ptr %ptr, <8 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmov_qd_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qd_mem_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr 
inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.qd.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.qd.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.pmov.qd.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmov.qd.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2) + ret void +} + +declare <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64>, <8 x i32>, i8) + +define <8 x i32>@test_int_x86_avx512_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmovs_qd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 -1) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1) + ret <8 x i32> %res +} + +define <8 x i32>@test_int_x86_avx512_mask_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to 
ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]]) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) + ret <8 x i32> %res +} + +define <8 x i32>@test_int_x86_avx512_maskz_pmovs_qd_512(<8 x i64> %x0, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovs_qd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> zeroinitializer, i8 [[X2:%.*]]) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2) + ret <8 x i32> %res +} + +declare void @llvm.x86.avx512.mask.pmovs.qd.mem.512(ptr %ptr, <8 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qd_mem_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: 
[[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2) + ret void +} + +declare <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64>, <8 x i32>, i8) + +define <8 x i32>@test_int_x86_avx512_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pmovus_qd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 -1) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1) + ret <8 x i32> %res +} + +define <8 x i32>@test_int_x86_avx512_mask_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]]) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr 
@__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) + ret <8 x i32> %res +} + +define <8 x i32>@test_int_x86_avx512_maskz_pmovus_qd_512(<8 x i64> %x0, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovus_qd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> zeroinitializer, i8 [[X2:%.*]]) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES]] +; + %res = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2) + ret <8 x i32> %res +} + +declare void @llvm.x86.avx512.mask.pmovus.qd.mem.512(ptr %ptr, <8 x i64>, i8) + +define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qd_mem_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1) + call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2) + ret void +} + +declare <16 x i8> 
@llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16) + +define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_db_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[X0:%.*]], <16 x i8> [[X1:%.*]], i16 -1) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[X0]], <16 x i8> [[X1]], i16 [[X2:%.*]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0 +; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] +; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[X0]], <16 x i8> zeroinitializer, i16 [[X2]]) +; CHECK-NEXT: [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]] +; CHECK-NEXT: [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]] +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[RES4]] +; + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmov.db.mem.512(ptr %ptr, <16 x i32>, i16) + +define void @test_int_x86_avx512_mask_pmov_db_mem_512(ptr %ptr, <16 x i32> %x1, i16 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_db_mem_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr 
@__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.db.mem.512(ptr [[PTR:%.*]], <16 x i32> [[X1:%.*]], i16 -1) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.db.mem.512(ptr [[PTR]], <16 x i32> [[X1]], i16 [[X2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.pmov.db.mem.512(ptr %ptr, <16 x i32> %x1, i16 -1) + call void @llvm.x86.avx512.mask.pmov.db.mem.512(ptr %ptr, <16 x i32> %x1, i16 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32>, <16 x i8>, i16) + +define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_db_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> [[X0:%.*]], <16 x i8> [[X1:%.*]], i16 -1) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], 
label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> [[X0]], <16 x i8> [[X1]], i16 [[X2:%.*]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0 +; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] +; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> [[X0]], <16 x i8> zeroinitializer, i16 [[X2]]) +; CHECK-NEXT: [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]] +; CHECK-NEXT: [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]] +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[RES4]] +; + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovs.db.mem.512(ptr %ptr, <16 x i32>, i16) + +define void @test_int_x86_avx512_mask_pmovs_db_mem_512(ptr %ptr, <16 x i32> %x1, i16 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_db_mem_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.db.mem.512(ptr [[PTR:%.*]], <16 x i32> [[X1:%.*]], i16 -1) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.db.mem.512(ptr [[PTR]], <16 x i32> [[X1]], i16 [[X2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.pmovs.db.mem.512(ptr %ptr, <16 x i32> %x1, i16 -1) + call void 
@llvm.x86.avx512.mask.pmovs.db.mem.512(ptr %ptr, <16 x i32> %x1, i16 %x2) + ret void +} + +declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32>, <16 x i8>, i16) + +define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_db_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> [[X0:%.*]], <16 x i8> [[X1:%.*]], i16 -1) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> [[X0]], <16 x i8> [[X1]], i16 [[X2:%.*]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0 +; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] +; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> [[X0]], <16 x i8> zeroinitializer, i16 [[X2]]) +; CHECK-NEXT: [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]] +; CHECK-NEXT: [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]] +; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i8> [[RES4]] +; + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2) + %res3 = add <16 x i8> %res0, %res1 + %res4 = add <16 x i8> %res3, %res2 + ret <16 x i8> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovus.db.mem.512(ptr %ptr, <16 x i32>, i16) + +define void @test_int_x86_avx512_mask_pmovus_db_mem_512(ptr %ptr, 
<16 x i32> %x1, i16 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_db_mem_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.db.mem.512(ptr [[PTR:%.*]], <16 x i32> [[X1:%.*]], i16 -1) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.db.mem.512(ptr [[PTR]], <16 x i32> [[X1]], i16 [[X2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.pmovus.db.mem.512(ptr %ptr, <16 x i32> %x1, i16 -1) + call void @llvm.x86.avx512.mask.pmovus.db.mem.512(ptr %ptr, <16 x i32> %x1, i16 %x2) + ret void +} + +declare <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32>, <16 x i16>, i16) + +define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_dw_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i16> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES0:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[X0:%.*]], <16 x i16> [[X1:%.*]], i16 -1) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i16> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i256 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: 
[[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[X0]], <16 x i16> [[X1]], i16 [[X2:%.*]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0 +; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] +; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[X0]], <16 x i16> zeroinitializer, i16 [[X2]]) +; CHECK-NEXT: [[RES3:%.*]] = add <16 x i16> [[RES0]], [[RES1]] +; CHECK-NEXT: [[RES4:%.*]] = add <16 x i16> [[RES3]], [[RES2]] +; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES4]] +; + %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1) + %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) + %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2) + %res3 = add <16 x i16> %res0, %res1 + %res4 = add <16 x i16> %res3, %res2 + ret <16 x i16> %res4 +} + +declare void @llvm.x86.avx512.mask.pmov.dw.mem.512(ptr %ptr, <16 x i32>, i16) + +define void @test_int_x86_avx512_mask_pmov_dw_mem_512(ptr %ptr, <16 x i32> %x1, i16 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_dw_mem_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.dw.mem.512(ptr [[PTR:%.*]], <16 x i32> [[X1:%.*]], i16 -1) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.dw.mem.512(ptr [[PTR]], <16 x i32> [[X1]], 
i16 [[X2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.pmov.dw.mem.512(ptr %ptr, <16 x i32> %x1, i16 -1) + call void @llvm.x86.avx512.mask.pmov.dw.mem.512(ptr %ptr, <16 x i32> %x1, i16 %x2) + ret void +} + +declare <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32>, <16 x i16>, i16) + +define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_dw_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i16> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES0:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> [[X0:%.*]], <16 x i16> [[X1:%.*]], i16 -1) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i16> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i256 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> [[X0]], <16 x i16> [[X1]], i16 [[X2:%.*]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0 +; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] +; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> [[X0]], <16 x i16> zeroinitializer, i16 [[X2]]) +; CHECK-NEXT: [[RES3:%.*]] = add <16 x i16> [[RES0]], [[RES1]] +; CHECK-NEXT: [[RES4:%.*]] = add <16 x i16> [[RES3]], [[RES2]] +; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES4]] +; + %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1) + %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) + %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2) + %res3 = add <16 x i16> %res0, %res1 + %res4 = add <16 x i16> %res3, %res2 + ret <16 x i16> %res4 
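+; Note: as the checks above show, MSan takes its strict fallback path for these pmov intrinsics: the shadow of every operand (source vector, passthrough, and mask) is reduced with an icmp ne 0 and or'd into a guard that branches to __msan_warning_noreturn, and the return shadow stored to __msan_retval_tls is zeroinitializer.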
+} + +declare void @llvm.x86.avx512.mask.pmovs.dw.mem.512(ptr %ptr, <16 x i32>, i16) + +define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(ptr %ptr, <16 x i32> %x1, i16 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_dw_mem_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(ptr [[PTR:%.*]], <16 x i32> [[X1:%.*]], i16 -1) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(ptr [[PTR]], <16 x i32> [[X1]], i16 [[X2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(ptr %ptr, <16 x i32> %x1, i16 -1) + call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(ptr %ptr, <16 x i32> %x1, i16 %x2) + ret void +} + +declare <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32>, <16 x i16>, i16) + +define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_dw_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i16> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES0:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> [[X0:%.*]], <16 x i16> [[X1:%.*]], i16 -1) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i16> [[TMP2]] 
to i256 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i256 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> [[X0]], <16 x i16> [[X1]], i16 [[X2:%.*]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0 +; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] +; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> [[X0]], <16 x i16> zeroinitializer, i16 [[X2]]) +; CHECK-NEXT: [[RES3:%.*]] = add <16 x i16> [[RES0]], [[RES1]] +; CHECK-NEXT: [[RES4:%.*]] = add <16 x i16> [[RES3]], [[RES2]] +; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i16> [[RES4]] +; + %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1) + %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) + %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2) + %res3 = add <16 x i16> %res0, %res1 + %res4 = add <16 x i16> %res3, %res2 + ret <16 x i16> %res4 +} + +declare void @llvm.x86.avx512.mask.pmovus.dw.mem.512(ptr %ptr, <16 x i32>, i16) + +define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(ptr %ptr, <16 x i32> %x1, i16 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_dw_mem_512( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(ptr [[PTR:%.*]], <16 x i32> [[X1:%.*]], i16 -1) +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void 
@__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(ptr [[PTR]], <16 x i32> [[X1]], i16 [[X2:%.*]]) +; CHECK-NEXT: ret void +; + call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(ptr %ptr, <16 x i32> %x1, i16 -1) + call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(ptr %ptr, <16 x i32> %x1, i16 %x2) + ret void +} + +declare <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32>, i32) + +define <16 x float>@test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_dq2ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[CVT:%.*]] = sitofp <16 x i32> [[X0:%.*]] to <16 x float> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP1]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x float> [[CVT]] to <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[X1:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP1]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i32> [[TMP11]], <16 x i32> [[TMP6]] +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP5]], <16 x float> [[CVT]], <16 x float> [[X1]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP16:%.*]] = call <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32> [[X0]], i32 8) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[_MSPROP_SELECT]], zeroinitializer +; CHECK-NEXT: [[RES2:%.*]] = fadd <16 x float> [[TMP12]], [[TMP16]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES2]] +; + %cvt = sitofp <16 x i32> %x0 to <16 x float> + %1 = bitcast i16 %x2 to <16 x i1> + %2 = select <16 x i1> %1, <16 x float> %cvt, <16 x float> %x1 + %3 = call <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32> %x0, i32 8) + %res2 = fadd <16 x float> %2, %3 + ret <16 x float> %res2 +} + +declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double>, <8 x i32>, i8, i32) + +define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_pd2dq_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call 
void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]], i32 4) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i256 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES1:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> [[X0]], <8 x i32> [[X1]], i8 -1, i32 8) +; CHECK-NEXT: [[RES2:%.*]] = add <8 x i32> [[RES]], [[RES1]] +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES2]] +; + %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4) + %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8) + %res2 = add <8 x i32> %res, %res1 + ret <8 x i32> %res2 +} + +declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32) + +define <8 x float>@test_int_x86_avx512_mask_cvt_pd2ps_512(<8 x double> %x0, <8 x float> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_pd2ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> [[X0:%.*]], <8 x float> [[X1:%.*]], i8 [[X2:%.*]], i32 4) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: 
[[_MSCMP5:%.*]] = icmp ne i256 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES1:%.*]] = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> [[X0]], <8 x float> [[X1]], i8 -1, i32 10) +; CHECK-NEXT: [[RES2:%.*]] = fadd <8 x float> [[RES]], [[RES1]] +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x float> [[RES2]] +; + %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 %x2, i32 4) + %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 -1, i32 10) + %res2 = fadd <8 x float> %res, %res1 + ret <8 x float> %res2 +} + +declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32) + +define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_pd2udq_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]], i32 10) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i256 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES1:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> [[X0]], <8 x i32> [[X1]], i8 -1, i32 8) +; CHECK-NEXT: [[RES2:%.*]] = add <8 x i32> [[RES]], [[RES1]] +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES2]] +; + %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 10) + %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8) + %res2 = add <8 x i32> %res, %res1 + ret <8 x i32> %res2 +} + +declare <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float>, <16 x i32>, 
i16, i32) + +define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_ps2dq_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2:%.*]], i32 10) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> [[X0]], <16 x i32> [[X1]], i16 -1, i32 8) +; CHECK-NEXT: [[RES2:%.*]] = add <16 x i32> [[RES]], [[RES1]] +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 10) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8) + %res2 = add <16 x i32> %res, %res1 + ret <16 x i32> %res2 +} + +declare <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float>, <8 x double>, i8, i32) + +define <8 x double>@test_int_x86_avx512_mask_cvt_ps2pd_512(<8 x float> %x0, <8 x double> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_ps2pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], 
[[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> [[X0:%.*]], <8 x double> [[X1:%.*]], i8 [[X2:%.*]], i32 4) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i32> [[TMP1]] to i256 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> [[X0]], <8 x double> [[X1]], i8 -1, i32 8) +; CHECK-NEXT: [[RES2:%.*]] = fadd <8 x double> [[RES]], [[RES1]] +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[RES2]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 %x2, i32 4) + %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 -1, i32 8) + %res2 = fadd <8 x double> %res, %res1 + ret <8 x double> %res2 +} + +declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32) + +define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_ps2udq_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2:%.*]], i32 10) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i32> 
@llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> [[X0]], <16 x i32> [[X1]], i16 -1, i32 8) +; CHECK-NEXT: [[RES2:%.*]] = add <16 x i32> [[RES]], [[RES1]] +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 10) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8) + %res2 = add <16 x i32> %res, %res1 + ret <16 x i32> %res2 +} + +declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double>, <8 x i32>, i8, i32) + +define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_cvtt_pd2dq_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]], i32 4) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i256 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES1:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> [[X0]], <8 x i32> [[X1]], i8 -1, i32 8) +; CHECK-NEXT: [[RES2:%.*]] = add <8 x i32> [[RES]], [[RES1]] +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES2]] +; + %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4) + %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8) + %res2 = add <8 x i32> %res, %res1 + ret <8 x i32> %res2 +} + +declare <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32>, i32) + +define <16 x float>@test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_udq2ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to 
ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[CVT:%.*]] = uitofp <16 x i32> [[X0:%.*]] to <16 x float> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP1]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x float> [[CVT]] to <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[X1:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP1]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i32> [[TMP11]], <16 x i32> [[TMP6]] +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP5]], <16 x float> [[CVT]], <16 x float> [[X1]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP13]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP16:%.*]] = call <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32> [[X0]], i32 8) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[_MSPROP_SELECT]], zeroinitializer +; CHECK-NEXT: [[RES2:%.*]] = fadd <16 x float> [[TMP12]], [[TMP16]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES2]] +; + %cvt = uitofp <16 x i32> %x0 to <16 x float> + %1 = bitcast i16 %x2 to <16 x i1> + %2 = select <16 x i1> %1, <16 x float> %cvt, <16 x float> %x1 + %3 = call <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32> %x0, i32 8) + %res2 = fadd <16 x float> %2, %3 + ret <16 x float> %res2 +} + +declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double>, <8 x i32>, i8, i32) + +define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_cvtt_pd2udq_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]], i32 4) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> 
[[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i256 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES1:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> [[X0]], <8 x i32> [[X1]], i8 -1, i32 8) +; CHECK-NEXT: [[RES2:%.*]] = add <8 x i32> [[RES]], [[RES1]] +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i32> [[RES2]] +; + %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4) + %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8) + %res2 = add <8 x i32> %res, %res1 + ret <8 x i32> %res2 +} + +declare <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float>, <16 x i32>, i16, i32) + +define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_cvtt_ps2dq_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2:%.*]], i32 4) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> [[X0]], <16 x i32> [[X1]], i16 -1, i32 8) +; CHECK-NEXT: [[RES2:%.*]] = add <16 x i32> [[RES]], [[RES1]] +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> 
%x1, i16 -1, i32 8) + %res2 = add <16 x i32> %res, %res1 + ret <16 x i32> %res2 +} + +declare <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float>, <16 x i32>, i16, i32) + +define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_cvtt_ps2udq_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2:%.*]], i32 4) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> [[X0]], <16 x i32> [[X1]], i16 -1, i32 8) +; CHECK-NEXT: [[RES2:%.*]] = add <16 x i32> [[RES]], [[RES1]] +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8) + %res2 = add <16 x i32> %res, %res1 + ret <16 x i32> %res2 +} + +declare <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone + +define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_getexp_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 
5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 -1, i32 8) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8) + ret <4 x float> %res +} + +define <4 x float> @test_mask_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_getexp_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES0:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i128 [[TMP12]], 0 +; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]] +; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR12:%.*]] = or i1 [[_MSOR10]], [[_MSCMP11]] +; CHECK-NEXT: br i1 [[_MSOR12]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> [[A2]], i8 [[MASK]], i32 8) +; CHECK-NEXT: [[RES_1:%.*]] = fadd <4 x float> [[RES0]], [[RES1]] +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES_1]] +; + %res0 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4) + %res1 = call <4 x 
float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8) + %res.1 = fadd <4 x float> %res0, %res1 + ret <4 x float> %res.1 +} + +define <4 x float> @test_maskz_getexp_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) #0 { +; CHECK-LABEL: @test_maskz_getexp_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 8) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8) + ret <4 x float> %res +} + +declare <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone + +define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_getexp_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 -1, i32 4) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4) + ret <2 x double> %res +} + +define <2 x double> @test_mask_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_mask_getexp_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 
add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES0:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i128 [[TMP12]], 0 +; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]] +; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR12:%.*]] = or i1 [[_MSOR10]], [[_MSCMP11]] +; CHECK-NEXT: br i1 [[_MSOR12]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> [[A0]], <2 x double> [[A1]], <2 x double> [[A2]], i8 [[MASK]], i32 8) +; CHECK-NEXT: [[RES_1:%.*]] = fadd <2 x double> [[RES0]], [[RES1]] +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES_1]] +; + %res0 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4) + %res1 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8) + %res.1 = fadd <2 x double> %res0, %res1 + ret <2 x double> %res.1 +} + +define <2 x double> @test_maskz_getexp_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) #0 { +; CHECK-LABEL: @test_maskz_getexp_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; 
CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 8) +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES]] +; + %res = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8) + ret <2 x double> %res +} + +declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32) + +define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_cmp_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES4:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[X0:%.*]], <2 x double> [[X1:%.*]], i32 5, i8 [[X3:%.*]], i32 8) +; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i8 [[RES4]] +; + %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8) + ret i8 %res4 +} + +define i8@test_int_x86_avx512_mask_cmp_sd_all(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_cmp_sd_all( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 
6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES1:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[X0:%.*]], <2 x double> [[X1:%.*]], i32 2, i8 -1, i32 4) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR4]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES2:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[X0]], <2 x double> [[X1]], i32 3, i8 -1, i32 8) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i128 [[TMP12]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSCMP5]], [[_MSCMP6]] +; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSOR7]], [[_MSCMP8]] +; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[RES3:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[X0]], <2 x double> [[X1]], i32 4, i8 [[X3:%.*]], i32 4) +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP10:%.*]] = icmp ne i128 [[TMP16]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP17]], 0 +; CHECK-NEXT: [[_MSOR12:%.*]] = or i1 [[_MSCMP10]], [[_MSCMP11]] +; CHECK-NEXT: [[_MSCMP13:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR14:%.*]] = or i1 [[_MSOR12]], [[_MSCMP13]] +; CHECK-NEXT: br i1 [[_MSOR14]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF1]] +; CHECK: 18: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 19: +; CHECK-NEXT: [[RES4:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[X0]], <2 x double> [[X1]], i32 5, i8 [[X3]], i32 8) +; CHECK-NEXT: [[TMP20:%.*]] = xor i8 [[RES1]], -1 +; CHECK-NEXT: [[TMP21:%.*]] = xor i8 [[RES2]], -1 +; CHECK-NEXT: [[TMP22:%.*]] = and i8 [[TMP20]], 0 +; CHECK-NEXT: [[TMP23:%.*]] = and i8 0, [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = or i8 0, [[TMP22]] +; CHECK-NEXT: [[TMP25:%.*]] = or i8 [[TMP24]], [[TMP23]] +; CHECK-NEXT: [[RES11:%.*]] = or i8 [[RES1]], [[RES2]] +; CHECK-NEXT: [[TMP26:%.*]] = xor i8 [[RES3]], -1 +; CHECK-NEXT: [[TMP27:%.*]] = xor i8 [[RES4]], -1 +; CHECK-NEXT: [[TMP28:%.*]] = and i8 [[TMP26]], 0 +; CHECK-NEXT: [[TMP29:%.*]] = and i8 0, [[TMP27]] +; CHECK-NEXT: [[TMP30:%.*]] = or i8 0, [[TMP28]] +; CHECK-NEXT: [[TMP31:%.*]] = or i8 [[TMP30]], [[TMP29]] +; CHECK-NEXT: [[RES12:%.*]] = or i8 [[RES3]], [[RES4]] +; CHECK-NEXT: [[TMP32:%.*]] = xor i8 [[RES11]], -1 +; CHECK-NEXT: [[TMP33:%.*]] = xor i8 [[RES12]], -1 +; CHECK-NEXT: [[TMP34:%.*]] = and i8 [[TMP25]], [[TMP31]] +; CHECK-NEXT: [[TMP35:%.*]] = and i8 [[TMP32]], [[TMP31]] +; CHECK-NEXT: [[TMP36:%.*]] = and i8 [[TMP25]], [[TMP33]] +; CHECK-NEXT: [[TMP37:%.*]] = or i8 [[TMP34]], [[TMP35]] +; CHECK-NEXT: 
[[TMP38:%.*]] = or i8 [[TMP37]], [[TMP36]] +; CHECK-NEXT: [[RES13:%.*]] = or i8 [[RES11]], [[RES12]] +; CHECK-NEXT: store i8 [[TMP38]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i8 [[RES13]] +; + %res1 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 2, i8 -1, i32 4) + %res2 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 3, i8 -1, i32 8) + %res3 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 4, i8 %x3, i32 4) + %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8) + + %res11 = or i8 %res1, %res2 + %res12 = or i8 %res3, %res4 + %res13 = or i8 %res11, %res12 + ret i8 %res13 +} + +declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32) + +define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_cmp_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES2:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[X0:%.*]], <4 x float> [[X1:%.*]], i32 3, i8 [[X3:%.*]], i32 4) +; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i8 [[RES2]] +; + %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 %x3, i32 4) + ret i8 %res2 +} + + +define i8@test_int_x86_avx512_mask_cmp_ss_all(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_cmp_ss_all( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES1:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[X0:%.*]], <4 x float> [[X1:%.*]], i32 2, i8 -1, i32 4) +; 
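NOTE: the guard is not reused across call sites; the same operand shadows are re-bitcast, re-compared, and re-OR'ed before each of the four @llvm.x86.avx512.mask.cmp.ss calls in this function. +; 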
CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR4]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES2:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[X0]], <4 x float> [[X1]], i32 3, i8 -1, i32 8) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i128 [[TMP12]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP13]], 0 +; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSCMP5]], [[_MSCMP6]] +; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSOR7]], [[_MSCMP8]] +; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[RES3:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[X0]], <4 x float> [[X1]], i32 4, i8 [[X3:%.*]], i32 4) +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP10:%.*]] = icmp ne i128 [[TMP16]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP17]], 0 +; CHECK-NEXT: [[_MSOR12:%.*]] = or i1 [[_MSCMP10]], [[_MSCMP11]] +; CHECK-NEXT: [[_MSCMP13:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR14:%.*]] = or i1 [[_MSOR12]], [[_MSCMP13]] +; CHECK-NEXT: br i1 [[_MSOR14]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF1]] +; CHECK: 18: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 19: +; CHECK-NEXT: [[RES4:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[X0]], <4 x float> [[X1]], i32 5, i8 [[X3]], i32 8) +; CHECK-NEXT: [[TMP20:%.*]] = and i8 [[RES1]], 0 +; CHECK-NEXT: [[TMP21:%.*]] = and i8 0, [[RES2]] +; CHECK-NEXT: [[TMP22:%.*]] = or i8 0, [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = or i8 [[TMP22]], [[TMP21]] +; CHECK-NEXT: [[RES11:%.*]] = and i8 [[RES1]], [[RES2]] +; CHECK-NEXT: [[TMP24:%.*]] = and i8 [[RES3]], 0 +; CHECK-NEXT: [[TMP25:%.*]] = and i8 0, [[RES4]] +; CHECK-NEXT: [[TMP26:%.*]] = or i8 0, [[TMP24]] +; CHECK-NEXT: [[TMP27:%.*]] = or i8 [[TMP26]], [[TMP25]] +; CHECK-NEXT: [[RES12:%.*]] = and i8 [[RES3]], [[RES4]] +; CHECK-NEXT: [[TMP28:%.*]] = and i8 [[TMP23]], [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = and i8 [[RES11]], [[TMP27]] +; CHECK-NEXT: [[TMP30:%.*]] = and i8 [[TMP23]], [[RES12]] +; CHECK-NEXT: [[TMP31:%.*]] = or i8 [[TMP28]], [[TMP29]] +; CHECK-NEXT: [[TMP32:%.*]] = or i8 [[TMP31]], [[TMP30]] +; CHECK-NEXT: [[RES13:%.*]] = and i8 [[RES11]], [[RES12]] +; CHECK-NEXT: store i8 [[TMP32]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i8 [[RES13]] +; + %res1 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 2, i8 -1, i32 4) + %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 -1, i32 8) + %res3 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 4, i8 %x3, i32 4) + %res4 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> 
%x0, <4 x float> %x1, i32 5, i8 %x3, i32 8) + + %res11 = and i8 %res1, %res2 + %res12 = and i8 %res3, %res4 + %res13 = and i8 %res11, %res12 + ret i8 %res13 +} + +declare <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double>, i32, <8 x double>, i8, i32) + +define <8 x double>@test_int_x86_avx512_mask_getmant_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_getmant_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> [[X0:%.*]], i32 11, <8 x double> [[X2:%.*]], i8 [[X3:%.*]], i32 4) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> [[X0]], i32 11, <8 x double> [[X2]], i8 -1, i32 8) +; CHECK-NEXT: [[RES2:%.*]] = fadd <8 x double> [[RES]], [[RES1]] +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[RES2]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 %x3, i32 4) + %res1 = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 -1, i32 8) + %res2 = fadd <8 x double> %res, %res1 + ret <8 x double> %res2 +} + +declare <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float>, i32, <16 x float>, i16, i32) + +define <16 x float>@test_int_x86_avx512_mask_getmant_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_getmant_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: 
[[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> [[X0:%.*]], i32 11, <16 x float> [[X2:%.*]], i16 [[X3:%.*]], i32 4) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] +; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES1:%.*]] = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> [[X0]], i32 11, <16 x float> [[X2]], i16 -1, i32 8) +; CHECK-NEXT: [[RES2:%.*]] = fadd <16 x float> [[RES]], [[RES1]] +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES2]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 %x3, i32 4) + %res1 = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 8) + %res2 = fadd <16 x float> %res, %res1 + ret <16 x float> %res2 +} + +declare <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double>, <2 x double>, i32, <2 x double>, i8, i32) + +define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_getmant_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: 
[[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> [[X0:%.*]], <2 x double> [[X1:%.*]], i32 11, <2 x double> [[X2:%.*]], i8 [[X3:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]] +; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]] +; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> [[X0]], <2 x double> [[X1]], i32 12, <2 x double> zeroinitializer, i8 [[X3]], i32 4) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP14]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP15]], 0 +; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0 +; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]] +; CHECK-NEXT: [[_MSCMP16:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR17:%.*]] = or i1 [[_MSOR15]], [[_MSCMP16]] +; CHECK-NEXT: br i1 [[_MSOR17]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]] +; CHECK: 17: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 18: +; CHECK-NEXT: [[RES2:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> [[X0]], <2 x double> [[X1]], i32 13, <2 x double> [[X2]], i8 [[X3]], i32 8) +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP18:%.*]] = icmp ne i128 [[TMP19]], 0 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i128 [[TMP20]], 0 +; CHECK-NEXT: [[_MSOR20:%.*]] = or i1 [[_MSCMP18]], [[_MSCMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP21:%.*]] = icmp ne i128 [[TMP21]], 0 +; CHECK-NEXT: [[_MSOR22:%.*]] = or i1 [[_MSOR20]], [[_MSCMP21]] +; CHECK-NEXT: br i1 [[_MSOR22]], label [[TMP22:%.*]], label [[TMP23:%.*]], !prof [[PROF1]] +; CHECK: 22: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 23: +; CHECK-NEXT: [[RES3:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> [[X0]], <2 x double> [[X1]], i32 14, <2 x double> [[X2]], i8 -1, i32 4) +; CHECK-NEXT: [[RES11:%.*]] = fadd <2 x double> [[RES]], [[RES1]] +; CHECK-NEXT: [[RES12:%.*]] = fadd <2 x double> [[RES2]], [[RES3]] +; CHECK-NEXT: [[RES13:%.*]] = fadd <2 x double> [[RES11]], [[RES12]] +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES13]] +; + %res = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 4) + %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 12, <2 x double> zeroinitializer, i8 %x3, i32 4) + %res2 = call <2 x double> 
@llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 13, <2 x double> %x2, i8 %x3, i32 8) + %res3 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 14, <2 x double> %x2, i8 -1, i32 4) + %res11 = fadd <2 x double> %res, %res1 + %res12 = fadd <2 x double> %res2, %res3 + %res13 = fadd <2 x double> %res11, %res12 + ret <2 x double> %res13 +} + +declare <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float>, <4 x float>, i32, <4 x float>, i8, i32) + +define <4 x float>@test_int_x86_avx512_mask_getmant_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_getmant_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> [[X0:%.*]], <4 x float> [[X1:%.*]], i32 11, <4 x float> [[X2:%.*]], i8 [[X3:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]] +; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]] +; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> [[X0]], <4 x float> [[X1]], i32 12, <4 x float> zeroinitializer, i8 [[X3]], i32 4) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP14]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP15]], 0 +; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0 +; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]] +; CHECK-NEXT: 
br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]] +; CHECK: 17: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 18: +; CHECK-NEXT: [[RES2:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> [[X0]], <4 x float> [[X1]], i32 13, <4 x float> [[X2]], i8 -1, i32 8) +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP16:%.*]] = icmp ne i128 [[TMP19]], 0 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP17:%.*]] = icmp ne i128 [[TMP20]], 0 +; CHECK-NEXT: [[_MSOR18:%.*]] = or i1 [[_MSCMP16]], [[_MSCMP17]] +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i128 [[TMP21]], 0 +; CHECK-NEXT: [[_MSOR20:%.*]] = or i1 [[_MSOR18]], [[_MSCMP19]] +; CHECK-NEXT: br i1 [[_MSOR20]], label [[TMP22:%.*]], label [[TMP23:%.*]], !prof [[PROF1]] +; CHECK: 22: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 23: +; CHECK-NEXT: [[RES3:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> [[X0]], <4 x float> [[X1]], i32 14, <4 x float> [[X2]], i8 -1, i32 4) +; CHECK-NEXT: [[RES11:%.*]] = fadd <4 x float> [[RES]], [[RES1]] +; CHECK-NEXT: [[RES12:%.*]] = fadd <4 x float> [[RES2]], [[RES3]] +; CHECK-NEXT: [[RES13:%.*]] = fadd <4 x float> [[RES11]], [[RES12]] +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES13]] +; + %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 %x3, i32 4) + %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 12, <4 x float> zeroinitializer, i8 %x3, i32 4) + %res2 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 13, <4 x float> %x2, i8 -1, i32 8) + %res3 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 14, <4 x float> %x2, i8 -1, i32 4) + %res11 = fadd <4 x float> %res, %res1 + %res12 = fadd <4 x float> %res2, %res3 + %res13 = fadd <4 x float> %res11, %res12 + ret <4 x float> %res13 +} + +define <4 x float> @test_int_x86_avx512_mask_getmant_ss_load(<4 x float> %x0, ptr %x1p) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_getmant_ss_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[X1:%.*]] = load <4 x float>, ptr [[X1P:%.*]], align 16 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[X1P]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP8]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], 
[[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] +; CHECK: 10: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 11: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> [[X0:%.*]], <4 x float> [[X1]], i32 11, <4 x float> undef, i8 -1, i32 4) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %x1 = load <4 x float>, ptr %x1p + %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> undef, i8 -1, i32 4) + ret <4 x float> %res +} + +declare <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double>, <8 x i64>) + +define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]]) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[RES]] +; + %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1) + ret <8 x double> %res +} + +define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512_mask(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %mask) #0 { +; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_pd_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> zeroinitializer, <8 x i64> [[TMP4]] +; 
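NOTE: shadow propagation for the merging select: [[TMP10]] picks per lane between the result shadow (zero, since the intrinsic operands were already checked) and the passthrough shadow [[TMP4]], while the xor/or chain below builds a pessimistic shadow (bits on which the two arms may disagree) that is used for lanes whose mask bit is itself uninitialized. +; 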
CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x double> [[RES]] to <8 x i64> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[X2:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP10]] +; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x double> [[RES]], <8 x double> [[X2]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[RES2]] +; + %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1) + %mask.cast = bitcast i8 %mask to <8 x i1> + %res2 = select <8 x i1> %mask.cast, <8 x double> %res, <8 x double> %x2 + ret <8 x double> %res2 +} + +define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512_maskz(<8 x double> %x0, <8 x i64> %x1, i8 %mask) #0 { +; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_pd_512_maskz( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]]) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x double> [[RES]] to <8 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i64> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP13]], <8 x i64> [[TMP9]] +; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x double> [[RES]], <8 x double> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[RES2]] +; + %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1) + %mask.cast = bitcast i8 %mask to <8 x i1> + %res2 = select <8 x i1> %mask.cast, <8 x double> %res, <8 x double> zeroinitializer + ret <8 x double> %res2 +} + +declare <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float>, <16 x i32>) + +define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, 
ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]]) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES]] +; + %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1) + ret <16 x float> %res +} + +define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_mask(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %mask) #0 { +; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512_mask( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x float> [[RES]] to <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[X2:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP10]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x float> [[RES]], <16 x float> [[X2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES2]] +; + %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1) + %mask.cast = bitcast i16 %mask to 
<16 x i1> + %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> %x2 + ret <16 x float> %res2 +} + +define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_maskz(<16 x float> %x0, <16 x i32> %x1, i16 %mask) #0 { +; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512_maskz( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]]) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x float> [[RES]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP13]], <16 x i32> [[TMP9]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x float> [[RES]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES2]] +; + %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1) + %mask.cast = bitcast i16 %mask to <16 x i1> + %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> zeroinitializer + ret <16 x float> %res2 +} + +define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_constant_pool(<16 x float> %x0, <16 x i32> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512_constant_pool( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> ) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES]] +; + %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> ) + ret <16 x float> %res +} + +define <16 x 
float>@test_int_x86_avx512_vpermilvar_ps_512_constant_pool_mask(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %mask) #0 { +; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512_constant_pool_mask( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> ) +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> zeroinitializer, <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x float> [[RES]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x float> [[X2:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP13]], <16 x i32> [[TMP8]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x float> [[RES]], <16 x float> [[X2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES2]] +; + %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> ) + %mask.cast = bitcast i16 %mask to <16 x i1> + %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> %x2 + ret <16 x float> %res2 +} + +define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_constant_pool_maskz(<16 x float> %x0, <16 x i32> %x1, i16 %mask) #0 { +; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512_constant_pool_maskz( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> ) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[RES]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP8]], zeroinitializer +; 
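NOTE: with zero-masking both select arms carry a zero shadow, so only an uninitialized mask bit can poison a lane; for such lanes the shadow becomes the result bits themselves, since any bit that differs from zero could change under a different mask value. +; 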
CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP7]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x float> [[RES]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES2]] +; + %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> ) + %mask.cast = bitcast i16 %mask to <16 x i1> + %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> zeroinitializer + ret <16 x float> %res2 +} + +declare <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double>, <4 x float>, <2 x double>, i8, i32) + +define <2 x double>@test_int_x86_avx512_mask_cvt_ss2sd_round(<2 x double> %x0,<4 x float> %x1, <2 x double> %x2, i8 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_ss2sd_round( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> [[X0:%.*]], <4 x float> [[X1:%.*]], <2 x double> [[X2:%.*]], i8 [[X3:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i128 [[TMP12]], 0 +; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]] +; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> [[X0]], <4 x float> [[X1]], <2 x double> [[X2]], i8 -1, i32 8) +; CHECK-NEXT: [[RES2:%.*]] = fadd <2 x double> [[RES]], [[RES1]] +; CHECK-NEXT: store <2 x i64> zeroinitializer, 
ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES2]] +; + %res = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> %x0, <4 x float> %x1, <2 x double> %x2, i8 %x3, i32 4) + %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> %x0, <4 x float> %x1, <2 x double> %x2, i8 -1, i32 8) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} + +declare <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float>, <2 x double>, <4 x float>, i8, i32) + +define <4 x float>@test_int_x86_avx512_mask_cvt_sd2ss_round(<4 x float> %x0,<2 x double> %x1, <4 x float> %x2, i8 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_sd2ss_round( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> [[X0:%.*]], <2 x double> [[X1:%.*]], <4 x float> [[X2:%.*]], i8 [[X3:%.*]], i32 11) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i128 [[TMP12]], 0 +; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]] +; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] +; CHECK: 13: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 14: +; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> [[X0]], <2 x double> [[X1]], <4 x float> [[X2]], i8 -1, i32 8) +; CHECK-NEXT: [[RES2:%.*]] = fadd <4 x float> [[RES]], [[RES1]] +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES2]] +; + %res = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %x0, <2 x double> %x1, <4 x float> %x2, i8 %x3, i32 11) + %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %x0, <2 x double> %x1, <4 x float> %x2, i8 -1, i32 8) + %res2 
= fadd <4 x float> %res, %res1 + ret <4 x float> %res2 +} + +declare <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32) + +define <16 x i32>@test_int_x86_avx512_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pternlog_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2:%.*]], i32 33) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP9]] +; + %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33) + ret <16 x i32> %1 +} + +define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pternlog_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2:%.*]], i32 33) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X4:%.*]] to <16 x i1> +; CHECK-NEXT: 
[[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> zeroinitializer, <16 x i32> [[TMP1]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], [[X0]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP1]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> [[X0]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP17]] +; + %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33) + %2 = bitcast i16 %x4 to <16 x i1> + %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0 + ret <16 x i32> %3 +} + +define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_pternlog_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2:%.*]], i32 33) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X4:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP17]] +; + %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33) + %2 = bitcast i16 %x4 to <16 x i1> + %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer + ret <16 x i32> %3 +} + +declare <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64>, <8 x 
i64>, <8 x i64>, i32) + +define <8 x i64>@test_int_x86_avx512_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) #0 { +; CHECK-LABEL: @test_int_x86_avx512_pternlog_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]], i32 33) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP9]] +; + %1 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33) + ret <8 x i64> %1 +} + +define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_pternlog_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]], i32 33) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[X4:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> zeroinitializer, <8 x i64> [[TMP1]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <8 x i64> [[TMP10]], [[X0]] +; CHECK-NEXT: 
[[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP1]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP10]], <8 x i64> [[X0]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP17]] +; + %1 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33) + %2 = bitcast i8 %x4 to <8 x i1> + %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0 + ret <8 x i64> %3 +} + +define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_pternlog_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]], i32 33) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[X4:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = xor <8 x i64> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]] +; CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP10]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP17]] +; + %1 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33) + %2 = bitcast i8 %x4 to <8 x i1> + %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer + ret <8 x i64> %3 +} + +define i32 @test_x86_avx512_comi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_comi_sd_eq_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr 
@__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 0, i32 8) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 8) + ret i32 %res +} + +define i32 @test_x86_avx512_ucomi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_ucomi_sd_eq_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 8, i32 8) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 8) + ret i32 %res +} + +define i32 @test_x86_avx512_comi_sd_eq(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_comi_sd_eq( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 0, i32 4) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 4) + ret i32 %res +} + +define i32 @test_x86_avx512_ucomi_sd_eq(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_ucomi_sd_eq( +; 
CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 8, i32 4) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 4) + ret i32 %res +} + +define i32 @test_x86_avx512_comi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_comi_sd_lt_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 1, i32 8) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 8) + ret i32 %res +} + +define i32 @test_x86_avx512_ucomi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_ucomi_sd_lt_sae( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 9, i32 8) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 
9, i32 8) + ret i32 %res +} + +define i32 @test_x86_avx512_comi_sd_lt(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_comi_sd_lt( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 1, i32 4) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 4) + ret i32 %res +} + +define i32 @test_x86_avx512_ucomi_sd_lt(<2 x double> %a0, <2 x double> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_ucomi_sd_lt( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 9, i32 4) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 4) + ret i32 %res +} + +declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32) + +define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_ucomi_ss_lt( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> 
[[A0:%.*]], <4 x float> [[A1:%.*]], i32 9, i32 4) +; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i32 [[RES]] +; + %res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float> %a1, i32 9, i32 4) + ret i32 %res +} + +declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32) + +declare <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double>, <8 x i64>) + +define <8 x double>@test_int_x86_avx512_permvar_df_512(<8 x double> %x0, <8 x i64> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_permvar_df_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]]) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP7]] +; + %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> %x1) + ret <8 x double> %1 +} + +define <8 x double>@test_int_x86_avx512_mask_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_permvar_df_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> zeroinitializer, <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x double> [[TMP9]] to <8 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x double> [[X2:%.*]] to <8 x i64> +; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i64> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], 
zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP17]], <8 x i64> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP11]], <8 x double> [[TMP9]], <8 x double> [[X2]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP18]] +; + %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> %x1) + %2 = bitcast i8 %x3 to <8 x i1> + %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %x2 + ret <8 x double> %3 +} + +define <8 x double>@test_int_x86_avx512_maskz_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, i8 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_permvar_df_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64> +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[TMP16]] +; + %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> %x1) + %2 = bitcast i8 %x3 to <8 x i1> + %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer + ret <8 x double> %3 +} + +declare <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64>, <8 x i64>) + +define <8 x i64>@test_int_x86_avx512_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_permvar_di_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> 
[[X0:%.*]], <8 x i64> [[X1:%.*]]) +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP3]] +; + %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1) + ret <8 x i64> %1 +} + +define <8 x i64>@test_int_x86_avx512_mask_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_permvar_di_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[X2]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP12]] +; + %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1) + %2 = bitcast i8 %x3 to <8 x i1> + %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2 + ret <8 x i64> %3 +} + +define <8 x i64>@test_int_x86_avx512_maskz_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_permvar_di_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, 
align 8 +; CHECK-NEXT: ret <8 x i64> [[TMP11]] +; + %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1) + %2 = bitcast i8 %x3 to <8 x i1> + %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer + ret <8 x i64> %3 +} + +declare <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float>, <16 x i32>) + +define <16 x float>@test_int_x86_avx512_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_permvar_sf_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]]) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP7]] +; + %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1) + ret <16 x float> %1 +} + +define <16 x float>@test_int_x86_avx512_mask_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_permvar_sf_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 7: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[X2:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] 
= or <16 x i32> [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[X2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP18]] +; + %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1) + %2 = bitcast i16 %x3 to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %x2 + ret <16 x float> %3 +} + +define <16 x float>@test_int_x86_avx512_maskz_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, i16 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_permvar_sf_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP16]] +; + %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1) + %2 = bitcast i16 %x3 to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer + ret <16 x float> %3 +} + +declare <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32>, <16 x i32>) + +define <16 x i32>@test_int_x86_avx512_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1) #0 { +; CHECK-LABEL: @test_int_x86_avx512_permvar_si_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> 
[[X0:%.*]], <16 x i32> [[X1:%.*]]) +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP3]] +; + %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1) + ret <16 x i32> %1 +} + +define <16 x i32>@test_int_x86_avx512_mask_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_permvar_si_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[X2:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP]] +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[X2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP12]] +; + %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1) + %2 = bitcast i16 %x3 to <16 x i1> + %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2 + ret <16 x i32> %3 +} + +define <16 x i32>@test_int_x86_avx512_maskz_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_permvar_si_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[_MSPROP]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[_MSPROP]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer +; 
CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[TMP11]] +; + %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1) + %2 = bitcast i16 %x3 to <16 x i1> + %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer + ret <16 x i32> %3 +} + +declare <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double>, <8 x double>, <8 x i64>, i32, i8, i32) + +define <8 x double>@test_int_x86_avx512_mask_fixupimm_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i8 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_fixupimm_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x i64> [[X2:%.*]], i32 4, i8 [[X4:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]] +; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]] +; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> zeroinitializer, <8 x double> [[X1]], <8 x i64> [[X2]], i32 5, i8 [[X4]], i32 4) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i512 [[TMP14]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i512 [[TMP15]], 0 +; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i512 [[TMP16]], 0 +; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]] +; CHECK-NEXT: br i1 [[_MSOR15]], label 
[[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]] +; CHECK: 17: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 18: +; CHECK-NEXT: [[RES2:%.*]] = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> [[X0]], <8 x double> [[X1]], <8 x i64> [[X2]], i32 3, i8 -1, i32 8) +; CHECK-NEXT: [[RES3:%.*]] = fadd <8 x double> [[RES]], [[RES1]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <8 x double> [[RES3]], [[RES2]] +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[RES4]] +; + %res = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 4, i8 %x4, i32 4) + %res1 = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> zeroinitializer, <8 x double> %x1, <8 x i64> %x2, i32 5, i8 %x4, i32 4) + %res2 = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 -1, i32 8) + %res3 = fadd <8 x double> %res, %res1 + %res4 = fadd <8 x double> %res3, %res2 + ret <8 x double> %res4 +} + +define <8 x double>@test_int_x86_avx512_mask_fixupimm_pd_512_load(<8 x double> %x0, <8 x double> %x1, ptr %x2ptr) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_fixupimm_pd_512_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[X2:%.*]] = load <8 x i64>, ptr [[X2PTR:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[X2PTR]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i64>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSOR]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR4]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x i64> [[X2]], i32 3, i8 -1, i32 4) +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[RES]] +; + %x2 = load <8 x i64>, ptr %x2ptr + %res = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 -1, i32 4) + ret <8 x double> %res +} + +declare <8 x double> 
@llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double>, <8 x double>, <8 x i64>, i32, i8, i32) + +define <8 x double>@test_int_x86_avx512_maskz_fixupimm_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i8 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_fixupimm_pd_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x i64> [[X2:%.*]], i32 3, i8 [[X4:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]] +; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]] +; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> [[X0]], <8 x double> [[X1]], <8 x i64> zeroinitializer, i32 5, i8 [[X4]], i32 4) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i512 [[TMP14]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i512 [[TMP15]], 0 +; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i512 [[TMP16]], 0 +; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]] +; CHECK-NEXT: br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]] +; CHECK: 17: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 18: +; CHECK-NEXT: [[RES2:%.*]] = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> [[X0]], <8 x double> [[X1]], <8 x i64> [[X2]], i32 2, i8 -1, i32 8) +; CHECK-NEXT: [[RES3:%.*]] 
= fadd <8 x double> [[RES]], [[RES1]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <8 x double> [[RES3]], [[RES2]] +; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[RES4]] +; + %res = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 %x4, i32 4) + %res1 = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> zeroinitializer, i32 5, i8 %x4, i32 4) + %res2 = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 2, i8 -1, i32 8) + %res3 = fadd <8 x double> %res, %res1 + %res4 = fadd <8 x double> %res3, %res2 + ret <8 x double> %res4 +} + +declare <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float>, <4 x float>, <4 x i32>, i32, i8, i32) + +define <4 x float>@test_int_x86_avx512_mask_fixupimm_ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_fixupimm_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> [[X0:%.*]], <4 x float> [[X1:%.*]], <4 x i32> [[X2:%.*]], i32 5, i8 [[X4:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]] +; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]] +; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> [[X0]], <4 x float> [[X1]], <4 x i32> zeroinitializer, i32 5, i8 [[X4]], i32 4) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP14]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = 
bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP15]], 0 +; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0 +; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]] +; CHECK-NEXT: br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]] +; CHECK: 17: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 18: +; CHECK-NEXT: [[RES2:%.*]] = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> [[X0]], <4 x float> [[X1]], <4 x i32> [[X2]], i32 5, i8 -1, i32 8) +; CHECK-NEXT: [[RES3:%.*]] = fadd <4 x float> [[RES]], [[RES1]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <4 x float> [[RES3]], [[RES2]] +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES4]] +; + %res = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4, i32 4) + %res1 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4, i32 4) + %res2 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1, i32 8) + %res3 = fadd <4 x float> %res, %res1 + %res4 = fadd <4 x float> %res3, %res2 + ret <4 x float> %res4 +} + +declare <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float>, <4 x float>, <4 x i32>, i32, i8, i32) + +define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_fixupimm_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> [[X0:%.*]], <4 x float> [[X1:%.*]], <4 x i32> [[X2:%.*]], i32 5, i8 [[X4:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0 +; 
CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]] +; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]] +; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> [[X0]], <4 x float> [[X1]], <4 x i32> zeroinitializer, i32 5, i8 [[X4]], i32 8) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP14]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP15]], 0 +; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0 +; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]] +; CHECK-NEXT: br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]] +; CHECK: 17: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 18: +; CHECK-NEXT: [[RES2:%.*]] = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> [[X0]], <4 x float> [[X1]], <4 x i32> [[X2]], i32 6, i8 -1, i32 4) +; CHECK-NEXT: [[RES3:%.*]] = fadd <4 x float> [[RES]], [[RES1]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <4 x float> [[RES3]], [[RES2]] +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES4]] +; + %res = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4, i32 4) + %res1 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4, i32 8) + %res2 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 6, i8 -1, i32 4) + %res3 = fadd <4 x float> %res, %res1 + %res4 = fadd <4 x float> %res3, %res2 + ret <4 x float> %res4 +} + +declare <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float>, <16 x float>, <16 x i32>, i32, i16, i32) + +define <16 x float>@test_int_x86_avx512_mask_fixupimm_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i16 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_fixupimm_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; 
CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i16 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> [[X0:%.*]], <16 x float> [[X1:%.*]], <16 x i32> [[X2:%.*]], i32 5, i16 [[X4:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]] +; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i16 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]] +; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[RES1:%.*]] = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> [[X0]], <16 x float> [[X1]], <16 x i32> zeroinitializer, i32 5, i16 [[X4]], i32 4) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i512 [[TMP14]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i512 [[TMP15]], 0 +; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i512 [[TMP16]], 0 +; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]] +; CHECK-NEXT: br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]] +; CHECK: 17: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 18: +; CHECK-NEXT: [[RES2:%.*]] = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> [[X0]], <16 x float> [[X1]], <16 x i32> [[X2]], i32 5, i16 -1, i32 8) +; CHECK-NEXT: [[RES3:%.*]] = fadd <16 x float> [[RES]], [[RES1]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <16 x float> [[RES3]], [[RES2]] +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES4]] +; + %res = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 %x4, i32 4) + %res1 = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> zeroinitializer, i32 5, i16 %x4, i32 4) + %res2 = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 -1, i32 8) + %res3 = fadd <16 x float> %res, %res1 + %res4 = fadd <16 x float> %res3, %res2 + ret <16 x float> %res4 +} + +define <16 x float>@test_int_x86_avx512_mask_fixupimm_ps_512_load(<16 x float> %x0, <16 x float> %x1, ptr %x2ptr) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_fixupimm_ps_512_load( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to 
ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[X2:%.*]] = load <16 x i32>, ptr [[X2PTR:%.*]], align 64 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[X2PTR]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSOR]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR4]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> [[X0:%.*]], <16 x float> [[X1:%.*]], <16 x i32> [[X2]], i32 5, i16 -1, i32 4) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES]] +; + %x2 = load <16 x i32>, ptr %x2ptr + %res = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 -1, i32 4) + ret <16 x float> %res +} + +declare <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float>, <16 x float>, <16 x i32>, i32, i16, i32) + +define <16 x float>@test_int_x86_avx512_maskz_fixupimm_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i16 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_fixupimm_ps_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i16 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call 
<16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> [[X0:%.*]], <16 x float> [[X1:%.*]], <16 x i32> [[X2:%.*]], i32 5, i16 [[X4:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i512 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]] +; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i16 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]] +; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[RES1:%.*]] = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> [[X0]], <16 x float> [[X1]], <16 x i32> zeroinitializer, i32 6, i16 [[X4]], i32 8) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i512 [[TMP14]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i512 [[TMP15]], 0 +; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x i32> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i512 [[TMP16]], 0 +; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]] +; CHECK-NEXT: br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]] +; CHECK: 17: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 18: +; CHECK-NEXT: [[RES2:%.*]] = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> [[X0]], <16 x float> [[X1]], <16 x i32> [[X2]], i32 7, i16 -1, i32 4) +; CHECK-NEXT: [[RES3:%.*]] = fadd <16 x float> [[RES]], [[RES1]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <16 x float> [[RES3]], [[RES2]] +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES4]] +; + %res = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 %x4, i32 4) + %res1 = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> zeroinitializer, i32 6, i16 %x4, i32 8) + %res2 = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 7, i16 -1, i32 4) + %res3 = fadd <16 x float> %res, %res1 + %res4 = fadd <16 x float> %res3, %res2 + ret <16 x float> %res4 +} + +declare <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double>, <2 x double>, <2 x i64>, i32, i8, i32) + +define <2 x double>@test_int_x86_avx512_mask_fixupimm_sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_fixupimm_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> 
[[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> [[X0:%.*]], <2 x double> [[X1:%.*]], <2 x i64> [[X2:%.*]], i32 5, i8 [[X4:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]] +; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]] +; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> [[X0]], <2 x double> [[X1]], <2 x i64> zeroinitializer, i32 5, i8 [[X4]], i32 8) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP14]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP15]], 0 +; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0 +; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]] +; CHECK-NEXT: br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]] +; CHECK: 17: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 18: +; CHECK-NEXT: [[RES2:%.*]] = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> [[X0]], <2 x double> [[X1]], <2 x i64> [[X2]], i32 6, i8 -1, i32 4) +; CHECK-NEXT: [[RES3:%.*]] = fadd <2 x double> [[RES]], [[RES1]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <2 x double> [[RES3]], [[RES2]] +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES4]] +; + %res = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 4) + %res1 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 5, i8 %x4, i32 8) + %res2 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 6, i8 -1, i32 4) + %res3 = fadd <2 x double> %res, %res1 + %res4 = fadd <2 x double> %res3, %res2 + ret <2 x double> %res4 +} + +declare <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double>, <2 x double>, <2 x i64>, i32, i8, i32) + +define 
<2 x double>@test_int_x86_avx512_maskz_fixupimm_sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_fixupimm_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] +; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]] +; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> [[X0:%.*]], <2 x double> [[X1:%.*]], <2 x i64> [[X2:%.*]], i32 5, i8 [[X4:%.*]], i32 4) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0 +; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]] +; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]] +; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] +; CHECK: 12: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 13: +; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> [[X0]], <2 x double> [[X1]], <2 x i64> zeroinitializer, i32 5, i8 [[X4]], i32 8) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP14]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP15]], 0 +; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0 +; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]] +; CHECK-NEXT: [[_MSCMP16:%.*]] = icmp ne i8 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR17:%.*]] = or i1 [[_MSOR15]], [[_MSCMP16]] +; CHECK-NEXT: br i1 [[_MSOR17]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]] +; CHECK: 17: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 18: +; CHECK-NEXT: [[RES2:%.*]] = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> [[X0]], <2 x double> [[X1]], <2 x i64> [[X2]], i32 5, i8 [[X4]], i32 8) +; CHECK-NEXT: 
[[RES3:%.*]] = fadd <2 x double> [[RES]], [[RES1]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <2 x double> [[RES3]], [[RES2]] +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES4]] +; + %res = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 4) + %res1 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 5, i8 %x4, i32 8) + %res2 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 8) + %res3 = fadd <2 x double> %res, %res1 + %res4 = fadd <2 x double> %res3, %res2 + ret <2 x double> %res4 +} + +declare double @llvm.fma.f64(double, double, double) #1 +declare double @llvm.x86.avx512.vfmadd.f64(double, double, double, i32) #0 + +define <2 x double> @test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_vfmadd_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[X0:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[X2:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]] +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i64 [[_MSPROP3]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP8:%.*]] = call double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <8 x i1> [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP10]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[_MSPROP4]], i64 [[_MSPROP]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast double [[TMP8]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast double [[TMP5]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[_MSPROP4]] +; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP16]], [[_MSPROP]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP5]], i64 [[TMP17]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP11]], double [[TMP8]], double [[TMP5]] +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> [[X0]], double [[TMP18]], i64 0 +; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[X0]], i64 0 +; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <2 x 
i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[X1]], i64 0 +; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[X2]], i64 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[_MSPROP7]], 0 +; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i64 [[_MSPROP8]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP19]] +; CHECK-NEXT: [[_MSCMP20:%.*]] = icmp ne i64 [[_MSPROP9]], 0 +; CHECK-NEXT: [[_MSOR21:%.*]] = or i1 [[_MSOR]], [[_MSCMP20]] +; CHECK-NEXT: br i1 [[_MSOR21]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]] +; CHECK: 23: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 24: +; CHECK-NEXT: [[TMP25:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP20]], double [[TMP21]], double [[TMP22]], i32 11) +; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <2 x i64> [[TMP1]], i64 0, i64 0 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <2 x double> [[X0]], double [[TMP25]], i64 0 +; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x double> [[X0]], i64 0 +; CHECK-NEXT: [[_MSPROP12:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x double> [[X1]], i64 0 +; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x double> [[X2]], i64 0 +; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i64 [[_MSPROP11]], 0 +; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i64 [[_MSPROP12]], 0 +; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSCMP22]], [[_MSCMP23]] +; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i64 [[_MSPROP13]], 0 +; CHECK-NEXT: [[_MSOR26:%.*]] = or i1 [[_MSOR24]], [[_MSCMP25]] +; CHECK-NEXT: br i1 [[_MSOR26]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]] +; CHECK: 30: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 31: +; CHECK-NEXT: [[TMP32:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP27]], double [[TMP28]], double [[TMP29]], i32 10) +; CHECK-NEXT: [[TMP33:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP34:%.*]] = bitcast i8 [[X3]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP33]], i64 0 +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP34]], i64 0 +; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i64 0, i64 [[_MSPROP11]] +; CHECK-NEXT: [[TMP37:%.*]] = bitcast double [[TMP32]] to i64 +; CHECK-NEXT: [[TMP38:%.*]] = bitcast double [[TMP27]] to i64 +; CHECK-NEXT: [[TMP39:%.*]] = xor i64 [[TMP37]], [[TMP38]] +; CHECK-NEXT: [[TMP40:%.*]] = or i64 [[TMP39]], 0 +; CHECK-NEXT: [[TMP41:%.*]] = or i64 [[TMP40]], [[_MSPROP11]] +; CHECK-NEXT: [[_MSPROP_SELECT15:%.*]] = select i1 [[_MSPROP14]], i64 [[TMP41]], i64 [[TMP36]] +; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], double [[TMP32]], double [[TMP27]] +; CHECK-NEXT: [[_MSPROP16:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[_MSPROP_SELECT15]], i64 0 +; CHECK-NEXT: [[TMP43:%.*]] = insertelement <2 x double> [[X0]], double [[TMP42]], i64 0 +; CHECK-NEXT: [[_MSPROP17:%.*]] = or <2 x i64> [[_MSPROP6]], [[_MSPROP10]] +; CHECK-NEXT: [[RES3:%.*]] = fadd <2 x double> [[TMP19]], [[TMP26]] +; CHECK-NEXT: [[_MSPROP18:%.*]] = or <2 x i64> [[_MSPROP16]], [[_MSPROP17]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <2 x double> [[TMP43]], [[RES3]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP18]], ptr 
@__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES4]] +; + %1 = extractelement <2 x double> %x0, i64 0 + %2 = extractelement <2 x double> %x1, i64 0 + %3 = extractelement <2 x double> %x2, i64 0 + %4 = call double @llvm.fma.f64(double %1, double %2, double %3) + %5 = bitcast i8 %x3 to <8 x i1> + %6 = extractelement <8 x i1> %5, i64 0 + %7 = select i1 %6, double %4, double %1 + %8 = insertelement <2 x double> %x0, double %7, i64 0 + %9 = extractelement <2 x double> %x0, i64 0 + %10 = extractelement <2 x double> %x1, i64 0 + %11 = extractelement <2 x double> %x2, i64 0 + %12 = call double @llvm.x86.avx512.vfmadd.f64(double %9, double %10, double %11, i32 11) + %13 = insertelement <2 x double> %x0, double %12, i64 0 + %14 = extractelement <2 x double> %x0, i64 0 + %15 = extractelement <2 x double> %x1, i64 0 + %16 = extractelement <2 x double> %x2, i64 0 + %17 = call double @llvm.x86.avx512.vfmadd.f64(double %14, double %15, double %16, i32 10) + %18 = bitcast i8 %x3 to <8 x i1> + %19 = extractelement <8 x i1> %18, i64 0 + %20 = select i1 %19, double %17, double %14 + %21 = insertelement <2 x double> %x0, double %20, i64 0 + %res3 = fadd <2 x double> %8, %13 + %res4 = fadd <2 x double> %21, %res3 + ret <2 x double> %res4 +} + +define <4 x float> @test_int_x86_avx512_mask_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_vfmadd_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[X2:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = or i32 [[_MSPROP]], [[_MSPROP1]] +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP3]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.fma.f32(float [[TMP5]], float [[TMP6]], float [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <8 x i1> [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP10]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[_MSPROP4]], i32 [[_MSPROP]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast float [[TMP8]] to i32 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast float [[TMP5]] to i32 +; CHECK-NEXT: [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP15]], [[_MSPROP4]] +; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], [[_MSPROP]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP5]], i32 [[TMP17]], i32 [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP11]], float [[TMP8]], float [[TMP5]] +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <4 x i32> 
[[TMP1]], i32 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x float> [[X0]], float [[TMP18]], i64 0 +; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[X0]], i64 0 +; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[X1]], i64 0 +; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[X2]], i64 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[_MSPROP7]], 0 +; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i32 [[_MSPROP8]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP19]] +; CHECK-NEXT: [[_MSCMP20:%.*]] = icmp ne i32 [[_MSPROP9]], 0 +; CHECK-NEXT: [[_MSOR21:%.*]] = or i1 [[_MSOR]], [[_MSCMP20]] +; CHECK-NEXT: br i1 [[_MSOR21]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]] +; CHECK: 23: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 24: +; CHECK-NEXT: [[TMP25:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP20]], float [[TMP21]], float [[TMP22]], i32 11) +; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <4 x i32> [[TMP1]], i32 0, i64 0 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x float> [[X0]], float [[TMP25]], i64 0 +; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x float> [[X0]], i64 0 +; CHECK-NEXT: [[_MSPROP12:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x float> [[X1]], i64 0 +; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x float> [[X2]], i64 0 +; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i32 [[_MSPROP11]], 0 +; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i32 [[_MSPROP12]], 0 +; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSCMP22]], [[_MSCMP23]] +; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i32 [[_MSPROP13]], 0 +; CHECK-NEXT: [[_MSOR26:%.*]] = or i1 [[_MSOR24]], [[_MSCMP25]] +; CHECK-NEXT: br i1 [[_MSOR26]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]] +; CHECK: 30: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 31: +; CHECK-NEXT: [[TMP32:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP27]], float [[TMP28]], float [[TMP29]], i32 10) +; CHECK-NEXT: [[TMP33:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP34:%.*]] = bitcast i8 [[X3]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP33]], i64 0 +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP34]], i64 0 +; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 0, i32 [[_MSPROP11]] +; CHECK-NEXT: [[TMP37:%.*]] = bitcast float [[TMP32]] to i32 +; CHECK-NEXT: [[TMP38:%.*]] = bitcast float [[TMP27]] to i32 +; CHECK-NEXT: [[TMP39:%.*]] = xor i32 [[TMP37]], [[TMP38]] +; CHECK-NEXT: [[TMP40:%.*]] = or i32 [[TMP39]], 0 +; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP40]], [[_MSPROP11]] +; CHECK-NEXT: [[_MSPROP_SELECT15:%.*]] = select i1 [[_MSPROP14]], i32 [[TMP41]], i32 [[TMP36]] +; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], float [[TMP32]], float [[TMP27]] +; CHECK-NEXT: [[_MSPROP16:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[_MSPROP_SELECT15]], i64 0 +; CHECK-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[X0]], float [[TMP42]], i64 0 +; CHECK-NEXT: [[_MSPROP17:%.*]] = or <4 x 
i32> [[_MSPROP6]], [[_MSPROP10]] +; CHECK-NEXT: [[RES3:%.*]] = fadd <4 x float> [[TMP19]], [[TMP26]] +; CHECK-NEXT: [[_MSPROP18:%.*]] = or <4 x i32> [[_MSPROP16]], [[_MSPROP17]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <4 x float> [[TMP43]], [[RES3]] +; CHECK-NEXT: store <4 x i32> [[_MSPROP18]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES4]] +; + %1 = extractelement <4 x float> %x0, i64 0 + %2 = extractelement <4 x float> %x1, i64 0 + %3 = extractelement <4 x float> %x2, i64 0 + %4 = call float @llvm.fma.f32(float %1, float %2, float %3) + %5 = bitcast i8 %x3 to <8 x i1> + %6 = extractelement <8 x i1> %5, i64 0 + %7 = select i1 %6, float %4, float %1 + %8 = insertelement <4 x float> %x0, float %7, i64 0 + %9 = extractelement <4 x float> %x0, i64 0 + %10 = extractelement <4 x float> %x1, i64 0 + %11 = extractelement <4 x float> %x2, i64 0 + %12 = call float @llvm.x86.avx512.vfmadd.f32(float %9, float %10, float %11, i32 11) + %13 = insertelement <4 x float> %x0, float %12, i64 0 + %14 = extractelement <4 x float> %x0, i64 0 + %15 = extractelement <4 x float> %x1, i64 0 + %16 = extractelement <4 x float> %x2, i64 0 + %17 = call float @llvm.x86.avx512.vfmadd.f32(float %14, float %15, float %16, i32 10) + %18 = bitcast i8 %x3 to <8 x i1> + %19 = extractelement <8 x i1> %18, i64 0 + %20 = select i1 %19, float %17, float %14 + %21 = insertelement <4 x float> %x0, float %20, i64 0 + %res3 = fadd <4 x float> %8, %13 + %res4 = fadd <4 x float> %21, %res3 + ret <4 x float> %res4 +} + +define <2 x double> @test_int_x86_avx512_maskz_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 %x4) { +; CHECK-LABEL: @test_int_x86_avx512_maskz_vfmadd_sd( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[X0:%.*]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[X2:%.*]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i64 0, i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast double [[TMP4]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP10]], 0 +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 false, i64 [[TMP11]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP6]], double [[TMP4]], double 0.000000e+00 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> [[X0]], double [[TMP12]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[X0]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[X1]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[X2]], i64 0 +; CHECK-NEXT: [[TMP17:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP14]], double [[TMP15]], double [[TMP16]], i32 11) +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[X3]] to <8 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0 +; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i64 0, i64 0 +; CHECK-NEXT: [[TMP21:%.*]] = bitcast double [[TMP17]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], 0 +; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP22]], 0 +; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP23]], 0 +; CHECK-NEXT: 
[[_MSPROP_SELECT1:%.*]] = select i1 false, i64 [[TMP24]], i64 [[TMP20]] +; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP19]], double [[TMP17]], double 0.000000e+00 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <2 x double> [[X0]], double [[TMP25]], i64 0 +; CHECK-NEXT: [[RES2:%.*]] = fadd <2 x double> [[TMP13]], [[TMP26]] +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES2]] +; + %1 = extractelement <2 x double> %x0, i64 0 + %2 = extractelement <2 x double> %x1, i64 0 + %3 = extractelement <2 x double> %x2, i64 0 + %4 = call double @llvm.fma.f64(double %1, double %2, double %3) + %5 = bitcast i8 %x3 to <8 x i1> + %6 = extractelement <8 x i1> %5, i64 0 + %7 = select i1 %6, double %4, double 0.000000e+00 + %8 = insertelement <2 x double> %x0, double %7, i64 0 + %9 = extractelement <2 x double> %x0, i64 0 + %10 = extractelement <2 x double> %x1, i64 0 + %11 = extractelement <2 x double> %x2, i64 0 + %12 = call double @llvm.x86.avx512.vfmadd.f64(double %9, double %10, double %11, i32 11) + %13 = bitcast i8 %x3 to <8 x i1> + %14 = extractelement <8 x i1> %13, i64 0 + %15 = select i1 %14, double %12, double 0.000000e+00 + %16 = insertelement <2 x double> %x0, double %15, i64 0 + %res2 = fadd <2 x double> %8, %16 + ret <2 x double> %res2 +} + +declare float @llvm.fma.f32(float, float, float) #1 +declare float @llvm.x86.avx512.vfmadd.f32(float, float, float, i32) #0 + +define <4 x float> @test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 %x4) { +; CHECK-LABEL: @test_int_x86_avx512_maskz_vfmadd_ss( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[X2:%.*]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 0, i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast float [[TMP4]] to i32 +; CHECK-NEXT: [[TMP9:%.*]] = xor i32 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP10]], 0 +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 false, i32 [[TMP11]], i32 [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP6]], float [[TMP4]], float 0.000000e+00 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[X0]], float [[TMP12]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[X0]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[X1]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[X2]], i64 0 +; CHECK-NEXT: [[TMP17:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP14]], float [[TMP15]], float [[TMP16]], i32 11) +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[X3]] to <8 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0 +; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 0, i32 0 +; CHECK-NEXT: [[TMP21:%.*]] = bitcast float [[TMP17]] to i32 +; CHECK-NEXT: [[TMP22:%.*]] = xor i32 [[TMP21]], 0 +; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP22]], 0 +; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP23]], 0 +; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select i1 false, i32 [[TMP24]], i32 [[TMP20]] +; CHECK-NEXT: [[TMP25:%.*]] = select i1 
[[TMP19]], float [[TMP17]], float 0.000000e+00 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x float> [[X0]], float [[TMP25]], i64 0 +; CHECK-NEXT: [[RES2:%.*]] = fadd <4 x float> [[TMP13]], [[TMP26]] +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES2]] +; + %1 = extractelement <4 x float> %x0, i64 0 + %2 = extractelement <4 x float> %x1, i64 0 + %3 = extractelement <4 x float> %x2, i64 0 + %4 = call float @llvm.fma.f32(float %1, float %2, float %3) + %5 = bitcast i8 %x3 to <8 x i1> + %6 = extractelement <8 x i1> %5, i64 0 + %7 = select i1 %6, float %4, float 0.000000e+00 + %8 = insertelement <4 x float> %x0, float %7, i64 0 + %9 = extractelement <4 x float> %x0, i64 0 + %10 = extractelement <4 x float> %x1, i64 0 + %11 = extractelement <4 x float> %x2, i64 0 + %12 = call float @llvm.x86.avx512.vfmadd.f32(float %9, float %10, float %11, i32 11) + %13 = bitcast i8 %x3 to <8 x i1> + %14 = extractelement <8 x i1> %13, i64 0 + %15 = select i1 %14, float %12, float 0.000000e+00 + %16 = insertelement <4 x float> %x0, float %15, i64 0 + %res2 = fadd <4 x float> %8, %16 + ret <4 x float> %res2 +} + +define <4 x float> @test_int_x86_avx512_maskz_vfmadd_ss_load0(i8 zeroext %0, ptr nocapture readonly %1, float %2, float %3) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_vfmadd_ss_load0( +; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = load <4 x float>, ptr [[TMP1:%.*]], align 16 +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[TMP1]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 87960930222080 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP14]], align 16 +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[TMP11]], i64 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = or i32 [[_MSPROP]], [[TMP6]] +; CHECK-NEXT: [[_MSPROP2:%.*]] = or i32 [[_MSPROP1]], [[TMP7]] +; CHECK-NEXT: [[TMP16:%.*]] = tail call float @llvm.fma.f32(float [[TMP15]], float [[TMP2:%.*]], float [[TMP3:%.*]]) +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8 [[TMP8]] to <8 x i1> +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[TMP0:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP3:%.*]] = extractelement <8 x i1> [[TMP17]], i64 0 +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0 +; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[_MSPROP2]], i32 0 +; CHECK-NEXT: [[TMP21:%.*]] = bitcast float [[TMP16]] to i32 +; CHECK-NEXT: [[TMP22:%.*]] = xor i32 [[TMP21]], 0 +; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP22]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP23]], 0 +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP3]], i32 [[TMP24]], i32 [[TMP20]] +; 
CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP19]], float [[TMP16]], float 0.000000e+00 +; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <4 x i32> [[_MSLD]], i32 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP25]], i64 0 +; CHECK-NEXT: store <4 x i32> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[TMP26]] +; + %5 = load <4 x float>, ptr %1, align 16 + %6 = extractelement <4 x float> %5, i64 0 + %7 = tail call float @llvm.fma.f32(float %6, float %2, float %3) #2 + %8 = bitcast i8 %0 to <8 x i1> + %9 = extractelement <8 x i1> %8, i64 0 + %10 = select i1 %9, float %7, float 0.000000e+00 + %11 = insertelement <4 x float> %5, float %10, i64 0 + ret <4 x float> %11 +} + +define <2 x double> @test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmadd_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[X0:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[X2:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]] +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i64 [[_MSPROP3]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP8:%.*]] = call double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <8 x i1> [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP10]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[_MSPROP4]], i64 [[_MSPROP2]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast double [[TMP8]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast double [[TMP7]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[_MSPROP4]] +; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP16]], [[_MSPROP2]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP5]], i64 [[TMP17]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP11]], double [[TMP8]], double [[TMP7]] +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> [[X2]], double [[TMP18]], i64 0 +; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[X0]], i64 0 +; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[X1]], i64 0 +; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <2 x 
i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[X2]], i64 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[_MSPROP7]], 0 +; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i64 [[_MSPROP8]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP19]] +; CHECK-NEXT: [[_MSCMP20:%.*]] = icmp ne i64 [[_MSPROP9]], 0 +; CHECK-NEXT: [[_MSOR21:%.*]] = or i1 [[_MSOR]], [[_MSCMP20]] +; CHECK-NEXT: br i1 [[_MSOR21]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]] +; CHECK: 23: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 24: +; CHECK-NEXT: [[TMP25:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP20]], double [[TMP21]], double [[TMP22]], i32 11) +; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <2 x i64> [[TMP3]], i64 0, i64 0 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <2 x double> [[X2]], double [[TMP25]], i64 0 +; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x double> [[X0]], i64 0 +; CHECK-NEXT: [[_MSPROP12:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x double> [[X1]], i64 0 +; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x double> [[X2]], i64 0 +; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i64 [[_MSPROP11]], 0 +; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i64 [[_MSPROP12]], 0 +; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSCMP22]], [[_MSCMP23]] +; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i64 [[_MSPROP13]], 0 +; CHECK-NEXT: [[_MSOR26:%.*]] = or i1 [[_MSOR24]], [[_MSCMP25]] +; CHECK-NEXT: br i1 [[_MSOR26]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]] +; CHECK: 30: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 31: +; CHECK-NEXT: [[TMP32:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP27]], double [[TMP28]], double [[TMP29]], i32 10) +; CHECK-NEXT: [[TMP33:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP34:%.*]] = bitcast i8 [[X3]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP33]], i64 0 +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP34]], i64 0 +; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i64 0, i64 [[_MSPROP13]] +; CHECK-NEXT: [[TMP37:%.*]] = bitcast double [[TMP32]] to i64 +; CHECK-NEXT: [[TMP38:%.*]] = bitcast double [[TMP29]] to i64 +; CHECK-NEXT: [[TMP39:%.*]] = xor i64 [[TMP37]], [[TMP38]] +; CHECK-NEXT: [[TMP40:%.*]] = or i64 [[TMP39]], 0 +; CHECK-NEXT: [[TMP41:%.*]] = or i64 [[TMP40]], [[_MSPROP13]] +; CHECK-NEXT: [[_MSPROP_SELECT15:%.*]] = select i1 [[_MSPROP14]], i64 [[TMP41]], i64 [[TMP36]] +; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], double [[TMP32]], double [[TMP29]] +; CHECK-NEXT: [[_MSPROP16:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[_MSPROP_SELECT15]], i64 0 +; CHECK-NEXT: [[TMP43:%.*]] = insertelement <2 x double> [[X2]], double [[TMP42]], i64 0 +; CHECK-NEXT: [[_MSPROP17:%.*]] = or <2 x i64> [[_MSPROP6]], [[_MSPROP10]] +; CHECK-NEXT: [[RES3:%.*]] = fadd <2 x double> [[TMP19]], [[TMP26]] +; CHECK-NEXT: [[_MSPROP18:%.*]] = or <2 x i64> [[_MSPROP16]], [[_MSPROP17]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <2 x double> [[TMP43]], [[RES3]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP18]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES4]] +; + %1 = extractelement <2 x double> %x0, i64 0 + %2 = extractelement <2 x double> %x1, i64 
0 + %3 = extractelement <2 x double> %x2, i64 0 + %4 = call double @llvm.fma.f64(double %1, double %2, double %3) + %5 = bitcast i8 %x3 to <8 x i1> + %6 = extractelement <8 x i1> %5, i64 0 + %7 = select i1 %6, double %4, double %3 + %8 = insertelement <2 x double> %x2, double %7, i64 0 + %9 = extractelement <2 x double> %x0, i64 0 + %10 = extractelement <2 x double> %x1, i64 0 + %11 = extractelement <2 x double> %x2, i64 0 + %12 = call double @llvm.x86.avx512.vfmadd.f64(double %9, double %10, double %11, i32 11) + %13 = insertelement <2 x double> %x2, double %12, i64 0 + %14 = extractelement <2 x double> %x0, i64 0 + %15 = extractelement <2 x double> %x1, i64 0 + %16 = extractelement <2 x double> %x2, i64 0 + %17 = call double @llvm.x86.avx512.vfmadd.f64(double %14, double %15, double %16, i32 10) + %18 = bitcast i8 %x3 to <8 x i1> + %19 = extractelement <8 x i1> %18, i64 0 + %20 = select i1 %19, double %17, double %16 + %21 = insertelement <2 x double> %x2, double %20, i64 0 + %res3 = fadd <2 x double> %8, %13 + %res4 = fadd <2 x double> %21, %res3 + ret <2 x double> %res4 +} + +define <4 x float> @test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmadd_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[X2:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = or i32 [[_MSPROP]], [[_MSPROP1]] +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP3]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.fma.f32(float [[TMP5]], float [[TMP6]], float [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <8 x i1> [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP10]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[_MSPROP4]], i32 [[_MSPROP2]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast float [[TMP8]] to i32 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast float [[TMP7]] to i32 +; CHECK-NEXT: [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP15]], [[_MSPROP4]] +; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], [[_MSPROP2]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP5]], i32 [[TMP17]], i32 [[TMP12]] +; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP11]], float [[TMP8]], float [[TMP7]] +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x float> [[X2]], float [[TMP18]], i64 0 +; CHECK-NEXT: 
[[_MSPROP7:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[X0]], i64 0 +; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[X1]], i64 0 +; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[X2]], i64 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[_MSPROP7]], 0 +; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i32 [[_MSPROP8]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP19]] +; CHECK-NEXT: [[_MSCMP20:%.*]] = icmp ne i32 [[_MSPROP9]], 0 +; CHECK-NEXT: [[_MSOR21:%.*]] = or i1 [[_MSOR]], [[_MSCMP20]] +; CHECK-NEXT: br i1 [[_MSOR21]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]] +; CHECK: 23: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 24: +; CHECK-NEXT: [[TMP25:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP20]], float [[TMP21]], float [[TMP22]], i32 11) +; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <4 x i32> [[TMP3]], i32 0, i64 0 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x float> [[X2]], float [[TMP25]], i64 0 +; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x float> [[X0]], i64 0 +; CHECK-NEXT: [[_MSPROP12:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x float> [[X1]], i64 0 +; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x float> [[X2]], i64 0 +; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i32 [[_MSPROP11]], 0 +; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i32 [[_MSPROP12]], 0 +; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSCMP22]], [[_MSCMP23]] +; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i32 [[_MSPROP13]], 0 +; CHECK-NEXT: [[_MSOR26:%.*]] = or i1 [[_MSOR24]], [[_MSCMP25]] +; CHECK-NEXT: br i1 [[_MSOR26]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]] +; CHECK: 30: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 31: +; CHECK-NEXT: [[TMP32:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP27]], float [[TMP28]], float [[TMP29]], i32 10) +; CHECK-NEXT: [[TMP33:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP34:%.*]] = bitcast i8 [[X3]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP33]], i64 0 +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP34]], i64 0 +; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 0, i32 [[_MSPROP13]] +; CHECK-NEXT: [[TMP37:%.*]] = bitcast float [[TMP32]] to i32 +; CHECK-NEXT: [[TMP38:%.*]] = bitcast float [[TMP29]] to i32 +; CHECK-NEXT: [[TMP39:%.*]] = xor i32 [[TMP37]], [[TMP38]] +; CHECK-NEXT: [[TMP40:%.*]] = or i32 [[TMP39]], 0 +; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP40]], [[_MSPROP13]] +; CHECK-NEXT: [[_MSPROP_SELECT15:%.*]] = select i1 [[_MSPROP14]], i32 [[TMP41]], i32 [[TMP36]] +; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], float [[TMP32]], float [[TMP29]] +; CHECK-NEXT: [[_MSPROP16:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[_MSPROP_SELECT15]], i64 0 +; CHECK-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[X2]], float [[TMP42]], i64 0 +; CHECK-NEXT: [[_MSPROP17:%.*]] = or <4 x i32> [[_MSPROP6]], [[_MSPROP10]] +; CHECK-NEXT: [[RES3:%.*]] = fadd <4 x float> [[TMP19]], [[TMP26]] +; CHECK-NEXT: [[_MSPROP18:%.*]] = or <4 x 
i32> [[_MSPROP16]], [[_MSPROP17]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <4 x float> [[TMP43]], [[RES3]] +; CHECK-NEXT: store <4 x i32> [[_MSPROP18]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES4]] +; + %1 = extractelement <4 x float> %x0, i64 0 + %2 = extractelement <4 x float> %x1, i64 0 + %3 = extractelement <4 x float> %x2, i64 0 + %4 = call float @llvm.fma.f32(float %1, float %2, float %3) + %5 = bitcast i8 %x3 to <8 x i1> + %6 = extractelement <8 x i1> %5, i64 0 + %7 = select i1 %6, float %4, float %3 + %8 = insertelement <4 x float> %x2, float %7, i64 0 + %9 = extractelement <4 x float> %x0, i64 0 + %10 = extractelement <4 x float> %x1, i64 0 + %11 = extractelement <4 x float> %x2, i64 0 + %12 = call float @llvm.x86.avx512.vfmadd.f32(float %9, float %10, float %11, i32 11) + %13 = insertelement <4 x float> %x2, float %12, i64 0 + %14 = extractelement <4 x float> %x0, i64 0 + %15 = extractelement <4 x float> %x1, i64 0 + %16 = extractelement <4 x float> %x2, i64 0 + %17 = call float @llvm.x86.avx512.vfmadd.f32(float %14, float %15, float %16, i32 10) + %18 = bitcast i8 %x3 to <8 x i1> + %19 = extractelement <8 x i1> %18, i64 0 + %20 = select i1 %19, float %17, float %16 + %21 = insertelement <4 x float> %x2, float %20, i64 0 + %res3 = fadd <4 x float> %8, %13 + %res4 = fadd <4 x float> %21, %res3 + ret <4 x float> %res4 +} + +define void @fmadd_ss_mask_memfold(ptr %a, ptr %b, i8 %c) #0 { +; CHECK-LABEL: @fmadd_ss_mask_memfold( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[A_VAL:%.*]] = load float, ptr [[A:%.*]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[AV0:%.*]] = insertelement <4 x float> undef, float [[A_VAL]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1 +; CHECK-NEXT: [[AV1:%.*]] = insertelement <4 x float> [[AV0]], float 0.000000e+00, i32 1 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2 +; CHECK-NEXT: [[AV2:%.*]] = insertelement <4 x float> [[AV1]], float 0.000000e+00, i32 2 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3 +; CHECK-NEXT: [[AV:%.*]] = insertelement <4 x float> [[AV2]], float 0.000000e+00, i32 3 +; CHECK-NEXT: [[_MSCMP17:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP17]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[B_VAL:%.*]] = load float, ptr [[B:%.*]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = xor 
i64 [[TMP11]], 87960930222080 +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr +; CHECK-NEXT: [[_MSLD4:%.*]] = load i32, ptr [[TMP13]], align 4 +; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD4]], i32 0 +; CHECK-NEXT: [[BV0:%.*]] = insertelement <4 x float> undef, float [[B_VAL]], i32 0 +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <4 x i32> [[_MSPROP5]], i32 0, i32 1 +; CHECK-NEXT: [[BV1:%.*]] = insertelement <4 x float> [[BV0]], float 0.000000e+00, i32 1 +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <4 x i32> [[_MSPROP6]], i32 0, i32 2 +; CHECK-NEXT: [[BV2:%.*]] = insertelement <4 x float> [[BV1]], float 0.000000e+00, i32 2 +; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <4 x i32> [[_MSPROP7]], i32 0, i32 3 +; CHECK-NEXT: [[BV:%.*]] = insertelement <4 x float> [[BV2]], float 0.000000e+00, i32 3 +; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[_MSPROP3]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[AV]], i64 0 +; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <4 x i32> [[_MSPROP8]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[BV]], i64 0 +; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <4 x i32> [[_MSPROP3]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[AV]], i64 0 +; CHECK-NEXT: [[_MSPROP12:%.*]] = or i32 [[_MSPROP9]], [[_MSPROP10]] +; CHECK-NEXT: [[_MSPROP13:%.*]] = or i32 [[_MSPROP12]], [[_MSPROP11]] +; CHECK-NEXT: [[TMP17:%.*]] = call float @llvm.fma.f32(float [[TMP14]], float [[TMP15]], float [[TMP16]]) +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8 [[C:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP19]], i64 0 +; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[_MSPROP13]], i32 [[_MSPROP9]] +; CHECK-NEXT: [[TMP22:%.*]] = bitcast float [[TMP17]] to i32 +; CHECK-NEXT: [[TMP23:%.*]] = bitcast float [[TMP14]] to i32 +; CHECK-NEXT: [[TMP24:%.*]] = xor i32 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[TMP24]], [[_MSPROP13]] +; CHECK-NEXT: [[TMP26:%.*]] = or i32 [[TMP25]], [[_MSPROP9]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP14]], i32 [[TMP26]], i32 [[TMP21]] +; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP20]], float [[TMP17]], float [[TMP14]] +; CHECK-NEXT: [[_MSPROP15:%.*]] = insertelement <4 x i32> [[_MSPROP3]], i32 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x float> [[AV]], float [[TMP27]], i64 0 +; CHECK-NEXT: [[_MSPROP16:%.*]] = extractelement <4 x i32> [[_MSPROP15]], i32 0 +; CHECK-NEXT: [[SR:%.*]] = extractelement <4 x float> [[TMP28]], i32 0 +; CHECK-NEXT: [[_MSCMP18:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP18]], label [[TMP29:%.*]], label [[TMP30:%.*]], !prof [[PROF1]] +; CHECK: 29: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 30: +; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP32:%.*]] = xor i64 [[TMP31]], 87960930222080 +; CHECK-NEXT: [[TMP33:%.*]] = inttoptr i64 [[TMP32]] to ptr +; CHECK-NEXT: store i32 [[_MSPROP16]], ptr [[TMP33]], align 4 +; CHECK-NEXT: store float [[SR]], ptr [[A]], align 4 +; CHECK-NEXT: ret void +; + %a.val = load float, ptr %a + %av0 = insertelement <4 x float> undef, float %a.val, i32 0 + %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1 + %av2 = insertelement <4 x float> 
%av1, float 0.000000e+00, i32 2 + %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3 + + %b.val = load float, ptr %b + %bv0 = insertelement <4 x float> undef, float %b.val, i32 0 + %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1 + %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2 + %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3 + %1 = extractelement <4 x float> %av, i64 0 + %2 = extractelement <4 x float> %bv, i64 0 + %3 = extractelement <4 x float> %av, i64 0 + %4 = call float @llvm.fma.f32(float %1, float %2, float %3) + %5 = bitcast i8 %c to <8 x i1> + %6 = extractelement <8 x i1> %5, i64 0 + %7 = select i1 %6, float %4, float %1 + %8 = insertelement <4 x float> %av, float %7, i64 0 + %sr = extractelement <4 x float> %8, i32 0 + store float %sr, ptr %a + ret void +} + +define void @fmadd_ss_maskz_memfold(ptr %a, ptr %b, i8 %c) #0 { +; CHECK-LABEL: @fmadd_ss_maskz_memfold( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[A_VAL:%.*]] = load float, ptr [[A:%.*]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[AV0:%.*]] = insertelement <4 x float> undef, float [[A_VAL]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1 +; CHECK-NEXT: [[AV1:%.*]] = insertelement <4 x float> [[AV0]], float 0.000000e+00, i32 1 +; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2 +; CHECK-NEXT: [[AV2:%.*]] = insertelement <4 x float> [[AV1]], float 0.000000e+00, i32 2 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3 +; CHECK-NEXT: [[AV:%.*]] = insertelement <4 x float> [[AV2]], float 0.000000e+00, i32 3 +; CHECK-NEXT: [[_MSCMP17:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP17]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[B_VAL:%.*]] = load float, ptr [[B:%.*]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080 +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr +; CHECK-NEXT: [[_MSLD4:%.*]] = load i32, ptr [[TMP13]], align 4 +; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD4]], i32 0 +; CHECK-NEXT: [[BV0:%.*]] = insertelement <4 x float> undef, float [[B_VAL]], i32 0 +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <4 x i32> [[_MSPROP5]], i32 0, i32 1 +; CHECK-NEXT: [[BV1:%.*]] = insertelement <4 x float> [[BV0]], float 0.000000e+00, i32 1 +; CHECK-NEXT: 
[[_MSPROP7:%.*]] = insertelement <4 x i32> [[_MSPROP6]], i32 0, i32 2 +; CHECK-NEXT: [[BV2:%.*]] = insertelement <4 x float> [[BV1]], float 0.000000e+00, i32 2 +; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <4 x i32> [[_MSPROP7]], i32 0, i32 3 +; CHECK-NEXT: [[BV:%.*]] = insertelement <4 x float> [[BV2]], float 0.000000e+00, i32 3 +; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[_MSPROP3]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[AV]], i64 0 +; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <4 x i32> [[_MSPROP8]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[BV]], i64 0 +; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <4 x i32> [[_MSPROP3]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[AV]], i64 0 +; CHECK-NEXT: [[_MSPROP12:%.*]] = or i32 [[_MSPROP9]], [[_MSPROP10]] +; CHECK-NEXT: [[_MSPROP13:%.*]] = or i32 [[_MSPROP12]], [[_MSPROP11]] +; CHECK-NEXT: [[TMP17:%.*]] = call float @llvm.fma.f32(float [[TMP14]], float [[TMP15]], float [[TMP16]]) +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8 [[C:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP19]], i64 0 +; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[_MSPROP13]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = bitcast float [[TMP17]] to i32 +; CHECK-NEXT: [[TMP23:%.*]] = xor i32 [[TMP22]], 0 +; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP23]], [[_MSPROP13]] +; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[TMP24]], 0 +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP14]], i32 [[TMP25]], i32 [[TMP21]] +; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP20]], float [[TMP17]], float 0.000000e+00 +; CHECK-NEXT: [[_MSPROP15:%.*]] = insertelement <4 x i32> [[_MSPROP3]], i32 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x float> [[AV]], float [[TMP26]], i64 0 +; CHECK-NEXT: [[_MSPROP16:%.*]] = extractelement <4 x i32> [[_MSPROP15]], i32 0 +; CHECK-NEXT: [[SR:%.*]] = extractelement <4 x float> [[TMP27]], i32 0 +; CHECK-NEXT: [[_MSCMP18:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP18]], label [[TMP28:%.*]], label [[TMP29:%.*]], !prof [[PROF1]] +; CHECK: 28: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 29: +; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], 87960930222080 +; CHECK-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP31]] to ptr +; CHECK-NEXT: store i32 [[_MSPROP16]], ptr [[TMP32]], align 4 +; CHECK-NEXT: store float [[SR]], ptr [[A]], align 4 +; CHECK-NEXT: ret void +; + %a.val = load float, ptr %a + %av0 = insertelement <4 x float> undef, float %a.val, i32 0 + %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1 + %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2 + %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3 + + %b.val = load float, ptr %b + %bv0 = insertelement <4 x float> undef, float %b.val, i32 0 + %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1 + %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2 + %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3 + %1 = extractelement <4 x float> %av, i64 0 + %2 = extractelement <4 x float> %bv, i64 0 + %3 = extractelement <4 x float> %av, i64 0 + %4 = call float @llvm.fma.f32(float %1, float %2, float %3) + %5 = bitcast i8 %c to <8 x 
i1> + %6 = extractelement <8 x i1> %5, i64 0 + %7 = select i1 %6, float %4, float 0.000000e+00 + %8 = insertelement <4 x float> %av, float %7, i64 0 + %sr = extractelement <4 x float> %8, i32 0 + store float %sr, ptr %a + ret void +} + +define void @fmadd_sd_mask_memfold(ptr %a, ptr %b, i8 %c) #0 { +; CHECK-LABEL: @fmadd_sd_mask_memfold( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[A_VAL:%.*]] = load double, ptr [[A:%.*]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[AV0:%.*]] = insertelement <2 x double> undef, double [[A_VAL]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1 +; CHECK-NEXT: [[AV:%.*]] = insertelement <2 x double> [[AV0]], double 0.000000e+00, i32 1 +; CHECK-NEXT: [[_MSCMP13:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP13]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[B_VAL:%.*]] = load double, ptr [[B:%.*]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080 +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr +; CHECK-NEXT: [[_MSLD2:%.*]] = load i64, ptr [[TMP13]], align 8 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD2]], i32 0 +; CHECK-NEXT: [[BV0:%.*]] = insertelement <2 x double> undef, double [[B_VAL]], i32 0 +; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <2 x i64> [[_MSPROP3]], i64 0, i32 1 +; CHECK-NEXT: [[BV:%.*]] = insertelement <2 x double> [[BV0]], double 0.000000e+00, i32 1 +; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <2 x i64> [[_MSPROP1]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[AV]], i64 0 +; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <2 x i64> [[_MSPROP4]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[BV]], i64 0 +; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <2 x i64> [[_MSPROP1]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[AV]], i64 0 +; CHECK-NEXT: [[_MSPROP8:%.*]] = or i64 [[_MSPROP5]], [[_MSPROP6]] +; CHECK-NEXT: [[_MSPROP9:%.*]] = or i64 [[_MSPROP8]], [[_MSPROP7]] +; CHECK-NEXT: [[TMP17:%.*]] = call double @llvm.fma.f64(double [[TMP14]], double [[TMP15]], double [[TMP16]]) +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8 [[C:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> 
[[TMP19]], i64 0 +; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i64 [[_MSPROP9]], i64 [[_MSPROP5]] +; CHECK-NEXT: [[TMP22:%.*]] = bitcast double [[TMP17]] to i64 +; CHECK-NEXT: [[TMP23:%.*]] = bitcast double [[TMP14]] to i64 +; CHECK-NEXT: [[TMP24:%.*]] = xor i64 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP24]], [[_MSPROP9]] +; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP25]], [[_MSPROP5]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP10]], i64 [[TMP26]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP20]], double [[TMP17]], double [[TMP14]] +; CHECK-NEXT: [[_MSPROP11:%.*]] = insertelement <2 x i64> [[_MSPROP1]], i64 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x double> [[AV]], double [[TMP27]], i64 0 +; CHECK-NEXT: [[_MSPROP12:%.*]] = extractelement <2 x i64> [[_MSPROP11]], i32 0 +; CHECK-NEXT: [[SR:%.*]] = extractelement <2 x double> [[TMP28]], i32 0 +; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP14]], label [[TMP29:%.*]], label [[TMP30:%.*]], !prof [[PROF1]] +; CHECK: 29: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 30: +; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP32:%.*]] = xor i64 [[TMP31]], 87960930222080 +; CHECK-NEXT: [[TMP33:%.*]] = inttoptr i64 [[TMP32]] to ptr +; CHECK-NEXT: store i64 [[_MSPROP12]], ptr [[TMP33]], align 8 +; CHECK-NEXT: store double [[SR]], ptr [[A]], align 8 +; CHECK-NEXT: ret void +; + %a.val = load double, ptr %a + %av0 = insertelement <2 x double> undef, double %a.val, i32 0 + %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1 + + %b.val = load double, ptr %b + %bv0 = insertelement <2 x double> undef, double %b.val, i32 0 + %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1 + %1 = extractelement <2 x double> %av, i64 0 + %2 = extractelement <2 x double> %bv, i64 0 + %3 = extractelement <2 x double> %av, i64 0 + %4 = call double @llvm.fma.f64(double %1, double %2, double %3) + %5 = bitcast i8 %c to <8 x i1> + %6 = extractelement <8 x i1> %5, i64 0 + %7 = select i1 %6, double %4, double %1 + %8 = insertelement <2 x double> %av, double %7, i64 0 + %sr = extractelement <2 x double> %8, i32 0 + store double %sr, ptr %a + ret void +} + +define void @fmadd_sd_maskz_memfold(ptr %a, ptr %b, i8 %c) #0 { +; CHECK-LABEL: @fmadd_sd_maskz_memfold( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[A_VAL:%.*]] = load double, ptr [[A:%.*]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0 +; CHECK-NEXT: [[AV0:%.*]] = insertelement <2 x double> undef, double 
[[A_VAL]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1 +; CHECK-NEXT: [[AV:%.*]] = insertelement <2 x double> [[AV0]], double 0.000000e+00, i32 1 +; CHECK-NEXT: [[_MSCMP13:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP13]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]] +; CHECK: 9: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 10: +; CHECK-NEXT: [[B_VAL:%.*]] = load double, ptr [[B:%.*]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080 +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr +; CHECK-NEXT: [[_MSLD2:%.*]] = load i64, ptr [[TMP13]], align 8 +; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD2]], i32 0 +; CHECK-NEXT: [[BV0:%.*]] = insertelement <2 x double> undef, double [[B_VAL]], i32 0 +; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <2 x i64> [[_MSPROP3]], i64 0, i32 1 +; CHECK-NEXT: [[BV:%.*]] = insertelement <2 x double> [[BV0]], double 0.000000e+00, i32 1 +; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <2 x i64> [[_MSPROP1]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[AV]], i64 0 +; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <2 x i64> [[_MSPROP4]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[BV]], i64 0 +; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <2 x i64> [[_MSPROP1]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[AV]], i64 0 +; CHECK-NEXT: [[_MSPROP8:%.*]] = or i64 [[_MSPROP5]], [[_MSPROP6]] +; CHECK-NEXT: [[_MSPROP9:%.*]] = or i64 [[_MSPROP8]], [[_MSPROP7]] +; CHECK-NEXT: [[TMP17:%.*]] = call double @llvm.fma.f64(double [[TMP14]], double [[TMP15]], double [[TMP16]]) +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8 [[C:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0 +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP19]], i64 0 +; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i64 [[_MSPROP9]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = bitcast double [[TMP17]] to i64 +; CHECK-NEXT: [[TMP23:%.*]] = xor i64 [[TMP22]], 0 +; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP23]], [[_MSPROP9]] +; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP24]], 0 +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP10]], i64 [[TMP25]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP20]], double [[TMP17]], double 0.000000e+00 +; CHECK-NEXT: [[_MSPROP11:%.*]] = insertelement <2 x i64> [[_MSPROP1]], i64 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <2 x double> [[AV]], double [[TMP26]], i64 0 +; CHECK-NEXT: [[_MSPROP12:%.*]] = extractelement <2 x i64> [[_MSPROP11]], i32 0 +; CHECK-NEXT: [[SR:%.*]] = extractelement <2 x double> [[TMP27]], i32 0 +; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP14]], label [[TMP28:%.*]], label [[TMP29:%.*]], !prof [[PROF1]] +; CHECK: 28: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 29: +; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr [[A]] to i64 +; CHECK-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], 87960930222080 +; CHECK-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP31]] to ptr +; CHECK-NEXT: store i64 [[_MSPROP12]], ptr [[TMP32]], align 8 +; CHECK-NEXT: store double [[SR]], ptr [[A]], align 8 +; CHECK-NEXT: ret void +; + %a.val 
= load double, ptr %a + %av0 = insertelement <2 x double> undef, double %a.val, i32 0 + %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1 + + %b.val = load double, ptr %b + %bv0 = insertelement <2 x double> undef, double %b.val, i32 0 + %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1 + %1 = extractelement <2 x double> %av, i64 0 + %2 = extractelement <2 x double> %bv, i64 0 + %3 = extractelement <2 x double> %av, i64 0 + %4 = call double @llvm.fma.f64(double %1, double %2, double %3) + %5 = bitcast i8 %c to <8 x i1> + %6 = extractelement <8 x i1> %5, i64 0 + %7 = select i1 %6, double %4, double 0.000000e+00 + %8 = insertelement <2 x double> %av, double %7, i64 0 + %sr = extractelement <2 x double> %8, i32 0 + store double %sr, ptr %a + ret void +} + +define <2 x double> @test_int_x86_avx512_mask3_vfmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmsub_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = fneg <2 x double> [[X2:%.*]] +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[X0:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP5]], i64 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]] +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i64 [[_MSPROP3]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP9:%.*]] = call double @llvm.fma.f64(double [[TMP6]], double [[TMP7]], double [[TMP8]]) +; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[X2]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP11]], i64 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[TMP12]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[_MSPROP4]], i64 [[_MSPROP5]] +; CHECK-NEXT: [[TMP15:%.*]] = bitcast double [[TMP9]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = bitcast double [[TMP10]] to i64 +; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP17]], [[_MSPROP4]] +; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP18]], [[_MSPROP5]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i64 [[TMP19]], i64 [[TMP14]] +; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP13]], double [[TMP9]], double [[TMP10]] +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x double> [[X2]], double [[TMP20]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = fneg <2 x double> [[X2]] +; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement 
<2 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x double> [[X0]], i64 0 +; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x double> [[X1]], i64 0 +; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x double> [[TMP22]], i64 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[_MSPROP8]], 0 +; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i64 [[_MSPROP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP22]] +; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i64 [[_MSPROP10]], 0 +; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSOR]], [[_MSCMP23]] +; CHECK-NEXT: br i1 [[_MSOR24]], label [[TMP26:%.*]], label [[TMP27:%.*]], !prof [[PROF1]] +; CHECK: 26: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 27: +; CHECK-NEXT: [[TMP28:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP23]], double [[TMP24]], double [[TMP25]], i32 11) +; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x double> [[X2]], i64 0 +; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <2 x i64> [[TMP1]], i64 0, i64 0 +; CHECK-NEXT: [[TMP30:%.*]] = insertelement <2 x double> [[X2]], double [[TMP28]], i64 0 +; CHECK-NEXT: [[TMP31:%.*]] = fneg <2 x double> [[X2]] +; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x double> [[X0]], i64 0 +; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x double> [[X1]], i64 0 +; CHECK-NEXT: [[_MSPROP15:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[TMP31]], i64 0 +; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i64 [[_MSPROP13]], 0 +; CHECK-NEXT: [[_MSCMP26:%.*]] = icmp ne i64 [[_MSPROP14]], 0 +; CHECK-NEXT: [[_MSOR27:%.*]] = or i1 [[_MSCMP25]], [[_MSCMP26]] +; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i64 [[_MSPROP15]], 0 +; CHECK-NEXT: [[_MSOR29:%.*]] = or i1 [[_MSOR27]], [[_MSCMP28]] +; CHECK-NEXT: br i1 [[_MSOR29]], label [[TMP35:%.*]], label [[TMP36:%.*]], !prof [[PROF1]] +; CHECK: 35: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 36: +; CHECK-NEXT: [[TMP37:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP32]], double [[TMP33]], double [[TMP34]], i32 10) +; CHECK-NEXT: [[_MSPROP16:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x double> [[X2]], i64 0 +; CHECK-NEXT: [[TMP39:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP40:%.*]] = bitcast i8 [[X3]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP17:%.*]] = extractelement <8 x i1> [[TMP39]], i64 0 +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <8 x i1> [[TMP40]], i64 0 +; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i64 0, i64 [[_MSPROP16]] +; CHECK-NEXT: [[TMP43:%.*]] = bitcast double [[TMP37]] to i64 +; CHECK-NEXT: [[TMP44:%.*]] = bitcast double [[TMP38]] to i64 +; CHECK-NEXT: [[TMP45:%.*]] = xor i64 [[TMP43]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = or i64 [[TMP45]], 0 +; CHECK-NEXT: [[TMP47:%.*]] = or i64 [[TMP46]], [[_MSPROP16]] +; CHECK-NEXT: [[_MSPROP_SELECT18:%.*]] = select i1 [[_MSPROP17]], i64 [[TMP47]], i64 [[TMP42]] +; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP41]], double [[TMP37]], double [[TMP38]] +; CHECK-NEXT: 
[[_MSPROP19:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[_MSPROP_SELECT18]], i64 0 +; CHECK-NEXT: [[TMP49:%.*]] = insertelement <2 x double> [[X2]], double [[TMP48]], i64 0 +; CHECK-NEXT: [[_MSPROP20:%.*]] = or <2 x i64> [[_MSPROP7]], [[_MSPROP12]] +; CHECK-NEXT: [[RES3:%.*]] = fadd <2 x double> [[TMP21]], [[TMP30]] +; CHECK-NEXT: [[_MSPROP21:%.*]] = or <2 x i64> [[_MSPROP19]], [[_MSPROP20]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <2 x double> [[TMP49]], [[RES3]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP21]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES4]] +; + %1 = fneg <2 x double> %x2 + %2 = extractelement <2 x double> %x0, i64 0 + %3 = extractelement <2 x double> %x1, i64 0 + %4 = extractelement <2 x double> %1, i64 0 + %5 = call double @llvm.fma.f64(double %2, double %3, double %4) + %6 = extractelement <2 x double> %x2, i64 0 + %7 = bitcast i8 %x3 to <8 x i1> + %8 = extractelement <8 x i1> %7, i64 0 + %9 = select i1 %8, double %5, double %6 + %10 = insertelement <2 x double> %x2, double %9, i64 0 + %11 = fneg <2 x double> %x2 + %12 = extractelement <2 x double> %x0, i64 0 + %13 = extractelement <2 x double> %x1, i64 0 + %14 = extractelement <2 x double> %11, i64 0 + %15 = call double @llvm.x86.avx512.vfmadd.f64(double %12, double %13, double %14, i32 11) + %16 = extractelement <2 x double> %x2, i64 0 + %17 = insertelement <2 x double> %x2, double %15, i64 0 + %18 = fneg <2 x double> %x2 + %19 = extractelement <2 x double> %x0, i64 0 + %20 = extractelement <2 x double> %x1, i64 0 + %21 = extractelement <2 x double> %18, i64 0 + %22 = call double @llvm.x86.avx512.vfmadd.f64(double %19, double %20, double %21, i32 10) + %23 = extractelement <2 x double> %x2, i64 0 + %24 = bitcast i8 %x3 to <8 x i1> + %25 = extractelement <8 x i1> %24, i64 0 + %26 = select i1 %25, double %22, double %23 + %27 = insertelement <2 x double> %x2, double %26, i64 0 + %res3 = fadd <2 x double> %10, %17 + %res4 = fadd <2 x double> %27, %res3 + ret <2 x double> %res4 +} + +define <4 x float> @test_int_x86_avx512_mask3_vfmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmsub_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = fneg <4 x float> [[X2:%.*]] +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP5]], i64 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = or i32 [[_MSPROP]], [[_MSPROP1]] +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP3]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP9:%.*]] = call float @llvm.fma.f32(float [[TMP6]], float [[TMP7]], float [[TMP8]]) +; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0 +; CHECK-NEXT: 
[[TMP10:%.*]] = extractelement <4 x float> [[X2]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP11]], i64 0 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[TMP12]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[_MSPROP4]], i32 [[_MSPROP5]] +; CHECK-NEXT: [[TMP15:%.*]] = bitcast float [[TMP9]] to i32 +; CHECK-NEXT: [[TMP16:%.*]] = bitcast float [[TMP10]] to i32 +; CHECK-NEXT: [[TMP17:%.*]] = xor i32 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = or i32 [[TMP17]], [[_MSPROP4]] +; CHECK-NEXT: [[TMP19:%.*]] = or i32 [[TMP18]], [[_MSPROP5]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i32 [[TMP19]], i32 [[TMP14]] +; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP13]], float [[TMP9]], float [[TMP10]] +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x float> [[X2]], float [[TMP20]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = fneg <4 x float> [[X2]] +; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x float> [[X0]], i64 0 +; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x float> [[X1]], i64 0 +; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x float> [[TMP22]], i64 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[_MSPROP8]], 0 +; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i32 [[_MSPROP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP22]] +; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i32 [[_MSPROP10]], 0 +; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSOR]], [[_MSCMP23]] +; CHECK-NEXT: br i1 [[_MSOR24]], label [[TMP26:%.*]], label [[TMP27:%.*]], !prof [[PROF1]] +; CHECK: 26: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 27: +; CHECK-NEXT: [[TMP28:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP23]], float [[TMP24]], float [[TMP25]], i32 11) +; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x float> [[X2]], i64 0 +; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <4 x i32> [[TMP1]], i32 0, i64 0 +; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x float> [[X2]], float [[TMP28]], i64 0 +; CHECK-NEXT: [[TMP31:%.*]] = fneg <4 x float> [[X2]] +; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x float> [[X0]], i64 0 +; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x float> [[X1]], i64 0 +; CHECK-NEXT: [[_MSPROP15:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x float> [[TMP31]], i64 0 +; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i32 [[_MSPROP13]], 0 +; CHECK-NEXT: [[_MSCMP26:%.*]] = icmp ne i32 [[_MSPROP14]], 0 +; CHECK-NEXT: [[_MSOR27:%.*]] = or i1 [[_MSCMP25]], [[_MSCMP26]] +; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i32 [[_MSPROP15]], 0 +; CHECK-NEXT: [[_MSOR29:%.*]] = or i1 [[_MSOR27]], [[_MSCMP28]] +; CHECK-NEXT: br i1 [[_MSOR29]], label [[TMP35:%.*]], label [[TMP36:%.*]], !prof [[PROF1]] +; CHECK: 35: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; 
CHECK-NEXT: unreachable +; CHECK: 36: +; CHECK-NEXT: [[TMP37:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP32]], float [[TMP33]], float [[TMP34]], i32 10) +; CHECK-NEXT: [[_MSPROP16:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <4 x float> [[X2]], i64 0 +; CHECK-NEXT: [[TMP39:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP40:%.*]] = bitcast i8 [[X3]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP17:%.*]] = extractelement <8 x i1> [[TMP39]], i64 0 +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <8 x i1> [[TMP40]], i64 0 +; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 0, i32 [[_MSPROP16]] +; CHECK-NEXT: [[TMP43:%.*]] = bitcast float [[TMP37]] to i32 +; CHECK-NEXT: [[TMP44:%.*]] = bitcast float [[TMP38]] to i32 +; CHECK-NEXT: [[TMP45:%.*]] = xor i32 [[TMP43]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = or i32 [[TMP45]], 0 +; CHECK-NEXT: [[TMP47:%.*]] = or i32 [[TMP46]], [[_MSPROP16]] +; CHECK-NEXT: [[_MSPROP_SELECT18:%.*]] = select i1 [[_MSPROP17]], i32 [[TMP47]], i32 [[TMP42]] +; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP41]], float [[TMP37]], float [[TMP38]] +; CHECK-NEXT: [[_MSPROP19:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[_MSPROP_SELECT18]], i64 0 +; CHECK-NEXT: [[TMP49:%.*]] = insertelement <4 x float> [[X2]], float [[TMP48]], i64 0 +; CHECK-NEXT: [[_MSPROP20:%.*]] = or <4 x i32> [[_MSPROP7]], [[_MSPROP12]] +; CHECK-NEXT: [[RES3:%.*]] = fadd <4 x float> [[TMP21]], [[TMP30]] +; CHECK-NEXT: [[_MSPROP21:%.*]] = or <4 x i32> [[_MSPROP19]], [[_MSPROP20]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <4 x float> [[TMP49]], [[RES3]] +; CHECK-NEXT: store <4 x i32> [[_MSPROP21]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES4]] +; + %1 = fneg <4 x float> %x2 + %2 = extractelement <4 x float> %x0, i64 0 + %3 = extractelement <4 x float> %x1, i64 0 + %4 = extractelement <4 x float> %1, i64 0 + %5 = call float @llvm.fma.f32(float %2, float %3, float %4) + %6 = extractelement <4 x float> %x2, i64 0 + %7 = bitcast i8 %x3 to <8 x i1> + %8 = extractelement <8 x i1> %7, i64 0 + %9 = select i1 %8, float %5, float %6 + %10 = insertelement <4 x float> %x2, float %9, i64 0 + %11 = fneg <4 x float> %x2 + %12 = extractelement <4 x float> %x0, i64 0 + %13 = extractelement <4 x float> %x1, i64 0 + %14 = extractelement <4 x float> %11, i64 0 + %15 = call float @llvm.x86.avx512.vfmadd.f32(float %12, float %13, float %14, i32 11) + %16 = extractelement <4 x float> %x2, i64 0 + %17 = insertelement <4 x float> %x2, float %15, i64 0 + %18 = fneg <4 x float> %x2 + %19 = extractelement <4 x float> %x0, i64 0 + %20 = extractelement <4 x float> %x1, i64 0 + %21 = extractelement <4 x float> %18, i64 0 + %22 = call float @llvm.x86.avx512.vfmadd.f32(float %19, float %20, float %21, i32 10) + %23 = extractelement <4 x float> %x2, i64 0 + %24 = bitcast i8 %x3 to <8 x i1> + %25 = extractelement <8 x i1> %24, i64 0 + %26 = select i1 %25, float %22, float %23 + %27 = insertelement <4 x float> %x2, float %26, i64 0 + %res3 = fadd <4 x float> %10, %17 + %res4 = fadd <4 x float> %27, %res3 + ret <4 x float> %res4 +} + +define <2 x double> @test_int_x86_avx512_mask3_vfnmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask3_vfnmsub_sd( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = 
load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = fneg <2 x double> [[X0:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = fneg <2 x double> [[X2:%.*]] +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i64 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP6]], i64 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]] +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i64 [[_MSPROP3]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP10:%.*]] = call double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP9]]) +; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[X2]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP12]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP13]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i64 [[_MSPROP4]], i64 [[_MSPROP5]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast double [[TMP10]] to i64 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast double [[TMP11]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP18]], [[_MSPROP4]] +; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP19]], [[_MSPROP5]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i64 [[TMP20]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP14]], double [[TMP10]], double [[TMP11]] +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x double> [[X2]], double [[TMP21]], i64 0 +; CHECK-NEXT: [[TMP23:%.*]] = fneg <2 x double> [[X0]] +; CHECK-NEXT: [[TMP24:%.*]] = fneg <2 x double> [[X2]] +; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x double> [[TMP23]], i64 0 +; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x double> [[X1]], i64 0 +; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x double> [[TMP24]], i64 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[_MSPROP8]], 0 +; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i64 [[_MSPROP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP22]] +; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i64 [[_MSPROP10]], 0 +; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSOR]], [[_MSCMP23]] +; CHECK-NEXT: br i1 [[_MSOR24]], label [[TMP28:%.*]], label [[TMP29:%.*]], !prof [[PROF1]] +; CHECK: 28: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 29: +; CHECK-NEXT: [[TMP30:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP25]], double [[TMP26]], double [[TMP27]], i32 11) +; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <2 x 
i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x double> [[X2]], i64 0 +; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <2 x i64> [[TMP2]], i64 0, i64 0 +; CHECK-NEXT: [[TMP32:%.*]] = insertelement <2 x double> [[X2]], double [[TMP30]], i64 0 +; CHECK-NEXT: [[TMP33:%.*]] = fneg <2 x double> [[X0]] +; CHECK-NEXT: [[TMP34:%.*]] = fneg <2 x double> [[X2]] +; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x double> [[TMP33]], i64 0 +; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <2 x double> [[X1]], i64 0 +; CHECK-NEXT: [[_MSPROP15:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x double> [[TMP34]], i64 0 +; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i64 [[_MSPROP13]], 0 +; CHECK-NEXT: [[_MSCMP26:%.*]] = icmp ne i64 [[_MSPROP14]], 0 +; CHECK-NEXT: [[_MSOR27:%.*]] = or i1 [[_MSCMP25]], [[_MSCMP26]] +; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i64 [[_MSPROP15]], 0 +; CHECK-NEXT: [[_MSOR29:%.*]] = or i1 [[_MSOR27]], [[_MSCMP28]] +; CHECK-NEXT: br i1 [[_MSOR29]], label [[TMP38:%.*]], label [[TMP39:%.*]], !prof [[PROF1]] +; CHECK: 38: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 39: +; CHECK-NEXT: [[TMP40:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP35]], double [[TMP36]], double [[TMP37]], i32 10) +; CHECK-NEXT: [[_MSPROP16:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <2 x double> [[X2]], i64 0 +; CHECK-NEXT: [[TMP42:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP43:%.*]] = bitcast i8 [[X3]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP17:%.*]] = extractelement <8 x i1> [[TMP42]], i64 0 +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <8 x i1> [[TMP43]], i64 0 +; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i64 0, i64 [[_MSPROP16]] +; CHECK-NEXT: [[TMP46:%.*]] = bitcast double [[TMP40]] to i64 +; CHECK-NEXT: [[TMP47:%.*]] = bitcast double [[TMP41]] to i64 +; CHECK-NEXT: [[TMP48:%.*]] = xor i64 [[TMP46]], [[TMP47]] +; CHECK-NEXT: [[TMP49:%.*]] = or i64 [[TMP48]], 0 +; CHECK-NEXT: [[TMP50:%.*]] = or i64 [[TMP49]], [[_MSPROP16]] +; CHECK-NEXT: [[_MSPROP_SELECT18:%.*]] = select i1 [[_MSPROP17]], i64 [[TMP50]], i64 [[TMP45]] +; CHECK-NEXT: [[TMP51:%.*]] = select i1 [[TMP44]], double [[TMP40]], double [[TMP41]] +; CHECK-NEXT: [[_MSPROP19:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[_MSPROP_SELECT18]], i64 0 +; CHECK-NEXT: [[TMP52:%.*]] = insertelement <2 x double> [[X2]], double [[TMP51]], i64 0 +; CHECK-NEXT: [[_MSPROP20:%.*]] = or <2 x i64> [[_MSPROP7]], [[_MSPROP12]] +; CHECK-NEXT: [[RES3:%.*]] = fadd <2 x double> [[TMP22]], [[TMP32]] +; CHECK-NEXT: [[_MSPROP21:%.*]] = or <2 x i64> [[_MSPROP19]], [[_MSPROP20]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <2 x double> [[TMP52]], [[RES3]] +; CHECK-NEXT: store <2 x i64> [[_MSPROP21]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <2 x double> [[RES4]] +; + %1 = fneg <2 x double> %x0 + %2 = fneg <2 x double> %x2 + %3 = extractelement <2 x double> %1, i64 0 + %4 = extractelement <2 x double> %x1, i64 0 + %5 = extractelement <2 x double> %2, i64 0 + %6 = call double @llvm.fma.f64(double %3, double %4, double %5) + %7 = extractelement <2 x double> %x2, i64 0 + %8 = bitcast i8 %x3 to <8 x i1> + %9 = extractelement <8 x i1> %8, i64 0 + %10 = select i1 %9, double %6, double %7 + %11 = insertelement <2 x double> %x2, double %10, 
i64 0 + %12 = fneg <2 x double> %x0 + %13 = fneg <2 x double> %x2 + %14 = extractelement <2 x double> %12, i64 0 + %15 = extractelement <2 x double> %x1, i64 0 + %16 = extractelement <2 x double> %13, i64 0 + %17 = call double @llvm.x86.avx512.vfmadd.f64(double %14, double %15, double %16, i32 11) + %18 = extractelement <2 x double> %x2, i64 0 + %19 = insertelement <2 x double> %x2, double %17, i64 0 + %20 = fneg <2 x double> %x0 + %21 = fneg <2 x double> %x2 + %22 = extractelement <2 x double> %20, i64 0 + %23 = extractelement <2 x double> %x1, i64 0 + %24 = extractelement <2 x double> %21, i64 0 + %25 = call double @llvm.x86.avx512.vfmadd.f64(double %22, double %23, double %24, i32 10) + %26 = extractelement <2 x double> %x2, i64 0 + %27 = bitcast i8 %x3 to <8 x i1> + %28 = extractelement <8 x i1> %27, i64 0 + %29 = select i1 %28, double %25, double %26 + %30 = insertelement <2 x double> %x2, double %29, i64 0 + %res3 = fadd <2 x double> %11, %19 + %res4 = fadd <2 x double> %30, %res3 + ret <2 x double> %res4 +} + +define <4 x float> @test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask3_vfnmsub_ss( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = fneg <4 x float> [[X0:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = fneg <4 x float> [[X2:%.*]] +; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP5]], i64 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[TMP6]], i64 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = or i32 [[_MSPROP]], [[_MSPROP1]] +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP3]], [[_MSPROP2]] +; CHECK-NEXT: [[TMP10:%.*]] = call float @llvm.fma.f32(float [[TMP7]], float [[TMP8]], float [[TMP9]]) +; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[X2]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP12]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP13]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[_MSPROP4]], i32 [[_MSPROP5]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast float [[TMP10]] to i32 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast float [[TMP11]] to i32 +; CHECK-NEXT: [[TMP18:%.*]] = xor i32 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = or i32 [[TMP18]], [[_MSPROP4]] +; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP19]], [[_MSPROP5]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i32 [[TMP20]], i32 [[TMP15]] +; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP14]], float [[TMP10]], float [[TMP11]] +; 
CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x float> [[X2]], float [[TMP21]], i64 0 +; CHECK-NEXT: [[TMP23:%.*]] = fneg <4 x float> [[X0]] +; CHECK-NEXT: [[TMP24:%.*]] = fneg <4 x float> [[X2]] +; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x float> [[TMP23]], i64 0 +; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x float> [[X1]], i64 0 +; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x float> [[TMP24]], i64 0 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[_MSPROP8]], 0 +; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i32 [[_MSPROP9]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP22]] +; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i32 [[_MSPROP10]], 0 +; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSOR]], [[_MSCMP23]] +; CHECK-NEXT: br i1 [[_MSOR24]], label [[TMP28:%.*]], label [[TMP29:%.*]], !prof [[PROF1]] +; CHECK: 28: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 29: +; CHECK-NEXT: [[TMP30:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP25]], float [[TMP26]], float [[TMP27]], i32 11) +; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x float> [[X2]], i64 0 +; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i64 0 +; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x float> [[X2]], float [[TMP30]], i64 0 +; CHECK-NEXT: [[TMP33:%.*]] = fneg <4 x float> [[X0]] +; CHECK-NEXT: [[TMP34:%.*]] = fneg <4 x float> [[X2]] +; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x float> [[TMP33]], i64 0 +; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x float> [[X1]], i64 0 +; CHECK-NEXT: [[_MSPROP15:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x float> [[TMP34]], i64 0 +; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i32 [[_MSPROP13]], 0 +; CHECK-NEXT: [[_MSCMP26:%.*]] = icmp ne i32 [[_MSPROP14]], 0 +; CHECK-NEXT: [[_MSOR27:%.*]] = or i1 [[_MSCMP25]], [[_MSCMP26]] +; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i32 [[_MSPROP15]], 0 +; CHECK-NEXT: [[_MSOR29:%.*]] = or i1 [[_MSOR27]], [[_MSCMP28]] +; CHECK-NEXT: br i1 [[_MSOR29]], label [[TMP38:%.*]], label [[TMP39:%.*]], !prof [[PROF1]] +; CHECK: 38: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 39: +; CHECK-NEXT: [[TMP40:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP35]], float [[TMP36]], float [[TMP37]], i32 10) +; CHECK-NEXT: [[_MSPROP16:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x float> [[X2]], i64 0 +; CHECK-NEXT: [[TMP42:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP43:%.*]] = bitcast i8 [[X3]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP17:%.*]] = extractelement <8 x i1> [[TMP42]], i64 0 +; CHECK-NEXT: [[TMP44:%.*]] = extractelement <8 x i1> [[TMP43]], i64 0 +; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 0, i32 [[_MSPROP16]] +; CHECK-NEXT: [[TMP46:%.*]] = bitcast float [[TMP40]] to i32 +; CHECK-NEXT: [[TMP47:%.*]] = bitcast float [[TMP41]] to i32 +; 
CHECK-NEXT: [[TMP48:%.*]] = xor i32 [[TMP46]], [[TMP47]] +; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP48]], 0 +; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP49]], [[_MSPROP16]] +; CHECK-NEXT: [[_MSPROP_SELECT18:%.*]] = select i1 [[_MSPROP17]], i32 [[TMP50]], i32 [[TMP45]] +; CHECK-NEXT: [[TMP51:%.*]] = select i1 [[TMP44]], float [[TMP40]], float [[TMP41]] +; CHECK-NEXT: [[_MSPROP19:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[_MSPROP_SELECT18]], i64 0 +; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x float> [[X2]], float [[TMP51]], i64 0 +; CHECK-NEXT: [[_MSPROP20:%.*]] = or <4 x i32> [[_MSPROP7]], [[_MSPROP12]] +; CHECK-NEXT: [[RES3:%.*]] = fadd <4 x float> [[TMP22]], [[TMP32]] +; CHECK-NEXT: [[_MSPROP21:%.*]] = or <4 x i32> [[_MSPROP19]], [[_MSPROP20]] +; CHECK-NEXT: [[RES4:%.*]] = fadd <4 x float> [[TMP52]], [[RES3]] +; CHECK-NEXT: store <4 x i32> [[_MSPROP21]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[RES4]] +; + %1 = fneg <4 x float> %x0 + %2 = fneg <4 x float> %x2 + %3 = extractelement <4 x float> %1, i64 0 + %4 = extractelement <4 x float> %x1, i64 0 + %5 = extractelement <4 x float> %2, i64 0 + %6 = call float @llvm.fma.f32(float %3, float %4, float %5) + %7 = extractelement <4 x float> %x2, i64 0 + %8 = bitcast i8 %x3 to <8 x i1> + %9 = extractelement <8 x i1> %8, i64 0 + %10 = select i1 %9, float %6, float %7 + %11 = insertelement <4 x float> %x2, float %10, i64 0 + %12 = fneg <4 x float> %x0 + %13 = fneg <4 x float> %x2 + %14 = extractelement <4 x float> %12, i64 0 + %15 = extractelement <4 x float> %x1, i64 0 + %16 = extractelement <4 x float> %13, i64 0 + %17 = call float @llvm.x86.avx512.vfmadd.f32(float %14, float %15, float %16, i32 11) + %18 = extractelement <4 x float> %x2, i64 0 + %19 = insertelement <4 x float> %x2, float %17, i64 0 + %20 = fneg <4 x float> %x0 + %21 = fneg <4 x float> %x2 + %22 = extractelement <4 x float> %20, i64 0 + %23 = extractelement <4 x float> %x1, i64 0 + %24 = extractelement <4 x float> %21, i64 0 + %25 = call float @llvm.x86.avx512.vfmadd.f32(float %22, float %23, float %24, i32 10) + %26 = extractelement <4 x float> %x2, i64 0 + %27 = bitcast i8 %x3 to <8 x i1> + %28 = extractelement <8 x i1> %27, i64 0 + %29 = select i1 %28, float %25, float %26 + %30 = insertelement <4 x float> %x2, float %29, i64 0 + %res3 = fadd <4 x float> %11, %19 + %res4 = fadd <4 x float> %30, %res3 + ret <4 x float> %res4 +} + +define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, ptr%ptr_b ,i8 %x3,i32 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmadd_ss_rm( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[Q:%.*]] = load float, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 
[[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[_MSPROP]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[VECINIT_I]], i64 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP1]], [[_MSPROP2]] +; CHECK-NEXT: [[_MSPROP5:%.*]] = or i32 [[_MSPROP4]], [[_MSPROP3]] +; CHECK-NEXT: [[TMP13:%.*]] = call float @llvm.fma.f32(float [[TMP10]], float [[TMP11]], float [[TMP12]]) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP14]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP15]], i64 0 +; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[_MSPROP5]], i32 [[_MSPROP3]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast float [[TMP13]] to i32 +; CHECK-NEXT: [[TMP19:%.*]] = bitcast float [[TMP12]] to i32 +; CHECK-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = or i32 [[TMP20]], [[_MSPROP5]] +; CHECK-NEXT: [[TMP22:%.*]] = or i32 [[TMP21]], [[_MSPROP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i32 [[TMP22]], i32 [[TMP17]] +; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP16]], float [[TMP13]], float [[TMP12]] +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[X1]], float [[TMP23]], i64 0 +; CHECK-NEXT: store <4 x i32> [[_MSPROP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[TMP24]] +; + %q = load float, ptr %ptr_b + %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 + %1 = extractelement <4 x float> %x0, i64 0 + %2 = extractelement <4 x float> %vecinit.i, i64 0 + %3 = extractelement <4 x float> %x1, i64 0 + %4 = call float @llvm.fma.f32(float %1, float %2, float %3) + %5 = bitcast i8 %x3 to <8 x i1> + %6 = extractelement <8 x i1> %5, i64 0 + %7 = select i1 %6, float %4, float %3 + %8 = insertelement <4 x float> %x1, float %7, i64 0 + ret <4 x float> %8 +} + +define <4 x float> @test_int_x86_avx512_mask_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, ptr %ptr_b, i8 %x3, i32 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_mask_vfmadd_ss_rm( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: +; CHECK-NEXT: 
call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[Q:%.*]] = load float, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[_MSPROP]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[VECINIT_I]], i64 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP1]], [[_MSPROP2]] +; CHECK-NEXT: [[_MSPROP5:%.*]] = or i32 [[_MSPROP4]], [[_MSPROP3]] +; CHECK-NEXT: [[TMP13:%.*]] = call float @llvm.fma.f32(float [[TMP10]], float [[TMP11]], float [[TMP12]]) +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8 [[TMP4]] to <8 x i1> +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1> +; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP14]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP15]], i64 0 +; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[_MSPROP5]], i32 [[_MSPROP1]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast float [[TMP13]] to i32 +; CHECK-NEXT: [[TMP19:%.*]] = bitcast float [[TMP10]] to i32 +; CHECK-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = or i32 [[TMP20]], [[_MSPROP5]] +; CHECK-NEXT: [[TMP22:%.*]] = or i32 [[TMP21]], [[_MSPROP1]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i32 [[TMP22]], i32 [[TMP17]] +; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP16]], float [[TMP13]], float [[TMP10]] +; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[X0]], float [[TMP23]], i64 0 +; CHECK-NEXT: store <4 x i32> [[_MSPROP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[TMP24]] +; + %q = load float, ptr %ptr_b + %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 + %1 = extractelement <4 x float> %x0, i64 0 + %2 = extractelement <4 x float> %vecinit.i, i64 0 + %3 = extractelement <4 x float> %x1, i64 0 + %4 = call float @llvm.fma.f32(float %1, float %2, float %3) + %5 = bitcast i8 %x3 to <8 x i1> + %6 = extractelement <8 x i1> %5, i64 0 + %7 = select i1 %6, float %4, float %1 + %8 = insertelement <4 x float> %x0, float %7, i64 0 + ret <4 x float> %8 +} + + +define <4 x float> @test_int_x86_avx512_maskz_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, ptr %ptr_b, i8 %x3, i32 %x4) #0 { +; CHECK-LABEL: @test_int_x86_avx512_maskz_vfmadd_ss_rm( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 
[[TMP1]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] +; CHECK: 4: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 5: +; CHECK-NEXT: [[Q:%.*]] = load float, ptr [[PTR_B:%.*]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4 +; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0 +; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float [[Q]], i32 0 +; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0 +; CHECK-NEXT: [[_MSPROP3:%.*]] = extractelement <4 x i32> [[_MSPROP]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[VECINIT_I]], i64 0 +; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP1]], [[_MSPROP2]] +; CHECK-NEXT: [[_MSPROP5:%.*]] = or i32 [[_MSPROP4]], [[_MSPROP3]] +; CHECK-NEXT: [[TMP12:%.*]] = call float @llvm.fma.f32(float [[TMP9]], float [[TMP10]], float [[TMP11]]) +; CHECK-NEXT: [[TMP13:%.*]] = select i1 false, i32 [[_MSPROP5]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast float [[TMP12]] to i32 +; CHECK-NEXT: [[TMP15:%.*]] = xor i32 [[TMP14]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP15]], [[_MSPROP5]] +; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], 0 +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 false, i32 [[TMP17]], i32 [[TMP13]] +; CHECK-NEXT: [[TMP18:%.*]] = select i1 false, float [[TMP12]], float 0.000000e+00 +; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[_MSPROP_SELECT]], i64 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x float> [[X0]], float [[TMP18]], i64 0 +; CHECK-NEXT: store <4 x i32> [[_MSPROP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x float> [[TMP19]] +; + %q = load float, ptr %ptr_b + %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 + %1 = extractelement <4 x float> %x0, i64 0 + %2 = extractelement <4 x float> %x1, i64 0 + %3 = extractelement <4 x float> %vecinit.i, i64 0 + %4 = call float @llvm.fma.f32(float %1, float %2, float %3) + %5 = select i1 false, float %4, float 0.000000e+00 + %6 = insertelement <4 x float> %x0, float %5, i64 0 + ret <4 x float> %6 +} + +define <16 x i32> @test_x86_avx512_psll_d_512(<16 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psll_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> 
@llvm.x86.avx512.psll.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: store <16 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES]] +; + %res = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1] + ret <16 x i32> %res +} +define <16 x i32> @test_x86_avx512_mask_psll_d_512(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %passthru, i16 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_mask_psll_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP11]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[RES]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1] + %mask.cast = bitcast i16 %mask to <16 x i1> + %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru + ret <16 x i32> %res2 +} +define <16 x i32> @test_x86_avx512_maskz_psll_d_512(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_maskz_psll_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> 
@llvm.x86.avx512.psll.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[TMP10]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1] + %mask.cast = bitcast i16 %mask to <16 x i1> + %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer + ret <16 x i32> %res2 +} +declare <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i64> @test_x86_avx512_psll_q_512(<8 x i64> %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psll_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <8 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: store <8 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES]] +; + %res = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1] + ret <8 x i64> %res +} +define <8 x i64> @test_x86_avx512_mask_psll_q_512(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %passthru, i8 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_mask_psll_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512 
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP11]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <8 x i64> [[RES]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]] +; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES2]] +; + %res = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1] + %mask.cast = bitcast i8 %mask to <8 x i1> + %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru + ret <8 x i64> %res2 +} +define <8 x i64> @test_x86_avx512_maskz_psll_q_512(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_maskz_psll_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <8 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP10]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[TMP10]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP15]], <8 x i64> [[TMP12]] +; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES2]] +; + %res = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1] + %mask.cast = bitcast i8 %mask to <8 x i1> + %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer + ret <8 x i64> %res2 +} +declare 
<8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64>, <2 x i64>) nounwind readnone + + +define <16 x i32> @test_x86_avx512_pslli_d_512(<16 x i32> %a0) #0 { +; CHECK-LABEL: @test_x86_avx512_pslli_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <16 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES]] +; + %res = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1] + ret <16 x i32> %res +} +define <16 x i32> @test_x86_avx512_mask_pslli_d_512(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_mask_pslli_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP5]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[RES]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1] + %mask.cast = bitcast i16 %mask to <16 x i1> + %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru + ret <16 x i32> %res2 +} +define <16 x i32> @test_x86_avx512_maskz_pslli_d_512(<16 x i32> %a0, i16 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_maskz_pslli_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> 
[[MASK_CAST]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <16 x i32> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP9]], <16 x i32> [[TMP6]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1] + %mask.cast = bitcast i16 %mask to <16 x i1> + %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer + ret <16 x i32> %res2 +} +declare <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32>, i32) nounwind readnone + + +define <8 x i64> @test_x86_avx512_pslli_q_512(<8 x i64> %a0) #0 { +; CHECK-LABEL: @test_x86_avx512_pslli_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <8 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES]] +; + %res = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1] + ret <8 x i64> %res +} +define <8 x i64> @test_x86_avx512_mask_pslli_q_512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_mask_pslli_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP5]], <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[RES]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]] +; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES2]] +; + %res = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1] + %mask.cast = bitcast i8 %mask to <8 x i1> + %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru + ret <8 x i64> %res2 +} +define <8 x i64> 
@test_x86_avx512_maskz_pslli_q_512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_maskz_pslli_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i64> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP9]], <8 x i64> [[TMP6]] +; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES2]] +; + %res = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1] + %mask.cast = bitcast i8 %mask to <8 x i1> + %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer + ret <8 x i64> %res2 +} +declare <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64>, i32) nounwind readnone + + +define <8 x i64> @test_x86_avx512_psra_q_512(<8 x i64> %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psra_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <8 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: store <8 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES]] +; + %res = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1] + ret <8 x i64> %res +} +define <8 x i64> @test_x86_avx512_mask_psra_q_512(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %passthru, i8 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_mask_psra_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr 
inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP11]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <8 x i64> [[RES]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]] +; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES2]] +; + %res = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1] + %mask.cast = bitcast i8 %mask to <8 x i1> + %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru + ret <8 x i64> %res2 +} +define <8 x i64> @test_x86_avx512_maskz_psra_q_512(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_maskz_psra_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <8 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP10]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[TMP10]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP15]], <8 x i64> [[TMP12]] +; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x 
i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES2]] +; + %res = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1] + %mask.cast = bitcast i8 %mask to <8 x i1> + %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer + ret <8 x i64> %res2 +} +declare <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64>, <2 x i64>) nounwind readnone + + +define <16 x i32> @test_x86_avx512_psra_d_512(<16 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psra_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: store <16 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES]] +; + %res = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1] + ret <16 x i32> %res +} +define <16 x i32> @test_x86_avx512_mask_psra_d_512(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %passthru, i16 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_mask_psra_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP11]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[RES]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]] +; CHECK-NEXT: 
[[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1] + %mask.cast = bitcast i16 %mask to <16 x i1> + %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru + ret <16 x i32> %res2 +} +define <16 x i32> @test_x86_avx512_maskz_psra_d_512(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_maskz_psra_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[TMP10]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1] + %mask.cast = bitcast i16 %mask to <16 x i1> + %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer + ret <16 x i32> %res2 +} +declare <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32>, <4 x i32>) nounwind readnone + + + +define <8 x i64> @test_x86_avx512_psrai_q_512(<8 x i64> %a0) #0 { +; CHECK-LABEL: @test_x86_avx512_psrai_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <8 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES]] +; + %res = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1] + ret <8 x i64> %res +} +define <8 x i64> 
@test_x86_avx512_mask_psrai_q_512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_mask_psrai_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP5]], <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[RES]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]] +; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES2]] +; + %res = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1] + %mask.cast = bitcast i8 %mask to <8 x i1> + %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru + ret <8 x i64> %res2 +} +define <8 x i64> @test_x86_avx512_maskz_psrai_q_512(<8 x i64> %a0, i8 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_maskz_psrai_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i64> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP9]], <8 x i64> [[TMP6]] +; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES2]] +; + %res = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1] + %mask.cast = bitcast i8 %mask to <8 x i1> + %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer + ret <8 x i64> %res2 +} +declare <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64>, i32) 
nounwind readnone + + +define <16 x i32> @test_x86_avx512_psrai_d_512(<16 x i32> %a0) #0 { +; CHECK-LABEL: @test_x86_avx512_psrai_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <16 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES]] +; + %res = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1] + ret <16 x i32> %res +} +define <16 x i32> @test_x86_avx512_mask_psrai_d_512(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_mask_psrai_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP5]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[RES]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1] + %mask.cast = bitcast i16 %mask to <16 x i1> + %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru + ret <16 x i32> %res2 +} +define <16 x i32> @test_x86_avx512_maskz_psrai_d_512(<16 x i32> %a0, i16 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_maskz_psrai_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer 
+; CHECK-NEXT: [[TMP7:%.*]] = xor <16 x i32> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP9]], <16 x i32> [[TMP6]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1] + %mask.cast = bitcast i16 %mask to <16 x i1> + %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer + ret <16 x i32> %res2 +} +declare <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32>, i32) nounwind readnone + + + +define <16 x i32> @test_x86_avx512_psrl_d_512(<16 x i32> %a0, <4 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psrl_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: store <16 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES]] +; + %res = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1] + ret <16 x i32> %res +} +define <16 x i32> @test_x86_avx512_mask_psrl_d_512(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %passthru, i16 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_mask_psrl_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x 
i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP11]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[RES]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1] + %mask.cast = bitcast i16 %mask to <16 x i1> + %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru + ret <16 x i32> %res2 +} +define <16 x i32> @test_x86_avx512_maskz_psrl_d_512(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_maskz_psrl_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[TMP10]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1] + %mask.cast = bitcast i16 %mask to <16 x i1> + %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer + ret <16 x i32> %res2 +} +declare <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32>, <4 x i32>) nounwind readnone + + +define <8 x i64> @test_x86_avx512_psrl_q_512(<8 x i64> %a0, <2 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psrl_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 
+; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <8 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: store <8 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES]] +; + %res = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1] + ret <8 x i64> %res +} +define <8 x i64> @test_x86_avx512_mask_psrl_q_512(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %passthru, i8 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_mask_psrl_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512 +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <8 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP11]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP14:%.*]] = xor <8 x i64> [[RES]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]] +; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES2]] +; + %res = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1] + %mask.cast = bitcast i8 %mask to <8 x i1> + %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru + ret <8 x i64> %res2 +} +define <8 x i64> @test_x86_avx512_maskz_psrl_q_512(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_maskz_psrl_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; 
CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <8 x i64> +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP10]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[TMP10]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP15]], <8 x i64> [[TMP12]] +; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES2]] +; + %res = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1] + %mask.cast = bitcast i8 %mask to <8 x i1> + %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer + ret <8 x i64> %res2 +} +declare <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64>, <2 x i64>) nounwind readnone + + +define <16 x i32> @test_x86_avx512_psrli_d_512(<16 x i32> %a0) #0 { +; CHECK-LABEL: @test_x86_avx512_psrli_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <16 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES]] +; + %res = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1] + ret <16 x i32> %res +} +define <16 x i32> @test_x86_avx512_mask_psrli_d_512(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_mask_psrli_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: 
[[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP5]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[RES]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[PASSTHRU]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1] + %mask.cast = bitcast i16 %mask to <16 x i1> + %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru + ret <16 x i32> %res2 +} +define <16 x i32> @test_x86_avx512_maskz_psrli_d_512(<16 x i32> %a0, i16 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_maskz_psrli_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <16 x i32> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP9]], <16 x i32> [[TMP6]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1] + %mask.cast = bitcast i16 %mask to <16 x i1> + %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer + ret <16 x i32> %res2 +} +declare <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32>, i32) nounwind readnone + + +define <8 x i64> @test_x86_avx512_psrli_q_512(<8 x i64> %a0) #0 { +; CHECK-LABEL: @test_x86_avx512_psrli_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: store <8 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES]] +; + %res = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1] + ret <8 x i64> %res +} +define <8 x i64> @test_x86_avx512_mask_psrli_q_512(<8 
x i64> %a0, <8 x i64> %passthru, i8 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_mask_psrli_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i64> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP5]], <8 x i64> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[RES]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP5]] +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]] +; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[PASSTHRU]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES2]] +; + %res = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1] + %mask.cast = bitcast i8 %mask to <8 x i1> + %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru + ret <8 x i64> %res2 +} +define <8 x i64> @test_x86_avx512_maskz_psrli_q_512(<8 x i64> %a0, i8 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_maskz_psrli_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[TMP1]], i32 7) +; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[A0:%.*]], i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i64> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP9]], <8 x i64> [[TMP6]] +; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES2]] +; + %res = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1] + %mask.cast = bitcast i8 %mask to <8 x i1> + %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer + ret <8 x i64> %res2 +} +declare <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64>, i32) nounwind readnone + +define <16 x i32> 
@test_x86_avx512_psllv_d_512(<16 x i32> %a0, <16 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psllv_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <16 x i32> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]]) +; CHECK-NEXT: store <16 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES]] +; + %res = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %a0, <16 x i32> %a1) + ret <16 x i32> %res +} + +define <16 x i32> @test_x86_avx512_psllv_d_512_const() #0 { +; CHECK-LABEL: @test_x86_avx512_psllv_d_512_const( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> zeroinitializer, <16 x i32> ) +; CHECK-NEXT: [[TMP2:%.*]] = or <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[RES0:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> , <16 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> zeroinitializer, <16 x i32> ) +; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> , <16 x i32> ) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[RES2:%.*]] = add <16 x i32> [[RES0]], [[RES1]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res0 = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> , <16 x i32> ) + %res1 = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> , <16 x i32> ) + %res2 = add <16 x i32> %res0, %res1 + ret <16 x i32> %res2 +} + +define <16 x i32> @test_x86_avx512_mask_psllv_d_512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_mask_psllv_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: 
[[TMP10:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP8]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[RES]], [[A2:%.*]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP8]] +; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP13]], <16 x i32> [[TMP10]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[A2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %a0, <16 x i32> %a1) + %mask.cast = bitcast i16 %mask to <16 x i1> + %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %a2 + ret <16 x i32> %res2 +} + +define <16 x i32> @test_x86_avx512_maskz_psllv_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_maskz_psllv_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i32> [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]]) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP12]], <16 x i32> [[TMP9]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %a0, <16 x i32> %a1) + %mask.cast = bitcast i16 %mask to <16 x i1> + %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer + ret <16 x i32> %res2 +} + +declare <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32>, <16 x i32>) nounwind readnone + +define <8 x i64> @test_x86_avx512_psllv_q_512(<8 x i64> %a0, <8 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psllv_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> 
@llvm.x86.avx512.psllv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i64> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]]) +; CHECK-NEXT: store <8 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES]] +; + %res = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %a0, <8 x i64> %a1) + ret <8 x i64> %res +} + +define <8 x i64> @test_x86_avx512_psllv_q_512_const() #0 { +; CHECK-LABEL: @test_x86_avx512_psllv_q_512_const( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> zeroinitializer, <8 x i64> ) +; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[RES0:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> , <8 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> zeroinitializer, <8 x i64> ) +; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[RES1:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> , <8 x i64> ) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[RES2:%.*]] = add <8 x i64> [[RES0]], [[RES1]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES2]] +; + %res0 = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> , <8 x i64> ) + %res1 = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> , <8 x i64> ) + %res2 = add <8 x i64> %res0, %res1 + ret <8 x i64> %res2 +} + +define <8 x i64> @test_x86_avx512_mask_psllv_q_512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_mask_psllv_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP8]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i64> [[RES]], [[A2:%.*]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP8]] +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP13]], <8 x i64> [[TMP10]] +; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[A2]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES2]] +; + %res = call <8 x i64> 
@llvm.x86.avx512.psllv.q.512(<8 x i64> %a0, <8 x i64> %a1) + %mask.cast = bitcast i8 %mask to <8 x i1> + %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %a2 + ret <8 x i64> %res2 +} + +define <8 x i64> @test_x86_avx512_maskz_psllv_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_maskz_psllv_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP7:%.*]] = or <8 x i64> [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]]) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP7]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP12]], <8 x i64> [[TMP9]] +; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES2]] +; + %res = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %a0, <8 x i64> %a1) + %mask.cast = bitcast i8 %mask to <8 x i1> + %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer + ret <8 x i64> %res2 +} + +declare <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64>, <8 x i64>) nounwind readnone + +define <16 x i32> @test_x86_avx512_psrav_d_512(<16 x i32> %a0, <16 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psrav_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <16 x i32> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]]) +; CHECK-NEXT: store <16 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES]] +; + %res = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %a0, <16 x i32> %a1) + ret <16 x i32> %res +} + +define <16 x i32> @test_x86_avx512_mask_psrav_d_512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_mask_psrav_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, 
ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP8]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[RES]], [[A2:%.*]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP8]] +; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP13]], <16 x i32> [[TMP10]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[A2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %a0, <16 x i32> %a1) + %mask.cast = bitcast i16 %mask to <16 x i1> + %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %a2 + ret <16 x i32> %res2 +} + +define <16 x i32> @test_x86_avx512_maskz_psrav_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_maskz_psrav_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i32> [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]]) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP12]], <16 x i32> [[TMP9]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 
x i32> [[RES]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %a0, <16 x i32> %a1) + %mask.cast = bitcast i16 %mask to <16 x i1> + %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer + ret <16 x i32> %res2 +} + +declare <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32>, <16 x i32>) nounwind readnone + +define <8 x i64> @test_x86_avx512_psrav_q_512(<8 x i64> %a0, <8 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psrav_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i64> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]]) +; CHECK-NEXT: store <8 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES]] +; + %res = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %a0, <8 x i64> %a1) + ret <8 x i64> %res +} + +define <8 x i64> @test_x86_avx512_mask_psrav_q_512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_mask_psrav_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP8]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i64> [[RES]], [[A2:%.*]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP8]] +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP13]], <8 x i64> [[TMP10]] +; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[A2]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES2]] +; + %res = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %a0, <8 x i64> %a1) + %mask.cast = bitcast i8 %mask to <8 x i1> + 
%res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %a2 + ret <8 x i64> %res2 +} + +define <8 x i64> @test_x86_avx512_maskz_psrav_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_maskz_psrav_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP7:%.*]] = or <8 x i64> [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]]) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP7]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP12]], <8 x i64> [[TMP9]] +; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES2]] +; + %res = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %a0, <8 x i64> %a1) + %mask.cast = bitcast i8 %mask to <8 x i1> + %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer + ret <8 x i64> %res2 +} + +declare <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64>, <8 x i64>) nounwind readnone + +define <16 x i32> @test_x86_avx512_psrlv_d_512(<16 x i32> %a0, <16 x i32> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psrlv_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <16 x i32> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]]) +; CHECK-NEXT: store <16 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES]] +; + %res = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %a0, <16 x i32> %a1) + ret <16 x i32> %res +} + +define <16 x i32> @test_x86_avx512_psrlv_d_512_const() #0 { +; CHECK-LABEL: @test_x86_avx512_psrlv_d_512_const( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> zeroinitializer, <16 x i32> ) +; CHECK-NEXT: [[TMP2:%.*]] = or <16 x i32> 
[[TMP1]], zeroinitializer +; CHECK-NEXT: [[RES0:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> , <16 x i32> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> zeroinitializer, <16 x i32> ) +; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> , <16 x i32> ) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[RES2:%.*]] = add <16 x i32> [[RES0]], [[RES1]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res0 = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> , <16 x i32> ) + %res1 = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> , <16 x i32> ) + %res2 = add <16 x i32> %res0, %res1 + ret <16 x i32> %res2 +} + +define <16 x i32> @test_x86_avx512_mask_psrlv_d_512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_mask_psrlv_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP8]], <16 x i32> [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[RES]], [[A2:%.*]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP8]] +; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP13]], <16 x i32> [[TMP10]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[A2]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %a0, <16 x i32> %a1) + %mask.cast = bitcast i16 %mask to <16 x i1> + %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %a2 + ret <16 x i32> %res2 +} + +define <16 x i32> @test_x86_avx512_maskz_psrlv_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_maskz_psrlv_d_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; 
CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]]) +; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i32> [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]]) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP12]], <16 x i32> [[TMP9]] +; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x i32> [[RES2]] +; + %res = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %a0, <16 x i32> %a1) + %mask.cast = bitcast i16 %mask to <16 x i1> + %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer + ret <16 x i32> %res2 +} + +declare <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32>, <16 x i32>) nounwind readnone + +define <8 x i64> @test_x86_avx512_psrlv_q_512(<8 x i64> %a0, <8 x i64> %a1) #0 { +; CHECK-LABEL: @test_x86_avx512_psrlv_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i64> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]]) +; CHECK-NEXT: store <8 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES]] +; + %res = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %a0, <8 x i64> %a1) + ret <8 x i64> %res +} + +define <8 x i64> @test_x86_avx512_psrlv_q_512_const() #0 { +; CHECK-LABEL: @test_x86_avx512_psrlv_q_512_const( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> zeroinitializer, <8 x i64> ) +; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i64> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[RES0:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> , <8 x i64> ) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> zeroinitializer, <8 x i64> ) +; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i64> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[RES1:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> , <8 x i64> ) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[RES2:%.*]] = add <8 x i64> [[RES0]], [[RES1]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr 
@__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES2]] +; + %res0 = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> , <8 x i64> ) + %res1 = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> , <8 x i64> ) + %res2 = add <8 x i64> %res0, %res1 + ret <8 x i64> %res2 +} + +define <8 x i64> @test_x86_avx512_mask_psrlv_q_512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_mask_psrlv_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP8]], <8 x i64> [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i64> [[RES]], [[A2:%.*]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP8]] +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP4]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP13]], <8 x i64> [[TMP10]] +; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[A2]] +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES2]] +; + %res = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %a0, <8 x i64> %a1) + %mask.cast = bitcast i8 %mask to <8 x i1> + %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %a2 + ret <8 x i64> %res2 +} + +define <8 x i64> @test_x86_avx512_maskz_psrlv_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) #0 { +; CHECK-LABEL: @test_x86_avx512_maskz_psrlv_q_512( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]]) +; CHECK-NEXT: [[TMP7:%.*]] = or <8 x i64> [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]]) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[TMP3]] to <8 x i1> +; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> 
[[MASK_CAST]], <8 x i64> [[TMP7]], <8 x i64> zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[RES]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP12]], <8 x i64> [[TMP9]] +; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer +; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES2]] +; + %res = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %a0, <8 x i64> %a1) + %mask.cast = bitcast i8 %mask to <8 x i1> + %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer + ret <8 x i64> %res2 +} + +declare <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64>, <8 x i64>) nounwind readnone + + +define <8 x double> @test_mm256_castpd128_pd256_freeze(<2 x double> %a0) nounwind #0 { +; CHECK-LABEL: @test_mm256_castpd128_pd256_freeze( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[A1:%.*]] = freeze <2 x double> poison +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> zeroinitializer, <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x double> [[A0:%.*]], <2 x double> [[A1]], <8 x i32> +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[RES]] +; + %a1 = freeze <2 x double> poison + %res = shufflevector <2 x double> %a0, <2 x double> %a1, <8 x i32> + ret <8 x double> %res +} + + +define <8 x double> @test_mm256_castpd256_pd256_freeze(<4 x double> %a0) nounwind #0 { +; CHECK-LABEL: @test_mm256_castpd256_pd256_freeze( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[A1:%.*]] = freeze <4 x double> poison +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> zeroinitializer, <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> [[A1]], <8 x i32> +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x double> [[RES]] +; + %a1 = freeze <4 x double> poison + %res = shufflevector <4 x double> %a0, <4 x double> %a1, <8 x i32> + ret <8 x double> %res +} + + +define <16 x float> @test_mm256_castps128_ps512_freeze(<4 x float> %a0) nounwind #0 { +; CHECK-LABEL: @test_mm256_castps128_ps512_freeze( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[A1:%.*]] = freeze <4 x float> poison +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> zeroinitializer, <16 x i32> +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A1]], <16 x i32> +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES]] +; + %a1 = freeze <4 x float> poison + %res = shufflevector <4 x float> %a0, <4 x float> %a1, <16 x i32> + ret <16 x float> %res +} + + +define <16 x float> @test_mm256_castps256_ps512_freeze(<8 x float> %a0) nounwind #0 { +; CHECK-LABEL: @test_mm256_castps256_ps512_freeze( +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[A1:%.*]] = freeze <8 x float> poison
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> zeroinitializer, <16 x i32> +; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> [[A1]], <16 x i32> +; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[RES]] +; + %a1 = freeze <8 x float> poison + %res = shufflevector <8 x float> %a0, <8 x float> %a1, <16 x i32> + ret <16 x float> %res +} + + +define <8 x i64> @test_mm512_castsi128_si512_freeze(<2 x i64> %a0) nounwind #0 { +; CHECK-LABEL: @test_mm512_castsi128_si512_freeze( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[A1:%.*]] = freeze <2 x i64> poison +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> zeroinitializer, <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i64> [[A0:%.*]], <2 x i64> [[A1]], <8 x i32> +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES]] +; + %a1 = freeze <2 x i64> poison + %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <8 x i32> + ret <8 x i64> %res +} + + +define <8 x i64> @test_mm512_castsi256_si512_pd256_freeze(<4 x i64> %a0) nounwind #0 { +; CHECK-LABEL: @test_mm512_castsi256_si512_pd256_freeze( +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[A1:%.*]] = freeze <4 x i64> poison +; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> zeroinitializer, <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> [[A1]], <8 x i32> +; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <8 x i64> [[RES]] +; + %a1 = freeze <4 x i64> poison + %res = shufflevector <4 x i64> %a0, <4 x i64> %a1, <8 x i32> + ret <8 x i64> %res +} + + +define <16 x float> @bad_mask_transition(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d, <16 x float> %e, <16 x float> %f) #0 { +; CHECK-LABEL: @bad_mask_transition( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 320) to ptr), align 8 +; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 256) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP0]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] +; CHECK: 8: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i1>
@llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 17, <8 x i1> splat (i1 true), i32 4) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i1> [[TMP10]] to i8 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP2]] to i512 +; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP12]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i64> [[TMP3]] to i512 +; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP13]], 0 +; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] +; CHECK-NEXT: br i1 [[_MSOR4]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]] +; CHECK: 14: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 15: +; CHECK-NEXT: [[TMP16:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[C:%.*]], <8 x double> [[D:%.*]], i32 17, <8 x i1> splat (i1 true), i32 4) +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x i1> [[TMP16]] to i8 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP11]] to i16 +; CHECK-NEXT: [[CONV2:%.*]] = zext i8 [[TMP17]] to i16 +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i16 [[CONV]] to <16 x i1> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i16 [[CONV2]] to <16 x i1> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <16 x i1> [[TMP18]], <16 x i1> undef, <8 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <16 x i1> [[TMP19]], <16 x i1> undef, <8 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x i1> [[TMP20]], <8 x i1> [[TMP21]], <16 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = select <16 x i1> [[TMP22]], <16 x i32> [[TMP4]], <16 x i32> [[TMP5]] +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x float> [[F:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <16 x float> [[E:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = xor <16 x i32> [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = or <16 x i32> [[TMP26]], [[TMP4]] +; CHECK-NEXT: [[TMP28:%.*]] = or <16 x i32> [[TMP27]], [[TMP5]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> zeroinitializer, <16 x i32> [[TMP28]], <16 x i32> [[TMP23]] +; CHECK-NEXT: [[TMP29:%.*]] = select <16 x i1> [[TMP22]], <16 x float> [[F]], <16 x float> [[E]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP29]] +; +entry: + %0 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, <8 x i1> , i32 4) + %1 = bitcast <8 x i1> %0 to i8 + %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %c, <8 x double> %d, i32 17, <8 x i1> , i32 4) + %3 = bitcast <8 x i1> %2 to i8 + %conv = zext i8 %1 to i16 + %conv2 = zext i8 %3 to i16 + %4 = bitcast i16 %conv to <16 x i1> + %5 = bitcast i16 %conv2 to <16 x i1> + %6 = shufflevector <16 x i1> %4, <16 x i1> undef, <8 x i32> + %7 = shufflevector <16 x i1> %5, <16 x i1> undef, <8 x i32> + %8 = shufflevector <8 x i1> %6, <8 x i1> %7, <16 x i32> + %9 = select <16 x i1> %8, <16 x float> %f, <16 x float> %e + ret <16 x float> %9 +} + +define <16 x float> @bad_mask_transition_2(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d, <16 x float> %e, <16 x float> %f) #0 { +; CHECK-LABEL: @bad_mask_transition_2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 320) to ptr), align 8 +; CHECK-NEXT: 
[[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 256) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP0]] to i512 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK: 6: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] +; CHECK-NEXT: unreachable +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 17, <8 x i1> splat (i1 true), i32 4) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i1> [[TMP8]] to i8 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP9]] to i16 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[CONV]] to <16 x i1> +; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[F:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[E:%.*]] to <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP2]] +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP3]] +; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> zeroinitializer, <16 x i32> [[TMP16]], <16 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[F]], <16 x float> [[E]] +; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <16 x float> [[TMP17]] +; +entry: + %0 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, <8 x i1> , i32 4) + %1 = bitcast <8 x i1> %0 to i8 + %conv = zext i8 %1 to i16 + %2 = bitcast i16 %conv to <16 x i1> + %3 = select <16 x i1> %2, <16 x float> %f, <16 x float> %e + ret <16 x float> %3 +} + +declare <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double>, <8 x double>, <8 x i1>) +declare <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float>, <16 x float>, <16 x i1>) +declare <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64>, <8 x i64>, <8 x i1>) +declare <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32>, <16 x i32>, <16 x i1>) +declare <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double>, <8 x double>, <8 x i1>) +declare <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float>, <16 x float>, <16 x i1>) +declare <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64>, <8 x i64>, <8 x i1>) +declare <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32>, <16 x i32>, <16 x i1>) + +attributes #0 = { sanitize_memory } From 0aa72b67469446cb3614c8726413c8094b4368b2 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Sun, 26 Jan 2025 23:29:45 +0000 Subject: [PATCH 3/4] Move to X86 subdirectory --- .../MemorySanitizer/{ => X86}/avx512-intrinsics-upgrade.ll | 0 .../MemorySanitizer/{ => X86}/avx512-intrinsics.ll | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename llvm/test/Instrumentation/MemorySanitizer/{ => X86}/avx512-intrinsics-upgrade.ll (100%) rename llvm/test/Instrumentation/MemorySanitizer/{ => X86}/avx512-intrinsics.ll (100%) diff --git a/llvm/test/Instrumentation/MemorySanitizer/avx512-intrinsics-upgrade.ll 
b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll similarity index 100% rename from llvm/test/Instrumentation/MemorySanitizer/avx512-intrinsics-upgrade.ll rename to llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll diff --git a/llvm/test/Instrumentation/MemorySanitizer/avx512-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll similarity index 100% rename from llvm/test/Instrumentation/MemorySanitizer/avx512-intrinsics.ll rename to llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll From af72ee8c34b11c58afe6e98bcc27090b11cd3257 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Sun, 26 Jan 2025 23:32:11 +0000 Subject: [PATCH 4/4] Fix source information --- .../MemorySanitizer/X86/avx512-intrinsics-upgrade.ll | 2 +- .../Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll index f74858cb0ed516..edb618fdfb8fbe 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt %s -S -mtriple=x86_64-linux-gnu -mattr=+avx512f -passes=msan 2>&1 | FileCheck %s ; -; Forked from llvm/test/Instrumentation/MemorySanitizer/avx512-intrinsics-upgrade.ll +; Forked from llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll index 11f72ce39b0b8f..052b497831ee12 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt %s -S -mtriple=x86_64-linux-gnu -mattr=+avx512f -passes=msan 2>&1 | FileCheck %s ; -; Forked from llvm/test/Instrumentation/MemorySanitizer/avx512-intrinsics.ll +; Forked from llvm/test/CodeGen/X86/avx512-intrinsics.ll define <8 x double> @test_mask_compress_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) #0 { ; CHECK-LABEL: @test_mask_compress_pd_512(