diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/libm-vector-calls.ll b/llvm/test/Transforms/LoopVectorize/RISCV/libm-vector-calls.ll new file mode 100644 index 00000000000000..75fdd00e25f988 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/libm-vector-calls.ll @@ -0,0 +1,415 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -mtriple=riscv64 -mattr=+v -vector-library=LIBMVEC-X86 -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s + +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "riscv64-unknown-linux-gnu" + +define void @sin_f64(ptr nocapture %varray) { +; CHECK-LABEL: define void @sin_f64( +; CHECK-SAME: ptr nocapture [[VARRAY:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[VEC_IND]] to <4 x double> +; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[STEP_ADD]] to <4 x double> +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[VARRAY]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 4 +; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[TMP15]], align 4 +; CHECK-NEXT: store <4 x double> [[TMP4]], ptr [[TMP7]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to double +; CHECK-NEXT: [[CALL:%.*]] = tail call double @sin(double [[CONV]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VARRAY]], i64 [[IV]] +; CHECK-NEXT: store double [[CALL]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @sin(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1 + +for.end: + ret void +} + +!1 = distinct !{!1, !2} +!2 = !{!"llvm.loop.vectorize.enable", i1 true} + + +define void @sin_f64_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: define void @sin_f64_intrinsic( +; CHECK-SAME: ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[VEC_IND]] to <4 x double> +; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[STEP_ADD]] to <4 x double> +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[VARRAY]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 4 +; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[TMP15]], align 4 +; CHECK-NEXT: store <4 x double> [[TMP4]], ptr [[TMP7]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to double +; CHECK-NEXT: [[CALL:%.*]] = tail call double @llvm.sin.f64(double [[CONV]]) #[[ATTR4:[0-9]+]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VARRAY]], i64 [[IV]] +; CHECK-NEXT: store double [[CALL]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.sin.f64(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31 + +for.end: + ret void +} + +!31 = distinct !{!31, !32} +!32 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @cos_f64(ptr nocapture %varray) { +; CHECK-LABEL: define void @cos_f64( +; CHECK-SAME: ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[VEC_IND]] to <4 x double> +; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[STEP_ADD]] to <4 x double> +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[VARRAY]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 4 +; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[TMP15]], align 4 +; CHECK-NEXT: store <4 x double> [[TMP4]], ptr [[TMP7]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to double +; CHECK-NEXT: [[CALL:%.*]] = tail call double @cos(double [[CONV]]) #[[ATTR5:[0-9]+]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VARRAY]], i64 [[IV]] +; CHECK-NEXT: store double [[CALL]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @cos(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !51 + +for.end: + ret void +} + +!51 = distinct !{!51, !52} +!52 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @cos_f64_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: define void @cos_f64_intrinsic( +; CHECK-SAME: ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[VEC_IND]] to <4 x double> +; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[STEP_ADD]] to <4 x double> +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[VARRAY]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 4 +; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[TMP15]], align 4 +; CHECK-NEXT: store <4 x double> [[TMP4]], ptr [[TMP7]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to double +; CHECK-NEXT: [[CALL:%.*]] = tail call double @llvm.cos.f64(double [[CONV]]) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VARRAY]], i64 [[IV]] +; CHECK-NEXT: store double [[CALL]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.cos.f64(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !71 + +for.end: + ret void +} + +!71 = distinct !{!71, !72} +!72 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @exp_f32(ptr nocapture %varray) { +; CHECK-LABEL: define void @exp_f32( +; CHECK-SAME: ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i32> [[VEC_IND]], splat (i32 8) +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = sitofp <8 x i32> [[VEC_IND]] to <8 x float> +; CHECK-NEXT: [[TMP2:%.*]] = sitofp <8 x i32> [[STEP_ADD]] to <8 x float> +; CHECK-NEXT: [[TMP3:%.*]] = call fast <8 x float> @_ZGVdN8v_expf(<8 x float> [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call fast <8 x float> @_ZGVdN8v_expf(<8 x float> [[TMP2]]) +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[VARRAY]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 8 +; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[TMP15]], align 4 +; CHECK-NEXT: store <8 x float> [[TMP4]], ptr [[TMP7]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[STEP_ADD]], splat (i32 8) +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 992 +; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 false, label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 992, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to float +; CHECK-NEXT: [[CALL:%.*]] = tail call fast float @expf(float [[CONV]]) #[[ATTR7:[0-9]+]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VARRAY]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store float [[CALL]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @expf(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv + store float %call, ptr %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !91 + +for.end: ; preds = %for.body + ret void +} + +!91 = distinct !{!91, !92} +!92 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @exp_f32_intrin(ptr nocapture %varray) { +; CHECK-LABEL: define void @exp_f32_intrin( +; CHECK-SAME: ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i32> [[VEC_IND]], splat (i32 8) +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = sitofp <8 x i32> [[VEC_IND]] to <8 x float> +; CHECK-NEXT: [[TMP2:%.*]] = sitofp <8 x i32> [[STEP_ADD]] to <8 x float> +; CHECK-NEXT: [[TMP3:%.*]] = call fast <8 x float> @_ZGVdN8v_expf(<8 x float> [[TMP1]]) +; CHECK-NEXT: [[TMP4:%.*]] = call fast <8 x float> @_ZGVdN8v_expf(<8 x float> [[TMP2]]) +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[VARRAY]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 8 +; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[TMP15]], align 4 +; CHECK-NEXT: store <8 x float> [[TMP4]], ptr [[TMP7]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[STEP_ADD]], splat (i32 8) +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 992 +; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 false, label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 992, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to float +; CHECK-NEXT: [[CALL:%.*]] = tail call fast float @llvm.exp.f32(float [[CONV]]) #[[ATTR8:[0-9]+]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VARRAY]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store float [[CALL]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @llvm.exp.f32(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv + store float %call, ptr %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !101 + +for.end: ; preds = %for.body + ret void +} + +!101 = distinct !{!101, !102} +!102 = !{!"llvm.loop.vectorize.enable", i1 true} + +attributes #0 = { nounwind readnone } + +declare double @sin(double) #0 +declare double @llvm.sin.f64(double) #0 +declare double @cos(double) #0 +declare double @llvm.cos.f64(double) #0 +declare float @expf(float) #0 +declare float @llvm.exp.f32(float) #0 +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} +; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} +; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} +; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} +; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]} +;.