From 21a87c8db3cd9f5718470b187c811104bc89d675 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland@gmail.com>
Date: Mon, 27 Feb 2023 12:38:30 -0800
Subject: [PATCH] [SingleSource/Vectorizer] Add unit tests for conditional
 scalar assignment pattern.

Dedicated unit tests for loops which contain a conditional assignment inside
the loop body.

An earlier version of this patch was posted as https://reviews.llvm.org/D144909.
Some updates have been made to cover more test cases.

I tested this patch with the CSA patch applied (I will add a link once the CSA
patch is posted) using the following options:
```
-O3 -mllvm -enable-csa-vectorization -mcpu=sifive-x280
```
I also ran it with `-mllvm -force-tail-folding-style=data-with-evl` appended
to the previous option set as well as `-mllvm -force-tail-folding-style=none`.

I also used objdump to verify that vector code was generated for the functions
that we are currently able to vectorize. This patch contains some examples
that we are not able to vectorize today but should be able to in the future.
---
 SingleSource/UnitTests/Vectorizer/common.h    |  52 ++-
 .../conditional_scalar_assignment.cpp         | 339 ++++++++++++++++++
 ...itional_scalar_assignment.reference_output |  23 ++
 3 files changed, 410 insertions(+), 4 deletions(-)
 create mode 100644 SingleSource/UnitTests/Vectorizer/conditional_scalar_assignment.cpp
 create mode 100644 SingleSource/UnitTests/Vectorizer/conditional_scalar_assignment.reference_output
diff --git a/SingleSource/UnitTests/Vectorizer/common.h b/SingleSource/UnitTests/Vectorizer/common.h
index d8cd421bf3..a5424061e1 100644
--- a/SingleSource/UnitTests/Vectorizer/common.h
+++ b/SingleSource/UnitTests/Vectorizer/common.h
@@ -1,5 +1,6 @@
 #include <memory>
 #include <random>
+#include <type_traits>
 
 #define DEFINE_SCALAR_AND_VECTOR_FN2(Init, Loop)                               \
   auto ScalarFn = [](auto *A, auto *B, unsigned TC) {                          \
@@ -17,6 +18,22 @@
     _Pragma("clang loop vectorize(enable)") Loop                               \
   };
 
+// Defines two generic lambdas, ScalarFn and VectorFn, whose bodies are Init
+// followed by Loop. In ScalarFn, Loop is preceded by
+// `#pragma clang loop vectorize(disable)`; in VectorFn, by
+// `#pragma clang loop vectorize(enable)`. Init and Loop may refer to the
+// lambda parameters cond0, cond1, data0, data1, N and x. The macro adds no
+// return statement of its own.
+#define DEFINE_SCALAR_AND_VECTOR_FN4(Init, Loop)                               \
+  auto ScalarFn = [](auto *cond0, auto *cond1, auto *data0, auto *data1,       \
+                     unsigned N, int x) {                                      \
+    Init _Pragma("clang loop vectorize(disable)") Loop                         \
+  };                                                                           \
+  auto VectorFn = [](auto *cond0, auto *cond1, auto *data0, auto *data1,       \
+                     unsigned N, int x) {                                      \
+    Init _Pragma("clang loop vectorize(enable)") Loop                          \
+  };
+
 #define DEFINE_NESTED_SCALAR_AND_VECTOR_FN4(InnerLoopCode)                     \
   auto ScalarFn = [](auto *A, auto *B, unsigned OuterTC, unsigned InnerTC) {   \
     for (unsigned long i = 0; i < OuterTC; i++) {                              \
@@ -55,15 +66,54 @@
 
 static std::mt19937 rng;
 
-// Initialize arrays A with random numbers.
-template <typename Ty>
-static void init_data(const std::unique_ptr<Ty[]> &A, unsigned N) {
+// Initialize arrays A with random integers.
+template <typename Int,
+          std::enable_if_t<std::is_integral<Int>::value, bool> = true>
+static void init_data(const std::unique_ptr<Int[]> &A, unsigned N) {
   std::uniform_int_distribution<uint64_t> distrib(
-      std::numeric_limits<Ty>::min(), std::numeric_limits<Ty>::max());
+      std::numeric_limits<Int>::min(), std::numeric_limits<Int>::max());
   for (unsigned i = 0; i < N; i++)
     A[i] = distrib(rng);
 }
 
+// Initialize arrays A with random floating points of either sign. The range
+// is [-max/2, max/2) because std::uniform_real_distribution requires its
+// width (b - a) to be representable in the result type.
+template <typename Float,
+          std::enable_if_t<std::is_floating_point<Float>::value, bool> = true>
+static void init_data(const std::unique_ptr<Float[]> &A, unsigned N) {
+  const Float Bound = std::numeric_limits<Float>::max() / 2;
+  std::uniform_real_distribution<Float> distrib(-Bound, Bound);
+  for (unsigned i = 0; i < N; i++)
+    A[i] = distrib(rng);
+}
+
+// Initialize arrays A with pointers to valid pointees, so that test loops may
+// dereference every element. Each pointer refers to a randomly chosen slot of
+// a static pool in which slot J holds the value J.
+template <typename Ptr,
+          std::enable_if_t<std::is_pointer<Ptr>::value, bool> = true>
+static void init_data(const std::unique_ptr<Ptr[]> &A, unsigned N) {
+  using Pointee = std::remove_cv_t<std::remove_pointer_t<Ptr>>;
+  static_assert(std::is_arithmetic<Pointee>::value,
+                "init_data only supports pointers to arithmetic types");
+  constexpr unsigned PoolSize = 64;
+  // Static storage keeps the pointees alive after this function returns.
+  static Pointee Pool[PoolSize];
+  for (unsigned J = 0; J < PoolSize; J++)
+    Pool[J] = static_cast<Pointee>(J);
+  std::uniform_int_distribution<unsigned> Pick(0, PoolSize - 1);
+  for (unsigned i = 0; i < N; i++)
+    A[i] = &Pool[Pick(rng)];
+}
+
+// Initialize arrays A with random booleans.
+static void init_cond(const std::unique_ptr<bool[]> &A, unsigned N) {
+  std::uniform_int_distribution<uint64_t> distrib(0, 1);
+  for (unsigned i = 0; i < N; i++)
+    A[i] = !!distrib(rng);
+}
+
 template <typename Ty>
 static void check(const std::unique_ptr<Ty[]> &Reference,
                   const std::unique_ptr<Ty[]> &Tmp, unsigned NumElements) {
@@ -72,3 +108,16 @@ static void check(const std::unique_ptr<Ty[]> &Reference,
     exit(1);
   }
 }
+
+// Scalar counterpart of the array check above: compares two single values,
+// and on a mismatch prints "Miscompare" to stderr and exits with status 1.
+// NumElements is not used.
+template <typename Ty>
+static void check(const Ty Reference, const Ty ToCheck, unsigned NumElements) {
+  const bool Mismatch = Reference != ToCheck;
+  if (!Mismatch)
+    return;
+
+  std::cerr << "Miscompare\n";
+  exit(1);
+}
diff --git a/SingleSource/UnitTests/Vectorizer/conditional_scalar_assignment.cpp b/SingleSource/UnitTests/Vectorizer/conditional_scalar_assignment.cpp
new file mode 100644
index 0000000000..552996196a
--- /dev/null
+++ b/SingleSource/UnitTests/Vectorizer/conditional_scalar_assignment.cpp
@@ -0,0 +1,339 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <functional>
+#include <iostream>
+
+#include "common.h"
+
+// Signature shared by the scalar and the vector variant of each test loop.
+template <typename Ty>
+using Fn4Ty = std::function<Ty(bool *, bool *, Ty *, Ty *, unsigned, int)>;
+
+// Feeds ScalarFn and VectorFn the same random inputs and compares results.
+template <typename Ty>
+static void checkVectorFunction(Fn4Ty<Ty> ScalarFn, Fn4Ty<Ty> VectorFn,
+                                const char *Name) {
+  std::cout << "Checking " << Name << "\n";
+  const unsigned N = 1000;
+  const int X = 10;
+  auto Cond0 = std::make_unique<bool[]>(N);
+  auto Cond1 = std::make_unique<bool[]>(N);
+  auto Data0 = std::make_unique<Ty[]>(N);
+  auto Data1 = std::make_unique<Ty[]>(N);
+  init_cond(Cond0, N); // The order of these four calls fixes the RNG stream.
+  init_cond(Cond1, N);
+  init_data(Data0, N);
+  init_data(Data1, N);
+  Ty Want = ScalarFn(Cond0.get(), Cond1.get(), Data0.get(), Data1.get(), N, X);
+  Ty Got = VectorFn(Cond0.get(), Cond1.get(), Data0.get(), Data1.get(), N, X);
+  check(Want, Got, N);
+}
+
+int main(void) {
+  rng = std::mt19937(15); // Fixed seed: every run sees the same inputs.
+  // Each scope defines one loop as ScalarFn/VectorFn and checks they agree.
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN4(
+      int t = -1;,
+      for (int i = 0; i < N; i++) {
+        if (x < data0[i])
+          t = data0[i];
+      }
+      return t;);
+    checkVectorFunction<uint32_t>(ScalarFn, VectorFn, "simple_csa_int_select");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN4(
+      int t = -1;,
+      for (int i = 0; i < N; i++) {
+        if (i < data0[i])
+          t = data0[i];
+      }
+      return t;);
+    checkVectorFunction<uint32_t>(ScalarFn, VectorFn,
+                                  "simple_csa_int_select_induction_cmp");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN4(
+      float t = 1.0f;,
+      for (int i = 0; i < N; i++) {
+        if (0.0f < data0[i])
+          t = data0[i];
+      }
+      return t; // use t
+    );
+    checkVectorFunction<float>(ScalarFn, VectorFn, "simple_csa_float_select");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN4(
+      int t = -1;,
+      for (int i = 0; i < N; i++) {
+        if (cond0[i])
+          t = data0[i];
+      }
+      return t; // use t
+    );
+    checkVectorFunction<uint32_t>(ScalarFn, VectorFn, "simple_csa_int");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN4(
+      float t = 1.0f;,
+      for (int i = 0; i < N; i++) {
+        if (cond0[i])
+          t = data0[i];
+      }
+      return t; // use t
+    );
+    checkVectorFunction<float>(ScalarFn, VectorFn, "simple_csa_float");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN4(
+      int t = -1; int s = -1;,
+      for (int i = 0; i < N; i++) {
+        if (x < data0[i])
+          t = data0[i];
+        if (x < data1[i])
+          s = data1[i];
+      }
+      return t | s; // use t and s
+    );
+    checkVectorFunction<uint32_t>(ScalarFn, VectorFn,
+                                  "csa_in_series_int_select");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN4(
+      int t = -1; int s = -1;,
+      for (int i = 0; i < N; i++) {
+        if (i < data0[i])
+          t = data0[i];
+        if (i < data1[i])
+          s = data1[i];
+      }
+      return t | s; // use t and s
+    );
+    checkVectorFunction<uint32_t>(ScalarFn, VectorFn,
+                                  "csa_in_series_int_select_induction_cmp");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN4(
+      float t = 1.0f; float s = 1.0f;,
+      for (int i = 0; i < N; i++) {
+        if (0.0f < data0[i])
+          t = data0[i];
+        if (0.0f < data1[i])
+          s = data1[i];
+      }
+      return t + s; // use t and s
+    );
+    checkVectorFunction<float>(ScalarFn, VectorFn,
+                               "csa_in_series_float_select");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN4(
+      int t = -1; int s = -1;,
+      for (int i = 0; i < N; i++) {
+        if (cond0[i])
+          t = data0[i];
+        if (cond1[i])
+          s = data1[i];
+      }
+      return t | s; // use t and s
+    );
+    checkVectorFunction<uint32_t>(ScalarFn, VectorFn, "csa_in_series_int");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN4(
+      float t = 1.0f; float s = 1.0f;,
+      for (int i = 0; i < N; i++) {
+        if (cond0[i])
+          t = data0[i];
+        if (cond1[i])
+          s = data1[i];
+      }
+      return t + s; // use t and s
+    );
+    checkVectorFunction<float>(ScalarFn, VectorFn, "csa_in_series_float");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN4(
+      int t = -1;,
+      for (int i = 0; i < N; i++) {
+        if (cond0[i])
+          t = data0[i];
+        if (cond1[i])
+          t = data1[i];
+      }
+      return t; // use t
+    );
+    checkVectorFunction<uint32_t>(ScalarFn, VectorFn,
+                                  "csa_in_series_same_scalar_int");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN4(
+      float t = 1.0f;,
+      for (int i = 0; i < N; i++) {
+        if (0.0f < data0[i])
+          t = data0[i];
+        if (0.0f < data1[i])
+          t = data1[i];
+      }
+      return t; // use t
+    );
+    checkVectorFunction<float>(ScalarFn, VectorFn,
+                               "csa_in_series_same_scalar_float_select");
+  }
+
+  // Same loop as above, but guarded by the bool arrays instead of compares.
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN4(
+      float t = 1.0f;,
+      for (int i = 0; i < N; i++) {
+        if (cond0[i])
+          t = data0[i];
+        if (cond1[i])
+          t = data1[i];
+      }
+      return t; // use t
+    );
+    checkVectorFunction<float>(ScalarFn, VectorFn,
+                               "csa_in_series_same_scalar_float");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN4(
+      int t = -1; int s = -1;,
+      for (int i = 0; i < N; i++) {
+        if (cond0[i]) {
+          t = data0[i];
+          s = data1[i];
+        }
+      }
+      return t | s; // use t and s
+    );
+    checkVectorFunction<uint32_t>(ScalarFn, VectorFn, "csa_same_cond_int");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN4(
+      float t = 1.0f; float s = 1.0f;,
+      for (int i = 0; i < N; i++) {
+        if (cond0[i]) {
+          t = data0[i];
+          s = data1[i];
+        }
+      }
+      return t + s; // use t and s
+    );
+    checkVectorFunction<float>(ScalarFn, VectorFn, "csa_same_cond_float");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN4(
+      int t = -1;,
+      for (int i = 0; i < N; i++) {
+        if (cond0[i])
+          t = data0[i];
+        else if (cond1[i])
+          t = data1[i];
+      }
+      return t; // use t
+    );
+    checkVectorFunction<uint32_t>(ScalarFn, VectorFn,
+                                  "csa_else_if_same_scalar_int");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN4(
+      float t = 1.0f;,
+      for (int i = 0; i < N; i++) {
+        if (cond0[i])
+          t = data0[i];
+        else if (cond1[i])
+          t = data1[i];
+      }
+      return t; // use t
+    );
+    checkVectorFunction<float>(ScalarFn, VectorFn,
+                               "csa_else_if_same_scalar_float");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN4(
+      int t = -1; int s = -1;,
+      for (int i = 0; i < N; i++) {
+        if (cond0[i])
+          t = data0[i];
+        else if (cond1[i])
+          s = data1[i];
+      }
+      return t | s; // use t and s
+    );
+    checkVectorFunction<uint32_t>(ScalarFn, VectorFn, "csa_else_if_int");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN4(
+      float t = 1.0f; float s = 1.0f;,
+      for (int i = 0; i < N; i++) {
+        if (cond0[i])
+          t = data0[i];
+        else if (cond1[i])
+          s = data1[i];
+      }
+      return t + s; // use t and s
+    );
+    checkVectorFunction<float>(ScalarFn, VectorFn, "csa_else_if_float");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN4(
+      int *t = nullptr;,
+      for (int i = 0; i < N; i++) {
+        if (x < *data0[i])
+          t = data0[i];
+      }
+      return t; // use t
+    );
+    checkVectorFunction<int *>(ScalarFn, VectorFn, "simple_csa_ptr_select");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN4(
+      int *t = nullptr;,
+      for (int i = 0; i < N; i++) {
+        if (i < *data0[i])
+          t = data0[i];
+      }
+      return t; // use t
+    );
+    checkVectorFunction<int *>(ScalarFn, VectorFn,
+                               "simple_csa_ptr_select_induction_cmp");
+  }
+
+  {
+    DEFINE_SCALAR_AND_VECTOR_FN4(
+      int t = 0;,
+      for (int i = 0; i < N; i++) {
+        if (x != data0[i])
+          t = data0[i];
+      }
+      return t; // use t
+    );
+    checkVectorFunction<uint32_t>(ScalarFn, VectorFn,
+                                  "simple_csa_int_select_neg_cond");
+  }
+
+  return 0;
+}
diff --git a/SingleSource/UnitTests/Vectorizer/conditional_scalar_assignment.reference_output b/SingleSource/UnitTests/Vectorizer/conditional_scalar_assignment.reference_output
new file mode 100644
index 0000000000..d175340a92
--- /dev/null
+++ b/SingleSource/UnitTests/Vectorizer/conditional_scalar_assignment.reference_output
@@ -0,0 +1,23 @@
+Checking simple_csa_int_select
+Checking simple_csa_int_select_induction_cmp
+Checking simple_csa_float_select
+Checking simple_csa_int
+Checking simple_csa_float
+Checking csa_in_series_int_select
+Checking csa_in_series_int_select_induction_cmp
+Checking csa_in_series_float_select
+Checking csa_in_series_int
+Checking csa_in_series_float
+Checking csa_in_series_same_scalar_int
+Checking csa_in_series_same_scalar_float_select
+Checking csa_in_series_same_scalar_float
+Checking csa_same_cond_int
+Checking csa_same_cond_float
+Checking csa_else_if_same_scalar_int
+Checking csa_else_if_same_scalar_float
+Checking csa_else_if_int
+Checking csa_else_if_float
+Checking simple_csa_ptr_select
+Checking simple_csa_ptr_select_induction_cmp
+Checking simple_csa_int_select_neg_cond
+exit 0