Merge pull request #47 from RadeonOpenCompute/roc-1.6.4

roc-1.6.4 updates
ROCm · Oct 30, 2017 · c36d9f7 · c36d9f7
2 parents e807ef7 + c6b7afe
commit c36d9f7
Show file tree

Hide file tree

Showing 325 changed files with 4,133 additions and 1,199 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -33,6 +33,9 @@ if (GENERIC_IS_ZERO)
   set(AMDGPU_TARGET_TRIPLE "amdgcn--amdhsa-amdgizcl")
   # HCC will execute utils/change-addr-space.sh
   # and apply utils/add_amdgiz.sed on all .ll files in subdirectory hc/, irif/, opencl/
+  if (CUDA_TRIPLE)  # when CUDA_TRIPLE is set, replace the amdgizcl triple chosen above
+    set(AMDGPU_TARGET_TRIPLE "amdgcn--cuda")
+  endif (CUDA_TRIPLE)
 
 endif (GENERIC_IS_ZERO)
 
@@ -52,6 +55,9 @@ add_subdirectory(oclc)
 add_subdirectory(ocml)
 add_subdirectory(ockl)
 add_subdirectory(opencl)
+if (CUDA_TRIPLE)  # the cuda2gcn directory is configured only when CUDA_TRIPLE is set
+  add_subdirectory(cuda2gcn)
+endif (CUDA_TRIPLE)
 
 if(BUILD_HC_LIB)
   add_subdirectory(hc)

diff --git a/cuda2gcn/CMakeLists.txt b/cuda2gcn/CMakeLists.txt
@@ -0,0 +1,17 @@
+##===--------------------------------------------------------------------------
+##                   ROCm Device Libraries
+##
+## This file is distributed under the University of Illinois Open Source
+## License. See LICENSE.TXT for details.
+##===--------------------------------------------------------------------------
+
+# Gather every OpenCL source under src/ (resolved at configure time).
+file(GLOB cl_sources ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cl)
+
+# cl_sources already holds resolved paths, so it serves as the source list.
+set(sources ${cl_sources})
+
+# Headers of the sibling ocml, ockl and irif directories.
+include_directories(
+  ${CMAKE_CURRENT_SOURCE_DIR}/../ocml/inc
+  ${CMAKE_CURRENT_SOURCE_DIR}/../ockl/inc
+  ${CMAKE_CURRENT_SOURCE_DIR}/../irif/inc
+)
+
+# Pass the sources to the project's opencl_bc_lib helper under the name cuda2gcn.
+opencl_bc_lib(cuda2gcn ${sources})
diff --git a/cuda2gcn/src/bitsbytes.cl b/cuda2gcn/src/bitsbytes.cl
@@ -0,0 +1,46 @@
+/*===--------------------------------------------------------------------------
+ *                   ROCm Device Libraries
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *===------------------------------------------------------------------------*/
+
+#include "ockl.h"
+#include "irif.h"
+
+#define ATTR __attribute__((always_inline, const))
+
+//-------- T __nv_brev
+// Returns the result of __llvm_bitreverse_i32 applied to x.
+ATTR int __nv_brev(int x)
+{
+    const int reversed = __llvm_bitreverse_i32(x);
+    return reversed;
+}
+
+//-------- T __nv_brevll
+// Returns the result of __llvm_bitreverse_i64 applied to x.
+ATTR long __nv_brevll(long x)
+{
+    const long reversed = __llvm_bitreverse_i64(x);
+    return reversed;
+}
+
+//-------- T __nv_clz
+// Reinterprets x as unsigned, passes it to __ockl_clz_u32 and returns the
+// resulting count as a signed int.
+ATTR int __nv_clz(int x)
+{
+    const uint bits = (uint)x;
+    const uint count = __ockl_clz_u32(bits);
+    return (int)count;
+}
+
+//-------- T __nv_clzll
+// Applies __ockl_clz_u32 to the 32-bit halves of x: when the upper half is
+// zero the result is the lower half's count plus 32, otherwise it is the
+// upper half's count.
+ATTR int __nv_clzll(long x)
+{
+    const uint upper = (uint)(x >> 32);
+    const uint lower = (uint)x;
+
+    if (upper != 0)
+        return (int)__ockl_clz_u32(upper);
+
+    return (int)(__ockl_clz_u32(lower) + 32u);
+}
+
+//-------- T __nv_ffs
+// 1-based position of the lowest set bit of x, computed as 32 minus the
+// leading-zero count of the isolated lowest bit. The bit is isolated in
+// unsigned arithmetic because negating INT_MIN as a signed int overflows.
+// NOTE(review): a result of 0 for x == 0 relies on __ockl_clz_u32(0)
+// returning 32 -- confirm.
+ATTR int __nv_ffs(int x)
+{
+    const uint ux = (uint)x;
+    const uint lowest = ux & (0u - ux);
+    return (32 - (int)__ockl_clz_u32(lowest));
+}
+
+//-------- T __nv_ffsll
+// 64-bit counterpart: 64 minus __nv_clzll of the isolated lowest set bit.
+// The bit is isolated in unsigned arithmetic because negating LONG_MIN as a
+// signed long overflows.
+ATTR int __nv_ffsll(long x)
+{
+    const ulong ux = (ulong)x;
+    const ulong lowest = ux & ((ulong)0 - ux);
+    return (int)(64 - __nv_clzll((long)lowest));
+}
+
+//-------- T __nv_popc
+// Returns the result of __llvm_ctpop_i32 applied to x, as an int.
+ATTR int __nv_popc(int x)
+{
+    return (int)__llvm_ctpop_i32(x);
+}
+
+//-------- T __nv_popcll
+// Returns the result of __llvm_ctpop_i64 applied to x, narrowed to int.
+ATTR int __nv_popcll(long x)
+{
+    return (int)__llvm_ctpop_i64(x);
+}
+
diff --git a/cuda2gcn/src/convert.cl b/cuda2gcn/src/convert.cl
@@ -0,0 +1,150 @@
+/*===--------------------------------------------------------------------------
+ *                   ROCm Device Libraries
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *===------------------------------------------------------------------------*/
+
+#define ATTR __attribute__((always_inline, const))
+
+// Emits ATTR TO __nv_{FROM}2{TO}_{NVMODE}(FROM x), whose body returns
+// convert_{TO}_{CLMODE}(x).
+#define CONVERTM(FROM, TO, NVMODE, CLMODE) \
+    ATTR TO __nv_##FROM##2##TO##_##NVMODE(FROM x) \
+    { \
+        return convert_##TO##_##CLMODE(x); \
+    }
+
+// Instantiates CONVERTM once per rounding suffix, pairing the __nv_ suffix
+// with the convert_ suffix: rd/rtn, rn/rte, ru/rtp, rz/rtz.
+#define CONVERT(FROM, TO) \
+    CONVERTM(FROM, TO, rd, rtn) \
+    CONVERTM(FROM, TO, rn, rte) \
+    CONVERTM(FROM, TO, ru, rtp) \
+    CONVERTM(FROM, TO, rz, rtz)
+
+//-------- T __nv_double2float_rd
+//-------- T __nv_double2float_rn
+//-------- T __nv_double2float_ru
+//-------- T __nv_double2float_rz
+CONVERT(double, float)
+
+//-------- T __nv_double2int_rd
+//-------- T __nv_double2int_rn
+//-------- T __nv_double2int_ru
+//-------- T __nv_double2int_rz
+CONVERT(double, int)
+
+//-------- T __nv_float2int_rd
+//-------- T __nv_float2int_rn
+//-------- T __nv_float2int_ru
+//-------- T __nv_float2int_rz
+CONVERT(float, int)
+
+//-------- T __nv_int2float_rd
+//-------- T __nv_int2float_rn
+//-------- T __nv_int2float_ru
+//-------- T __nv_int2float_rz
+CONVERT(int, float)
+
+//-------- T __nv_double2uint_rd
+//-------- T __nv_double2uint_rn
+//-------- T __nv_double2uint_ru
+//-------- T __nv_double2uint_rz
+CONVERT(double, uint)
+
+//-------- T __nv_float2uint_rd
+//-------- T __nv_float2uint_rn
+//-------- T __nv_float2uint_ru
+//-------- T __nv_float2uint_rz
+CONVERT(float, uint)
+
+//-------- T __nv_uint2double_rd
+//-------- T __nv_uint2double_rn
+//-------- T __nv_uint2double_ru
+//-------- T __nv_uint2double_rz
+CONVERT(uint, double)
+
+//-------- T __nv_uint2float_rd
+//-------- T __nv_uint2float_rn
+//-------- T __nv_uint2float_ru
+//-------- T __nv_uint2float_rz
+CONVERT(uint, float)
+
+// Emits ATTR long __nv_{FROM}2ll_{NVMODE}(FROM x), whose body returns
+// convert_long_{CLMODE}(x). The second parameter is accepted but not used.
+#define CONVERT2LLM(FROM, UNUSED_TO, NVMODE, CLMODE) \
+    ATTR long __nv_##FROM##2ll_##NVMODE(FROM x) \
+    { \
+        return convert_long_##CLMODE(x); \
+    }
+
+// Instantiates CONVERT2LLM for the suffix pairs rd/rtn, rn/rte, ru/rtp, rz/rtz.
+#define CONVERT2LL(FROM) \
+    CONVERT2LLM(FROM, long, rd, rtn) \
+    CONVERT2LLM(FROM, long, rn, rte) \
+    CONVERT2LLM(FROM, long, ru, rtp) \
+    CONVERT2LLM(FROM, long, rz, rtz)
+
+//-------- T __nv_double2ll_rd
+//-------- T __nv_double2ll_rn
+//-------- T __nv_double2ll_ru
+//-------- T __nv_double2ll_rz
+CONVERT2LL(double)
+
+//-------- T __nv_float2ll_rd
+//-------- T __nv_float2ll_rn
+//-------- T __nv_float2ll_ru
+//-------- T __nv_float2ll_rz
+CONVERT2LL(float)
+
+// Emits ATTR ulong __nv_{FROM}2ull_{NVMODE}(FROM x), whose body returns
+// convert_ulong_{CLMODE}(x). The second parameter is accepted but not used.
+#define CONVERT2ULLM(FROM, UNUSED_TO, NVMODE, CLMODE) \
+    ATTR ulong __nv_##FROM##2ull_##NVMODE(FROM x) \
+    { \
+        return convert_ulong_##CLMODE(x); \
+    }
+
+// Instantiates CONVERT2ULLM for the suffix pairs rd/rtn, rn/rte, ru/rtp, rz/rtz.
+#define CONVERT2ULL(FROM) \
+    CONVERT2ULLM(FROM, ulong, rd, rtn) \
+    CONVERT2ULLM(FROM, ulong, rn, rte) \
+    CONVERT2ULLM(FROM, ulong, ru, rtp) \
+    CONVERT2ULLM(FROM, ulong, rz, rtz)
+
+//-------- T __nv_double2ull_rd
+//-------- T __nv_double2ull_rn
+//-------- T __nv_double2ull_ru
+//-------- T __nv_double2ull_rz
+CONVERT2ULL(double)
+
+//-------- T __nv_float2ull_rd
+//-------- T __nv_float2ull_rn
+//-------- T __nv_float2ull_ru
+//-------- T __nv_float2ull_rz
+CONVERT2ULL(float)
+
+// Emits ATTR TO __nv_ll2{TO}_{NVMODE}(long x), whose body returns
+// convert_{TO}_{CLMODE}(x). The first parameter is accepted but not used.
+#define CONVERT4LLM(UNUSED_FROM, TO, NVMODE, CLMODE) \
+    ATTR TO __nv_ll2##TO##_##NVMODE(long x) \
+    { \
+        return convert_##TO##_##CLMODE(x); \
+    }
+
+// Instantiates CONVERT4LLM for the suffix pairs rd/rtn, rn/rte, ru/rtp, rz/rtz.
+#define CONVERT4LL(TO) \
+    CONVERT4LLM(long, TO, rd, rtn) \
+    CONVERT4LLM(long, TO, rn, rte) \
+    CONVERT4LLM(long, TO, ru, rtp) \
+    CONVERT4LLM(long, TO, rz, rtz)
+
+//-------- T __nv_ll2double_rd
+//-------- T __nv_ll2double_rn
+//-------- T __nv_ll2double_ru
+//-------- T __nv_ll2double_rz
+CONVERT4LL(double)
+
+//-------- T __nv_ll2float_rd
+//-------- T __nv_ll2float_rn
+//-------- T __nv_ll2float_ru
+//-------- T __nv_ll2float_rz
+CONVERT4LL(float)
+
+// Emits ATTR TO __nv_ull2{TO}_{NVMODE}(ulong x), whose body returns
+// convert_{TO}_{CLMODE}(x). The first parameter is accepted but not used.
+#define CONVERT4ULLM(UNUSED_FROM, TO, NVMODE, CLMODE) \
+    ATTR TO __nv_ull2##TO##_##NVMODE(ulong x) \
+    { \
+        return convert_##TO##_##CLMODE(x); \
+    }
+
+// Instantiates CONVERT4ULLM for the suffix pairs rd/rtn, rn/rte, ru/rtp, rz/rtz.
+#define CONVERT4ULL(TO) \
+    CONVERT4ULLM(ulong, TO, rd, rtn) \
+    CONVERT4ULLM(ulong, TO, rn, rte) \
+    CONVERT4ULLM(ulong, TO, ru, rtp) \
+    CONVERT4ULLM(ulong, TO, rz, rtz)
+
+//-------- T __nv_ull2double_rd
+//-------- T __nv_ull2double_rn
+//-------- T __nv_ull2double_ru
+//-------- T __nv_ull2double_rz
+CONVERT4ULL(double)
+
+//-------- T __nv_ull2float_rd
+//-------- T __nv_ull2float_rn
+//-------- T __nv_ull2float_ru
+//-------- T __nv_ull2float_rz
+CONVERT4ULL(float)
+
diff --git a/cuda2gcn/src/float.cl b/cuda2gcn/src/float.cl
@@ -0,0 +1,33 @@
+/*===--------------------------------------------------------------------------
+ *                   ROCm Device Libraries
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *===------------------------------------------------------------------------*/
+
+#define ATTR __attribute__((always_inline, const))
+
+//-------- T __nv_finitef
+// Returns the value of isfinite() for the float x.
+ATTR int __nv_finitef(float x)
+{
+    const int verdict = isfinite(x);
+    return verdict;
+}
+
+//-------- T __nv_isfinited
+// Returns the value of isfinite() for the double x.
+ATTR int __nv_isfinited(double x)
+{
+    const int verdict = isfinite(x);
+    return verdict;
+}
+
+//-------- T __nv_isinfd
+// Returns the value of isinf() for the double x.
+ATTR int __nv_isinfd(double x)
+{
+    const int verdict = isinf(x);
+    return verdict;
+}
+
+//-------- T __nv_isinff
+// Returns the value of isinf() for the float x.
+ATTR int __nv_isinff(float x)
+{
+    const int verdict = isinf(x);
+    return verdict;
+}
+
+//-------- T __nv_isnand
+// Returns the value of isnan() for the double x.
+ATTR int __nv_isnand(double x)
+{
+    const int verdict = isnan(x);
+    return verdict;
+}
+
+//-------- T __nv_isnanf
+// Returns the value of isnan() for the float x.
+ATTR int __nv_isnanf(float x)
+{
+    const int verdict = isnan(x);
+    return verdict;
+}
+
+//-------- T __nv_nan
+// Returns the double produced by __builtin_nan for the tag string tagp.
+// NOTE(review): ATTR marks this function const although it forwards a
+// pointer to the tag string -- confirm const (rather than pure) is intended.
+ATTR double __nv_nan(char *tagp)
+{
+    const double quiet = __builtin_nan(tagp);
+    return quiet;
+}
+
+//-------- T __nv_nanf
+// Returns a float NaN for the tag string tagp. The single-precision builtin
+// is used so the NaN is formed as a float directly; the double builtin
+// followed by an implicit narrowing does not preserve the tag's payload bits.
+// NOTE(review): ATTR marks this function const although it forwards a
+// pointer to the tag string -- confirm const (rather than pure) is intended.
+ATTR float __nv_nanf(char *tagp)
+{
+    return __builtin_nanf(tagp);
+}
+
diff --git a/cuda2gcn/src/generic.cl b/cuda2gcn/src/generic.cl
@@ -0,0 +1,54 @@
+/*===--------------------------------------------------------------------------
+ *                   ROCm Device Libraries
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *===------------------------------------------------------------------------*/
+
+#define ATTR __attribute__((always_inline, const))
+
+#define MAX(x, y) (((x) > (y)) ? (x) : (y)) // larger operand; the selected argument is evaluated twice
+#define MIN(x, y) (((x) < (y)) ? (x) : (y)) // smaller operand; the selected argument is evaluated twice
+
+//-------- T __nv_abs
+// Returns abs(x) converted to the signed int return type.
+ATTR int __nv_abs(int x)
+{
+    return (int)abs(x);
+}
+
+//-------- T __nv_llabs
+// Returns abs(x) converted to the signed long return type.
+ATTR long __nv_llabs(long x)
+{
+    return (long)abs(x);
+}
+
+//-------- T __nv_max
+// Returns a when a > b, otherwise b.
+ATTR int __nv_max(int a, int b)
+{
+    if (a > b)
+        return a;
+    return b;
+}
+
+//-------- T __nv_llmax
+// Returns a when a > b, otherwise b (signed 64-bit).
+ATTR long __nv_llmax(long a, long b)
+{
+    if (a > b)
+        return a;
+    return b;
+}
+
+//-------- T __nv_ullmax
+// Returns a when a > b, otherwise b (unsigned 64-bit).
+ATTR ulong __nv_ullmax(ulong a, ulong b)
+{
+    if (a > b)
+        return a;
+    return b;
+}
+
+//-------- T __nv_umax
+// Returns a when a > b, otherwise b (unsigned 32-bit).
+ATTR uint __nv_umax(uint a, uint b)
+{
+    if (a > b)
+        return a;
+    return b;
+}
+
+//-------- T __nv_min
+// Returns a when a < b, otherwise b.
+ATTR int __nv_min(int a, int b)
+{
+    if (a < b)
+        return a;
+    return b;
+}
+
+//-------- T __nv_llmin
+// Returns a when a < b, otherwise b (signed 64-bit).
+ATTR long __nv_llmin(long a, long b)
+{
+    if (a < b)
+        return a;
+    return b;
+}
+
+//-------- T __nv_ullmin
+// Returns a when a < b, otherwise b (unsigned 64-bit).
+ATTR ulong __nv_ullmin(ulong a, ulong b)
+{
+    if (a < b)
+        return a;
+    return b;
+}
+
+//-------- T __nv_umin
+// Returns a when a < b, otherwise b (unsigned 32-bit).
+ATTR uint __nv_umin(uint a, uint b)
+{
+    if (a < b)
+        return a;
+    return b;
+}
+
+//-------- T __nv_sad
+// Returns z plus the absolute difference of x and y. abs_diff() is used
+// instead of abs(x-y) because the signed subtraction overflows when the
+// operands are more than INT_MAX apart; abs_diff() yields the full unsigned
+// magnitude without that intermediate.
+ATTR uint __nv_sad(int x, int y, uint z)
+{
+    return (z + abs_diff(x, y));
+}
+
+//-------- T __nv_usad
+// Returns z plus the absolute difference of x and y. abs() leaves unsigned
+// operands unchanged, so abs(x-y) returned the wrapped difference whenever
+// x < y; abs_diff() returns the true magnitude for either ordering.
+ATTR uint __nv_usad(uint x, uint y, uint z)
+{
+    return (z + abs_diff(x, y));
+}
+
diff --git a/cuda2gcn/src/half.cl b/cuda2gcn/src/half.cl
@@ -0,0 +1,23 @@
+/*===--------------------------------------------------------------------------
+ *                   ROCm Device Libraries
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *===------------------------------------------------------------------------*/
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+#define ATTR __attribute__((always_inline, const))
+
+//-------- T __nv_float2half_rn
+// Converts x to half with a plain cast. ATTR is applied for consistency with
+// the other cuda2gcn wrappers, which all carry it.
+// NOTE(review): the _rn suffix suggests round-to-nearest; the cast uses the
+// compiler's default conversion rounding -- confirm they agree.
+ATTR half __nv_float2half_rn(float x)
+{
+    return (half)x;
+}
+
+//-------- T __nv_half2float
+// Converts the half x to float with a plain cast. ATTR is applied for
+// consistency with the other cuda2gcn wrappers, which all carry it.
+ATTR float __nv_half2float(half x)
+{
+    return (float)x;
+}
+
diff --git a/cuda2gcn/src/integer.cl b/cuda2gcn/src/integer.cl
@@ -0,0 +1,29 @@
+/*===--------------------------------------------------------------------------
+ *                   ROCm Device Libraries
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *===------------------------------------------------------------------------*/
+
+#include "ockl.h"
+
+#define ATTR __attribute__((always_inline, const))
+
+//-------- T __nv_mul24
+// Forwards both operands to __ockl_mul24_i32 and returns its result.
+ATTR int __nv_mul24(int x, int y)
+{
+    const int product = __ockl_mul24_i32(x, y);
+    return product;
+}
+
+//-------- T __nv_umul24
+// Forwards both operands to __ockl_mul24_u32 and returns its result.
+ATTR uint __nv_umul24(uint x, uint y)
+{
+    const uint product = __ockl_mul24_u32(x, y);
+    return product;
+}
+
+//-------- T __nv_mul64hi
+// Forwards both operands to __ockl_mul_hi_i64 and returns its result.
+ATTR long __nv_mul64hi(long x, long y)
+{
+    const long upper = __ockl_mul_hi_i64(x, y);
+    return upper;
+}
+
+//-------- T __nv_mulhi
+// Forwards both operands to __ockl_mul_hi_i32 and returns its result.
+ATTR int __nv_mulhi(int x, int y)
+{
+    const int upper = __ockl_mul_hi_i32(x, y);
+    return upper;
+}
+
+//-------- T __nv_umul64hi
+// Forwards both operands to __ockl_mul_hi_u64 and returns its result.
+ATTR ulong __nv_umul64hi(ulong x, ulong y)
+{
+    const ulong upper = __ockl_mul_hi_u64(x, y);
+    return upper;
+}
+
+//-------- T __nv_umulhi
+// Forwards both operands to __ockl_mul_hi_u32 and returns its result.
+ATTR uint __nv_umulhi(uint x, uint y)
+{
+    const uint upper = __ockl_mul_hi_u32(x, y);
+    return upper;
+}
+