Skip to content
This repository has been archived by the owner on May 14, 2024. It is now read-only.

Commit

Permalink
Merge pull request #48 from RadeonOpenCompute/roc-1.7.0
Browse files Browse the repository at this point in the history
roc-1.7.0 updates
  • Loading branch information
kzhuravl authored Nov 7, 2017
2 parents c36d9f7 + e99c6d6 commit 197e51e
Show file tree
Hide file tree
Showing 12 changed files with 79 additions and 311 deletions.
9 changes: 5 additions & 4 deletions hc/src/hc_amdgcn.ll
Original file line number Diff line number Diff line change
Expand Up @@ -192,20 +192,20 @@ define i32 @__atomic_wrapdec(i32 addrspace(4)* nocapture %addr, i32 %val) #1 {
; llvm.amdgcn.atomic.dec.i32.p4i32 <addr> <val> <ordering> <scope> <is_volatile>
declare i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* nocapture, i32, i32, i32, i1) #4

define i64 @__clock_u64() #1 {
define i64 @__clock_u64() #8 {
%ret = tail call i64 @llvm.amdgcn.s.memrealtime()
ret i64 %ret
}

declare i64 @llvm.amdgcn.s.memrealtime() #1
declare i64 @llvm.amdgcn.s.memrealtime() #8


define i64 @__cycle_u64() #1 {
define i64 @__cycle_u64() #8 {
%ret = tail call i64 @llvm.amdgcn.s.memtime()
ret i64 %ret
}

declare i64 @llvm.amdgcn.s.memtime() #1
declare i64 @llvm.amdgcn.s.memtime() #8

define i32 @get_group_segment_size() #0 {
%1 = call i32 @llvm.amdgcn.s.getreg(i32 17158) #0
Expand Down Expand Up @@ -253,4 +253,5 @@ attributes #4 = { convergent nounwind }
attributes #5 = { alwaysinline nounwind }
attributes #6 = { alwaysinline norecurse nounwind readnone }
attributes #7 = { norecurse nounwind readnone }
attributes #8 = { alwaysinline nounwind inaccessiblememonly norecurse }
attributes #9 = { convergent nounwind readnone }
47 changes: 15 additions & 32 deletions hc/src/hc_kernel.cl
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,18 @@ amp_get_global_id(int dim)

switch(dim) {
case 0:
l = __llvm_amdgcn_workitem_id_x();
g = __llvm_amdgcn_workgroup_id_x();
l = __builtin_amdgcn_workitem_id_x();
g = __builtin_amdgcn_workgroup_id_x();
s = p->workgroup_size_x;
break;
case 1:
l = __llvm_amdgcn_workitem_id_y();
g = __llvm_amdgcn_workgroup_id_y();
l = __builtin_amdgcn_workitem_id_y();
g = __builtin_amdgcn_workgroup_id_y();
s = p->workgroup_size_y;
break;
case 2:
l = __llvm_amdgcn_workitem_id_z();
g = __llvm_amdgcn_workgroup_id_z();
l = __builtin_amdgcn_workitem_id_z();
g = __builtin_amdgcn_workgroup_id_z();
s = p->workgroup_size_z;
break;
default:
Expand Down Expand Up @@ -59,11 +59,11 @@ amp_get_local_id(int dim)
{
switch(dim) {
case 0:
return __llvm_amdgcn_workitem_id_x();
return __builtin_amdgcn_workitem_id_x();
case 1:
return __llvm_amdgcn_workitem_id_y();
return __builtin_amdgcn_workitem_id_y();
case 2:
return __llvm_amdgcn_workitem_id_z();
return __builtin_amdgcn_workitem_id_z();
default:
return 0;
}
Expand Down Expand Up @@ -102,11 +102,11 @@ amp_get_group_id(int dim)
{
switch(dim) {
case 0:
return __llvm_amdgcn_workgroup_id_x();
return __builtin_amdgcn_workgroup_id_x();
case 1:
return __llvm_amdgcn_workgroup_id_y();
return __builtin_amdgcn_workgroup_id_y();
case 2:
return __llvm_amdgcn_workgroup_id_z();
return __builtin_amdgcn_workgroup_id_z();
default:
return 0;
}
Expand All @@ -115,24 +115,7 @@ amp_get_group_id(int dim)
ATTR uint
amp_get_local_size(int dim)
{
__constant hsa_kernel_dispatch_packet_t *p = __llvm_amdgcn_dispatch_ptr();
uint d;

switch(dim) {
case 0:
d = p->workgroup_size_x;
break;
case 1:
d = p->workgroup_size_y;
break;
case 2:
d = p->workgroup_size_z;
break;
default:
d = 1;
break;
}
return d;
return __ockl_get_local_size(dim);
}

ATTR uint
Expand Down Expand Up @@ -176,10 +159,10 @@ hc_work_group_barrier(cl_mem_fence_flags flags, memory_scope scope)
{
if (flags) {
atomic_work_item_fence(flags, memory_order_release, scope);
__llvm_amdgcn_s_barrier();
__builtin_amdgcn_s_barrier();
atomic_work_item_fence(flags, memory_order_acquire, scope);
} else {
__llvm_amdgcn_s_barrier();
__builtin_amdgcn_s_barrier();
}
}

Expand Down
33 changes: 0 additions & 33 deletions irif/inc/irif.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,27 +99,6 @@ extern __attribute__((const)) float __llvm_canonicalize_f32(float) __asm("llvm.c
extern __attribute__((const)) double __llvm_canonicalize_f64(double) __asm("llvm.canonicalize.f64");

// Intrinsics requiring wrapping
extern bool __llvm_sadd_with_overflow_i16(short, short, __private short*);
extern bool __llvm_uadd_with_overflow_i16(ushort, ushort, __private ushort*);
extern bool __llvm_sadd_with_overflow_i32(int, int, __private int*);
extern bool __llvm_uadd_with_overflow_i32(uint, uint, __private uint*);
extern bool __llvm_sadd_with_overflow_i64(long, long, __private long*);
extern bool __llvm_uadd_with_overflow_i64(ulong, ulong, __private ulong*);

extern bool __llvm_ssub_with_overflow_i16(short, short, __private short*);
extern bool __llvm_usub_with_overflow_i16(ushort, ushort, __private ushort*);
extern bool __llvm_ssub_with_overflow_i32(int, int, __private int*);
extern bool __llvm_usub_with_overflow_i32(uint, uint, __private uint*);
extern bool __llvm_ssub_with_overflow_i64(long, long, __private long*);
extern bool __llvm_usub_with_overflow_i64(ulong, ulong, __private ulong*);

extern bool __llvm_smul_with_overflow_i16(short, short, __private short*);
extern bool __llvm_umul_with_overflow_i16(ushort, ushort, __private ushort*);
extern bool __llvm_smul_with_overflow_i32(int, int, __private int*);
extern bool __llvm_umul_with_overflow_i32(uint, uint, __private uint*);
extern bool __llvm_smul_with_overflow_i64(long, long, __private long*);
extern bool __llvm_umul_with_overflow_i64(ulong, ulong, __private ulong*);

extern __attribute__((const)) uchar __llvm_ctlz_i8(uchar);
extern __attribute__((const)) ushort __llvm_ctlz_i16(ushort);
extern __attribute__((const)) uint __llvm_ctlz_i32(uint);
Expand Down Expand Up @@ -313,18 +292,6 @@ extern __attribute__((const)) double __llvm_amdgcn_trig_preop_f64(double, int) _
extern __attribute__((const)) half __llvm_amdgcn_fmed3_f16(half, half, half) __asm("llvm.amdgcn.fmed3.f16");
extern __attribute__((const)) float __llvm_amdgcn_fmed3_f32(float, float, float) __asm("llvm.amdgcn.fmed3.f32");

extern void __llvm_amdgcn_s_sendmsg(uint, uint) __asm("llvm.amdgcn.s.sendmsg");
extern void __llvm_amdgcn_s_barrier(void) __asm("llvm.amdgcn.s.barrier");
extern void __llvm_amdgcn_wave_barrier(void) __asm("llvm.amdgcn.wave.barrier");

extern __attribute__((const)) uint __llvm_amdgcn_workitem_id_x(void) __asm("llvm.amdgcn.workitem.id.x");
extern __attribute__((const)) uint __llvm_amdgcn_workitem_id_y(void) __asm("llvm.amdgcn.workitem.id.y");
extern __attribute__((const)) uint __llvm_amdgcn_workitem_id_z(void) __asm("llvm.amdgcn.workitem.id.z");

extern __attribute__((const)) uint __llvm_amdgcn_workgroup_id_x(void) __asm("llvm.amdgcn.workgroup.id.x");
extern __attribute__((const)) uint __llvm_amdgcn_workgroup_id_y(void) __asm("llvm.amdgcn.workgroup.id.y");
extern __attribute__((const)) uint __llvm_amdgcn_workgroup_id_z(void) __asm("llvm.amdgcn.workgroup.id.z");

extern __attribute__((const)) __constant void *__llvm_amdgcn_dispatch_ptr(void) __asm("llvm.amdgcn.dispatch.ptr");
extern __attribute__((const)) __constant void *__llvm_amdgcn_queue_ptr(void) __asm("llvm.amdgcn.queue.ptr");
extern __attribute__((const)) __constant void *__llvm_amdgcn_kernarg_segment_ptr(void) __asm("llvm.amdgcn.kernarg.segment.ptr");
Expand Down
183 changes: 0 additions & 183 deletions irif/src/overflow.ll

This file was deleted.

8 changes: 4 additions & 4 deletions ockl/src/add_sat.cl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ __attribute__((always_inline)) int
OCKL_MANGLE_I32(add_sat)(int x, int y)
{
int s;
bool c = __llvm_sadd_with_overflow_i32(x, y, &s);
bool c = __builtin_sadd_overflow(x, y, &s);
int lim = (x >> 31) ^ INT_MAX;
return c ? lim : s;
}
Expand All @@ -21,15 +21,15 @@ __attribute__((always_inline)) uint
OCKL_MANGLE_U32(add_sat)(uint x, uint y)
{
uint s;
bool c = __llvm_uadd_with_overflow_i32(x, y, &s);
bool c = __builtin_uadd_overflow(x, y, &s);
return c ? UINT_MAX : s;
}

__attribute__((always_inline)) long
OCKL_MANGLE_I64(add_sat)(long x, long y)
{
long s;
bool c = __llvm_sadd_with_overflow_i64(x, y, &s);
bool c = __builtin_saddl_overflow(x, y, &s);
long lim = (x >> 63) ^ LONG_MAX;
return c ? lim : s;
}
Expand All @@ -38,7 +38,7 @@ __attribute__((always_inline)) ulong
OCKL_MANGLE_U64(add_sat)(ulong x, ulong y)
{
ulong s;
bool c = __llvm_uadd_with_overflow_i64(x, y, &s);
bool c = __builtin_uaddl_overflow(x, y, &s);
return c ? ULONG_MAX : s;
}

Loading

0 comments on commit 197e51e

Please sign in to comment.