Skip to content
This repository has been archived by the owner on May 14, 2024. It is now read-only.

Commit

Permalink
merge promotion/amd-mainline-open/2023.02.01 into amd-mainline-open
Browse files Browse the repository at this point in the history
  Bulk Promotion for Feb 01, 2023
  Targetting ROCm 5.6

Merge remote-tracking branch 'gerritgit/promotion/amd-mainline-open/2023.02.01' into HEAD

Change-Id: Icfc0ccb6e121d60d412be5ef5c1b6342c93fc2df
  • Loading branch information
David Salinas committed Mar 23, 2023
2 parents 49dd756 + f098504 commit 3465f2e
Show file tree
Hide file tree
Showing 115 changed files with 2,178 additions and 1,779 deletions.
663 changes: 663 additions & 0 deletions asanrtl/src/dm.cl

Large diffs are not rendered by default.

20 changes: 2 additions & 18 deletions irif/inc/irif.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,15 @@

#define REQUIRES_16BIT_INSTS __attribute__((target("16-bit-insts")))
#define REQUIRES_GFX9_INSTS __attribute__((target("gfx9-insts")))
#define REQUIRES_WAVE32 __attribute__((target("wavefrontsize32")))
#define REQUIRES_WAVE64 __attribute__((target("wavefrontsize64")))

// Generic intrinsics
extern __attribute__((const)) float2 __llvm_fma_2f32(float2, float2, float2) __asm("llvm.fma.v2f32");

extern __attribute__((const)) half2 __llvm_fma_2f16(half2, half2, half2) __asm("llvm.fma.v2f16");
extern __attribute__((const)) half2 __llvm_fabs_2f16(half2) __asm("llvm.fabs.v2f16");
extern __attribute__((const)) half2 __llvm_minnum_2f16(half2, half2) __asm("llvm.minnum.v2f16");
extern __attribute__((const)) half2 __llvm_maxnum_2f16(half2, half2) __asm("llvm.maxnum.v2f16");
extern __attribute__((const)) half2 __llvm_copysign_2f16(half2, half2) __asm("llvm.copysign.v2f16");
extern __attribute__((const)) half2 __llvm_floor_2f16(half2) __asm("llvm.floor.v2f16");
extern __attribute__((const)) half2 __llvm_ceil_2f16(half2) __asm("llvm.ceil.v2f16");
extern __attribute__((const)) half2 __llvm_trunc_2f16(half2) __asm("llvm.trunc.v2f16");
extern __attribute__((const)) half2 __llvm_round_2f16(half2) __asm("llvm.round.v2f16");
extern __attribute__((const)) half2 __llvm_rint_2f16(half2) __asm("llvm.rint.v2f16");
extern __attribute__((const)) half2 __llvm_canonicalize_2f16(half2) __asm("llvm.canonicalize.v2f16");

#define BUILTIN_CLZ_U8(x) (uchar)(x == 0u ? 8 : __builtin_clz(x) - 24)
#define BUILTIN_CLZ_U16(x) (ushort)(x == 0u ? 16 : __builtin_clzs(x))
Expand All @@ -38,16 +32,6 @@ extern __attribute__((const)) half2 __llvm_canonicalize_2f16(half2) __asm("llvm.
#define BUILTIN_CTZ_U32(x) (uint)(x == 0u ? 32 : __builtin_ctz(x))
#define BUILTIN_CTZ_U64(x) (ulong)(x == 0u ? 64 : __builtin_ctzl(x))

// AMDGPU intrinsics
extern __attribute__((const, convergent)) ulong __llvm_amdgcn_icmp_i64_i32(uint, uint, uint) __asm("llvm.amdgcn.icmp.i64.i32");
extern __attribute__((const, convergent)) ulong __llvm_amdgcn_icmp_i64_i64(ulong, ulong, uint) __asm("llvm.amdgcn.icmp.i64.i64");
extern __attribute__((const, convergent)) ulong __llvm_amdgcn_fcmp_i64_f32(float, float, uint) __asm("llvm.amdgcn.fcmp.i64.f32");
extern __attribute__((const, convergent)) ulong __llvm_amdgcn_fcmp_i64_f64(double, double, uint) __asm("llvm.amdgcn.fcmp.i64.f64");
extern __attribute__((const, convergent)) uint __llvm_amdgcn_icmp_i32_i32(uint, uint, uint) __asm("llvm.amdgcn.icmp.i32.i32");
extern __attribute__((const, convergent)) uint __llvm_amdgcn_icmp_i32_i64(ulong, ulong, uint) __asm("llvm.amdgcn.icmp.i32.i64");
extern __attribute__((const, convergent)) uint __llvm_amdgcn_fcmp_i32_f32(float, float, uint) __asm("llvm.amdgcn.fcmp.i32.f32");
extern __attribute__((const, convergent)) uint __llvm_amdgcn_fcmp_i32_f64(double, double, uint) __asm("llvm.amdgcn.fcmp.i32.f64");

// Buffer Load/Store
extern __attribute__((pure)) float4 __llvm_amdgcn_struct_buffer_load_format_v4f32(uint4 rsrc, uint vindex, uint voffset, uint soffset, uint cachepolicy) __asm("llvm.amdgcn.struct.buffer.load.format.v4f32");
extern __attribute__((pure)) half4 __llvm_amdgcn_struct_buffer_load_format_v4f16(uint4 rsrc, uint vindex, uint voffset, uint soffset, uint cachepolicy) __asm("llvm.amdgcn.struct.buffer.load.format.v4f16");
Expand Down
707 changes: 707 additions & 0 deletions irif/src/base-image-intrinsics.ll

Large diffs are not rendered by default.

37 changes: 0 additions & 37 deletions irif/src/cz.ll

This file was deleted.

Loading

0 comments on commit 3465f2e

Please sign in to comment.