From a493beadbaa691851843eea0e7b8d9d934627c6b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 16 Dec 2022 14:45:49 -0500 Subject: [PATCH 01/22] Use QNAN_* macros for most nan literals The remaining uses of the QNANBITPATT* macros now point out code that mostly should switch from casting to integer to operating purely with floating point operations. Regex and clang-format. Change-Id: Ia57bd267d2c8f2d446b02b29ffbc63dfff939ce1 --- ocml/src/acoshD.cl | 2 +- ocml/src/acoshF.cl | 2 +- ocml/src/acoshH.cl | 2 +- ocml/src/atan2D.cl | 2 +- ocml/src/atan2F.cl | 2 +- ocml/src/atan2H.cl | 2 +- ocml/src/atan2piD.cl | 2 +- ocml/src/atan2piF.cl | 2 +- ocml/src/atan2piH.cl | 2 +- ocml/src/atanhD.cl | 2 +- ocml/src/atanhF.cl | 2 +- ocml/src/atanhH.cl | 2 +- ocml/src/catanhD.cl | 2 +- ocml/src/catanhF.cl | 2 +- ocml/src/cexpD.cl | 2 +- ocml/src/cexpF.cl | 2 +- ocml/src/cosF.cl | 2 +- ocml/src/cospiF.cl | 2 +- ocml/src/csqrtD.cl | 2 +- ocml/src/ctanhD.cl | 4 ++-- ocml/src/ctanhF.cl | 4 ++-- ocml/src/erfcinvD.cl | 2 +- ocml/src/erfcinvF.cl | 2 +- ocml/src/erfinvD.cl | 2 +- ocml/src/erfinvF.cl | 2 +- ocml/src/hypotD.cl | 2 +- ocml/src/len3D.cl | 2 +- ocml/src/len4D.cl | 2 +- ocml/src/log1pD.cl | 2 +- ocml/src/log1pF.cl | 2 +- ocml/src/logD_base.h | 2 +- ocml/src/powD_base.h | 16 ++++++++-------- ocml/src/powF_base.h | 16 ++++++++-------- ocml/src/powH_base.h | 14 +++++++------- ocml/src/remainderD_base.h | 4 ++-- ocml/src/remainderF_base.h | 4 ++-- ocml/src/remainderH_base.h | 4 ++-- ocml/src/rhypotD.cl | 2 +- ocml/src/rlen3D.cl | 2 +- ocml/src/rlen4D.cl | 2 +- ocml/src/scalbD.cl | 6 +++--- ocml/src/scalbF.cl | 6 +++--- ocml/src/scalbH.cl | 6 +++--- ocml/src/sinF.cl | 2 +- ocml/src/sincosF.cl | 4 ++-- ocml/src/sincospiF.cl | 4 ++-- ocml/src/sinpiF.cl | 2 +- ocml/src/tanF.cl | 2 +- ocml/src/tgammaD.cl | 4 ++-- ocml/src/tgammaF.cl | 2 +- ocml/src/y1D.cl | 2 +- ocml/src/y1F.cl | 2 +- 52 files changed, 86 insertions(+), 86 deletions(-) diff --git a/ocml/src/acoshD.cl b/ocml/src/acoshD.cl index 3cacbf9f..a596ecae 100644 --- a/ocml/src/acoshD.cl +++ b/ocml/src/acoshD.cl @@ -23,7 +23,7 @@ MATH_MANGLE(acosh)(double x) if (!FINITE_ONLY_OPT()) { z = BUILTIN_CLASS_F64(x, CLASS_PINF) ? x : z; - z = x < 1.0 ? AS_DOUBLE(QNANBITPATT_DP64) : z; + z = x < 1.0 ? QNAN_F64 : z; } return z; diff --git a/ocml/src/acoshF.cl b/ocml/src/acoshF.cl index 699330ca..5b60162b 100644 --- a/ocml/src/acoshF.cl +++ b/ocml/src/acoshF.cl @@ -23,7 +23,7 @@ MATH_MANGLE(acosh)(float x) if (!FINITE_ONLY_OPT()) { z = BUILTIN_CLASS_F32(x, CLASS_PINF) ? x : z; - z = x < 1.0f ? AS_FLOAT(QNANBITPATT_SP32) : z; + z = x < 1.0f ? QNAN_F32 : z; } return z; diff --git a/ocml/src/acoshH.cl b/ocml/src/acoshH.cl index 32990b16..c0cc91c5 100644 --- a/ocml/src/acoshH.cl +++ b/ocml/src/acoshH.cl @@ -18,7 +18,7 @@ MATH_MANGLE(acosh)(half hx) ret = (half)(BUILTIN_LOG2_F32(t) * 0x1.62e430p-1f); if (!FINITE_ONLY_OPT()) { - ret = hx < 1.0h ? AS_HALF((short)QNANBITPATT_HP16) : ret; + ret = hx < 1.0h ? QNAN_F16 : ret; } return ret; diff --git a/ocml/src/atan2D.cl b/ocml/src/atan2D.cl index e59bbade..6c962374 100644 --- a/ocml/src/atan2D.cl +++ b/ocml/src/atan2D.cl @@ -41,7 +41,7 @@ MATH_MANGLE(atan2)(double y, double x) a = (BUILTIN_ISINF_F64(x) & BUILTIN_ISINF_F64(y)) ? t : a; a = (BUILTIN_ISNAN_F64(x) | BUILTIN_ISNAN_F64(y)) ? 
- AS_DOUBLE(QNANBITPATT_DP64) : a; + QNAN_F64 : a; } return BUILTIN_COPYSIGN_F64(a, y); diff --git a/ocml/src/atan2F.cl b/ocml/src/atan2F.cl index e54f8ded..573b6fcd 100644 --- a/ocml/src/atan2F.cl +++ b/ocml/src/atan2F.cl @@ -47,7 +47,7 @@ MATH_MANGLE(atan2)(float y, float x) // x or y is NaN a = (BUILTIN_ISNAN_F32(x) | BUILTIN_ISNAN_F32(y)) ? - AS_FLOAT(QNANBITPATT_SP32) : a; + QNAN_F32 : a; } return BUILTIN_COPYSIGN_F32(a, y); diff --git a/ocml/src/atan2H.cl b/ocml/src/atan2H.cl index 73f15217..42dde2a0 100644 --- a/ocml/src/atan2H.cl +++ b/ocml/src/atan2H.cl @@ -44,7 +44,7 @@ MATH_MANGLE(atan2)(half y, half x) // x or y is NaN a = (BUILTIN_ISNAN_F16(x) | BUILTIN_ISNAN_F16(y)) ? - AS_HALF((short)QNANBITPATT_HP16) : a; + QNAN_F16 : a; } return BUILTIN_COPYSIGN_F16(a, y); diff --git a/ocml/src/atan2piD.cl b/ocml/src/atan2piD.cl index 76063d88..fd18a2cb 100644 --- a/ocml/src/atan2piD.cl +++ b/ocml/src/atan2piD.cl @@ -38,7 +38,7 @@ MATH_MANGLE(atan2pi)(double y, double x) a = (BUILTIN_ISINF_F64(x) & BUILTIN_ISINF_F64(y)) ? t : a; a = (BUILTIN_ISNAN_F64(x) | BUILTIN_ISNAN_F64(y)) ? - AS_DOUBLE(QNANBITPATT_DP64) : a; + QNAN_F64 : a; } return BUILTIN_COPYSIGN_F64(a, y); diff --git a/ocml/src/atan2piF.cl b/ocml/src/atan2piF.cl index a03ab409..9418eeea 100644 --- a/ocml/src/atan2piF.cl +++ b/ocml/src/atan2piF.cl @@ -44,7 +44,7 @@ MATH_MANGLE(atan2pi)(float y, float x) // x or y is NaN a = (BUILTIN_ISNAN_F32(x) | BUILTIN_ISNAN_F32(y)) ? - AS_FLOAT(QNANBITPATT_SP32) : a; + QNAN_F32 : a; } return BUILTIN_COPYSIGN_F32(a, y); diff --git a/ocml/src/atan2piH.cl b/ocml/src/atan2piH.cl index 61742e69..ff5dbed6 100644 --- a/ocml/src/atan2piH.cl +++ b/ocml/src/atan2piH.cl @@ -41,7 +41,7 @@ MATH_MANGLE(atan2pi)(half y, half x) // x or y is NaN a = (BUILTIN_ISNAN_F16(x) | BUILTIN_ISNAN_F16(y)) ? - AS_HALF((short)QNANBITPATT_HP16) : a; + QNAN_F16 : a; } return BUILTIN_COPYSIGN_F16(a, y); diff --git a/ocml/src/atanhD.cl b/ocml/src/atanhD.cl index 7c6e38bb..fdbeb7c0 100644 --- a/ocml/src/atanhD.cl +++ b/ocml/src/atanhD.cl @@ -21,7 +21,7 @@ MATH_MANGLE(atanh)(double x) z = y < 0x1.0p-27 ? y : z; if (!FINITE_ONLY_OPT()) { - z = y > 1.0 ? AS_DOUBLE(QNANBITPATT_DP64) : z; + z = y > 1.0 ? QNAN_F64 : z; z = y == 1.0 ? AS_DOUBLE(PINFBITPATT_DP64) : z; } diff --git a/ocml/src/atanhF.cl b/ocml/src/atanhF.cl index 8340c9b8..98b4fc9e 100644 --- a/ocml/src/atanhF.cl +++ b/ocml/src/atanhF.cl @@ -21,7 +21,7 @@ MATH_MANGLE(atanh)(float x) z = y < 0x1.0p-12f ? y : z; if (!FINITE_ONLY_OPT()) { - z = y > 1.0f ? AS_FLOAT(QNANBITPATT_SP32) : z; + z = y > 1.0f ? QNAN_F32 : z; z = y == 1.0f ? AS_FLOAT(PINFBITPATT_SP32) : z; } diff --git a/ocml/src/atanhH.cl b/ocml/src/atanhH.cl index ec16754e..16534ac7 100644 --- a/ocml/src/atanhH.cl +++ b/ocml/src/atanhH.cl @@ -20,7 +20,7 @@ MATH_MANGLE(atanh)(half hx) if (!FINITE_ONLY_OPT()) { ret = x == 1.0f ? AS_HALF((short)PINFBITPATT_HP16) : ret; - ret = (x > 1.0f) | BUILTIN_ISNAN_F32(x) ? AS_HALF((short)QNANBITPATT_HP16) : ret; + ret = (x > 1.0f) | BUILTIN_ISNAN_F32(x) ? QNAN_F16 : ret; } return BUILTIN_COPYSIGN_F16(ret, hx); diff --git a/ocml/src/catanhD.cl b/ocml/src/catanhD.cl index 1539e12b..06f7a229 100644 --- a/ocml/src/catanhD.cl +++ b/ocml/src/catanhD.cl @@ -47,7 +47,7 @@ MATH_MANGLE(catanh)(double2 z) rr = x == 0.0 ? 0.0 : rr; rr = BUILTIN_ISINF_F64(x) ? 0.0 : rr; rr = (BUILTIN_ISNAN_F64(x) & BUILTIN_ISINF_F64(y)) ? 0.0 : rr; - ri = (BUILTIN_ISNAN_F64(x) & BUILTIN_ISFINITE_F64(y)) ? AS_DOUBLE(QNANBITPATT_DP64) : ri; + ri = (BUILTIN_ISNAN_F64(x) & BUILTIN_ISFINITE_F64(y)) ? 
QNAN_F64 : ri; ri = BUILTIN_ISNAN_F64(y) ? y : ri; } diff --git a/ocml/src/catanhF.cl b/ocml/src/catanhF.cl index e0267eed..47704bbb 100644 --- a/ocml/src/catanhF.cl +++ b/ocml/src/catanhF.cl @@ -47,7 +47,7 @@ MATH_MANGLE(catanh)(float2 z) rr = x == 0.0f ? 0.0f : rr; rr = BUILTIN_ISINF_F32(x) ? 0.0f : rr; rr = (BUILTIN_ISNAN_F32(x) & BUILTIN_ISINF_F32(y)) ? 0.0f : rr; - ri = (BUILTIN_ISNAN_F32(x) & BUILTIN_ISFINITE_F32(y)) ? AS_FLOAT(QNANBITPATT_SP32) : ri; + ri = (BUILTIN_ISNAN_F32(x) & BUILTIN_ISFINITE_F32(y)) ? QNAN_F32 : ri; ri = BUILTIN_ISNAN_F32(y) ? y : ri; } diff --git a/ocml/src/cexpD.cl b/ocml/src/cexpD.cl index 89ff0189..858955b4 100644 --- a/ocml/src/cexpD.cl +++ b/ocml/src/cexpD.cl @@ -30,7 +30,7 @@ MATH_MANGLE(cexp)(double2 z) } if (BUILTIN_CLASS_F64(x, CLASS_PINF)) { rr = isfinite ? rr : AS_DOUBLE(PINFBITPATT_DP64); - ri = isfinite ? ri : AS_DOUBLE(QNANBITPATT_DP64); + ri = isfinite ? ri : QNAN_F64; ri = y == 0.0 ? y : ri; } ri = (BUILTIN_ISNAN_F64(x) & (y == 0.0)) ? y : ri; diff --git a/ocml/src/cexpF.cl b/ocml/src/cexpF.cl index f254ee17..8a98eb3f 100644 --- a/ocml/src/cexpF.cl +++ b/ocml/src/cexpF.cl @@ -30,7 +30,7 @@ MATH_MANGLE(cexp)(float2 z) } if (BUILTIN_CLASS_F32(x, CLASS_PINF)) { rr = finite ? rr : AS_FLOAT(PINFBITPATT_SP32); - ri = finite ? ri : AS_FLOAT(QNANBITPATT_SP32); + ri = finite ? ri : QNAN_F32; ri = y == 0.0f ? y : ri; } ri = (BUILTIN_ISNAN_F32(x) & (y == 0.0f)) ? y : ri; diff --git a/ocml/src/cosF.cl b/ocml/src/cosF.cl index 8ce912e7..638e56ee 100644 --- a/ocml/src/cosF.cl +++ b/ocml/src/cosF.cl @@ -26,7 +26,7 @@ MATH_MANGLE(cos)(float x) c = AS_FLOAT(AS_INT(c) ^ (r.i > 1 ? 0x80000000 : 0)); if (!FINITE_ONLY_OPT()) { - c = BUILTIN_ISFINITE_F32(ax) ? c : AS_FLOAT(QNANBITPATT_SP32); + c = BUILTIN_ISFINITE_F32(ax) ? c : QNAN_F32; } return c; diff --git a/ocml/src/cospiF.cl b/ocml/src/cospiF.cl index 1872f0db..66c4078f 100644 --- a/ocml/src/cospiF.cl +++ b/ocml/src/cospiF.cl @@ -20,7 +20,7 @@ MATH_MANGLE(cospi)(float x) c = AS_FLOAT(AS_INT(c) ^ (r.i > 1 ? 0x80000000 : 0)); if (!FINITE_ONLY_OPT()) { - c = BUILTIN_ISFINITE_F32(ax) ? c : AS_FLOAT(QNANBITPATT_SP32); + c = BUILTIN_ISFINITE_F32(ax) ? c : QNAN_F32; } return c; diff --git a/ocml/src/csqrtD.cl b/ocml/src/csqrtD.cl index af182c1f..fd0f684d 100644 --- a/ocml/src/csqrtD.cl +++ b/ocml/src/csqrtD.cl @@ -15,7 +15,7 @@ MATH_MANGLE(csqrt)(double2 z) double t = BUILTIN_MAX_F64(a, b); if (!FINITE_ONLY_OPT()) { - t = (BUILTIN_ISNAN_F64(a) | BUILTIN_ISNAN_F64(b)) ? AS_DOUBLE(QNANBITPATT_DP64) : t; + t = (BUILTIN_ISNAN_F64(a) | BUILTIN_ISNAN_F64(b)) ? QNAN_F64 : t; } int e = BUILTIN_FREXP_EXP_F64(t); diff --git a/ocml/src/ctanhD.cl b/ocml/src/ctanhD.cl index f1ebc012..54ec6583 100644 --- a/ocml/src/ctanhD.cl +++ b/ocml/src/ctanhD.cl @@ -43,8 +43,8 @@ MATH_MANGLE(ctanh)(double2 z) bool xn = BUILTIN_ISNAN_F64(x); bool yin = !BUILTIN_ISFINITE_F64(z.y); bool ni = BUILTIN_CLASS_F64(x, CLASS_PZER|CLASS_PSUB|CLASS_PNOR) & yin; - rr = (ni | xn) ? AS_DOUBLE(QNANBITPATT_DP64) : rr; - ri = ni ? AS_DOUBLE(QNANBITPATT_DP64) : ri; + rr = (ni | xn) ? QNAN_F64 : rr; + ri = ni ? QNAN_F64 : ri; ri = (BUILTIN_ISINF_F64(x) & yin) ? 0.0 : ri; ri = (xn & (z.y == 0.0)) ? z.y : ri; } diff --git a/ocml/src/ctanhF.cl b/ocml/src/ctanhF.cl index 55446477..f9a6a720 100644 --- a/ocml/src/ctanhF.cl +++ b/ocml/src/ctanhF.cl @@ -43,8 +43,8 @@ MATH_MANGLE(ctanh)(float2 z) bool xn = BUILTIN_ISNAN_F32(x); bool yin = !BUILTIN_ISFINITE_F32(z.y); bool ni = BUILTIN_CLASS_F32(x, CLASS_PZER|CLASS_PSUB|CLASS_PNOR) & yin; - rr = (ni | xn) ? 
AS_FLOAT(QNANBITPATT_SP32) : rr; - ri = ni ? AS_FLOAT(QNANBITPATT_SP32) : ri; + rr = (ni | xn) ? QNAN_F32 : rr; + ri = ni ? QNAN_F32 : ri; ri = (BUILTIN_ISINF_F32(x) & yin) ? 0.0f : ri; ri = (xn & (z.y == 0.0f)) ? z.y : ri; } diff --git a/ocml/src/erfcinvD.cl b/ocml/src/erfcinvD.cl index f6b8c5b9..008a585e 100644 --- a/ocml/src/erfcinvD.cl +++ b/ocml/src/erfcinvD.cl @@ -86,7 +86,7 @@ MATH_MANGLE(erfcinv)(double y) } if (!FINITE_ONLY_OPT()) { - ret = ((y < 0.0) | (y > 2.0)) ? AS_DOUBLE(QNANBITPATT_DP64) : ret; + ret = ((y < 0.0) | (y > 2.0)) ? QNAN_F64 : ret; ret = y == 0.0 ? AS_DOUBLE(PINFBITPATT_DP64) : ret; ret = y == 2.0 ? AS_DOUBLE(NINFBITPATT_DP64) : ret; } diff --git a/ocml/src/erfcinvF.cl b/ocml/src/erfcinvF.cl index 226dad7c..27cd16b3 100644 --- a/ocml/src/erfcinvF.cl +++ b/ocml/src/erfcinvF.cl @@ -42,7 +42,7 @@ MATH_MANGLE(erfcinv)(float y) } if (!FINITE_ONLY_OPT()) { - ret = ((y < 0.0f) | (y > 2.0f)) ? AS_FLOAT(QNANBITPATT_SP32) : ret; + ret = ((y < 0.0f) | (y > 2.0f)) ? QNAN_F32 : ret; ret = y == 0.0f ? AS_FLOAT(PINFBITPATT_SP32) : ret; ret = y == 2.0f ? AS_FLOAT(NINFBITPATT_SP32) : ret; } diff --git a/ocml/src/erfinvD.cl b/ocml/src/erfinvD.cl index c2f51b0c..a32c43f4 100644 --- a/ocml/src/erfinvD.cl +++ b/ocml/src/erfinvD.cl @@ -90,7 +90,7 @@ MATH_MANGLE(erfinv)(double x) } if (!FINITE_ONLY_OPT()) { - ret = ax > 1.0 ? AS_DOUBLE(QNANBITPATT_DP64) : ret; + ret = ax > 1.0 ? QNAN_F64 : ret; ret = ax == 1.0 ? AS_DOUBLE(PINFBITPATT_DP64) : ret; } diff --git a/ocml/src/erfinvF.cl b/ocml/src/erfinvF.cl index e9ee21f8..3dea33d8 100644 --- a/ocml/src/erfinvF.cl +++ b/ocml/src/erfinvF.cl @@ -48,7 +48,7 @@ MATH_MANGLE(erfinv)(float x) float ret = p*ax; if (!FINITE_ONLY_OPT()) { - ret = ax > 1.0f ? AS_FLOAT(QNANBITPATT_SP32) : ret; + ret = ax > 1.0f ? QNAN_F32 : ret; ret = ax == 1.0f ? AS_FLOAT(PINFBITPATT_SP32) : ret; } diff --git a/ocml/src/hypotD.cl b/ocml/src/hypotD.cl index e4c08054..b12022f2 100644 --- a/ocml/src/hypotD.cl +++ b/ocml/src/hypotD.cl @@ -20,7 +20,7 @@ MATH_MANGLE(hypot)(double x, double y) if (!FINITE_ONLY_OPT()) { ret = (BUILTIN_ISNAN_F64(x) | BUILTIN_ISNAN_F64(y)) ? - AS_DOUBLE(QNANBITPATT_DP64) : ret; + QNAN_F64 : ret; ret = (BUILTIN_ISINF_F64(x) | BUILTIN_ISINF_F64(y)) ? AS_DOUBLE(PINFBITPATT_DP64) : ret; diff --git a/ocml/src/len3D.cl b/ocml/src/len3D.cl index 94e65721..7d1e991c 100644 --- a/ocml/src/len3D.cl +++ b/ocml/src/len3D.cl @@ -33,7 +33,7 @@ MATH_MANGLE(len3)(double x, double y, double z) if (!FINITE_ONLY_OPT()) { ret = (BUILTIN_ISNAN_F64(x) | BUILTIN_ISNAN_F64(y) | - BUILTIN_ISNAN_F64(z)) ? AS_DOUBLE(QNANBITPATT_DP64) : ret; + BUILTIN_ISNAN_F64(z)) ? QNAN_F64 : ret; ret = (BUILTIN_ISINF_F64(x) | BUILTIN_ISINF_F64(y) | BUILTIN_ISINF_F64(z)) ? AS_DOUBLE(PINFBITPATT_DP64) : ret; diff --git a/ocml/src/len4D.cl b/ocml/src/len4D.cl index 71c559a5..bd4ec4d1 100644 --- a/ocml/src/len4D.cl +++ b/ocml/src/len4D.cl @@ -40,7 +40,7 @@ MATH_MANGLE(len4)(double x, double y, double z, double w) if (!FINITE_ONLY_OPT()) { ret = (BUILTIN_ISNAN_F64(x) | BUILTIN_ISNAN_F64(y) | - BUILTIN_ISNAN_F64(z) | BUILTIN_ISNAN_F64(w)) ? AS_DOUBLE(QNANBITPATT_DP64) : ret; + BUILTIN_ISNAN_F64(z) | BUILTIN_ISNAN_F64(w)) ? QNAN_F64 : ret; ret = (BUILTIN_ISINF_F64(x) | BUILTIN_ISINF_F64(y) | BUILTIN_ISINF_F64(z) | diff --git a/ocml/src/log1pD.cl b/ocml/src/log1pD.cl index ea577c2e..e86e5787 100644 --- a/ocml/src/log1pD.cl +++ b/ocml/src/log1pD.cl @@ -19,7 +19,7 @@ MATH_MANGLE(log1p)(double x) if (!FINITE_ONLY_OPT()) { z = BUILTIN_CLASS_F64(x, CLASS_PINF) ? x : z; - z = x < -1.0 ? 
AS_DOUBLE(QNANBITPATT_DP64) : z; + z = x < -1.0 ? QNAN_F64 : z; z = x == -1.0 ? AS_DOUBLE(NINFBITPATT_DP64) : z; } diff --git a/ocml/src/log1pF.cl b/ocml/src/log1pF.cl index a9f96a75..8b4e11a2 100644 --- a/ocml/src/log1pF.cl +++ b/ocml/src/log1pF.cl @@ -19,7 +19,7 @@ MATH_MANGLE(log1p)(float x) if (!FINITE_ONLY_OPT()) { z = BUILTIN_CLASS_F32(x, CLASS_PINF) ? x : z; - z = x < -1.0f ? AS_FLOAT(QNANBITPATT_SP32) : z; + z = x < -1.0f ? QNAN_F32 : z; z = x == -1.0f ? AS_FLOAT(NINFBITPATT_SP32) : z; } diff --git a/ocml/src/logD_base.h b/ocml/src/logD_base.h index 11ee8bd4..2d918c7e 100644 --- a/ocml/src/logD_base.h +++ b/ocml/src/logD_base.h @@ -45,7 +45,7 @@ MATH_MANGLE(log)(double a) if (!FINITE_ONLY_OPT()) { ret = BUILTIN_ISINF_F64(a) ? a : ret; - ret = a < 0.0 ? AS_DOUBLE(QNANBITPATT_DP64) : ret; + ret = a < 0.0 ? QNAN_F64 : ret; ret = a == 0.0 ? AS_DOUBLE(NINFBITPATT_DP64) : ret; } diff --git a/ocml/src/powD_base.h b/ocml/src/powD_base.h index 55b7da21..bc34e522 100644 --- a/ocml/src/powD_base.h +++ b/ocml/src/powD_base.h @@ -71,20 +71,20 @@ MATH_MANGLE(pow)(double x, double y) ret = ax < 1.0 ? iz : zi; if (y == 0.0) - ret = x == 0.0 || BUILTIN_ISINF_F64(x) ? AS_DOUBLE(QNANBITPATT_DP64) : 1.0; + ret = x == 0.0 || BUILTIN_ISINF_F64(x) ? QNAN_F64 : 1.0; if (x == 1.0) - ret = BUILTIN_ISINF_F64(y) ? AS_DOUBLE(QNANBITPATT_DP64) : 1.0; + ret = BUILTIN_ISINF_F64(y) ? QNAN_F64 : 1.0; if (x < 0.0 || BUILTIN_ISNAN_F64(x) || BUILTIN_ISNAN_F64(y)) - ret = AS_DOUBLE(QNANBITPATT_DP64); + ret = QNAN_F64; #elif defined COMPILING_POWN if (BUILTIN_ISINF_F64(ax) || x == 0.0) ret = BUILTIN_COPYSIGN_F64((x == 0.0) ^ (ny < 0) ? 0.0 : AS_DOUBLE(PINFBITPATT_DP64), inty == 1 ? x : 0.0); if (BUILTIN_ISNAN_F64(x)) - ret = AS_DOUBLE(QNANBITPATT_DP64); - + ret = QNAN_F64; + if (ny == 0) ret = 1.0; #elif defined COMPILING_ROOTN @@ -92,10 +92,10 @@ MATH_MANGLE(pow)(double x, double y) ret = BUILTIN_COPYSIGN_F64((x == 0.0) ^ (ny < 0) ? 0.0 : AS_DOUBLE(PINFBITPATT_DP64), inty == 1 ? x : 0.0); if ((x < 0.0 && inty != 1) || ny == 0) - ret = AS_DOUBLE(QNANBITPATT_DP64); + ret = QNAN_F64; #else if (x < 0.0 && !inty) - ret = AS_DOUBLE(QNANBITPATT_DP64); + ret = QNAN_F64; if (BUILTIN_ISINF_F64(ay)) ret = ax == 1.0 ? ax : (samesign(y, ax - 1.0) ? ay : 0.0); @@ -104,7 +104,7 @@ MATH_MANGLE(pow)(double x, double y) ret = BUILTIN_COPYSIGN_F64((x == 0.0) ^ (y < 0.0) ? 0.0 : AS_DOUBLE(PINFBITPATT_DP64), inty == 1 ? x : 0.0); if (BUILTIN_ISNAN_F64(x) || BUILTIN_ISNAN_F64(y)) - ret = AS_DOUBLE(QNANBITPATT_DP64); + ret = QNAN_F64; if (x == 1.0 || y == 0.0) ret = 1.0; diff --git a/ocml/src/powF_base.h b/ocml/src/powF_base.h index 0d0b80bf..2fff63f6 100644 --- a/ocml/src/powF_base.h +++ b/ocml/src/powF_base.h @@ -89,20 +89,20 @@ MATH_MANGLE(pow)(float x, float y) ret = ax < 1.0f ? iz : zi; if (y == 0.0f) - ret = x == 0.0f || BUILTIN_ISINF_F32(x) ? AS_FLOAT(QNANBITPATT_SP32) : 1.0f; + ret = x == 0.0f || BUILTIN_ISINF_F32(x) ? QNAN_F32 : 1.0f; if (x == 1.0f) - ret = BUILTIN_ISINF_F32(y) ? AS_FLOAT(QNANBITPATT_SP32) : 1.0f; + ret = BUILTIN_ISINF_F32(y) ? QNAN_F32 : 1.0f; if (x < 0.0f || BUILTIN_ISNAN_F32(x) || BUILTIN_ISNAN_F32(y)) - ret = AS_FLOAT(QNANBITPATT_SP32); + ret = QNAN_F32; #elif defined COMPILING_POWN if (BUILTIN_ISINF_F32(ax) || x == 0.0f) ret = BUILTIN_COPYSIGN_F32((x == 0.0f) ^ (ny < 0) ? 0.0f : AS_FLOAT(PINFBITPATT_SP32), inty == 1 ? 
x : 0.0f); if (BUILTIN_ISNAN_F32(x)) - ret = AS_FLOAT(QNANBITPATT_SP32); - + ret = QNAN_F32; + if (ny == 0) ret = 1.0f; #elif defined COMPILING_ROOTN @@ -110,10 +110,10 @@ MATH_MANGLE(pow)(float x, float y) ret = BUILTIN_COPYSIGN_F32((x == 0.0f) ^ (ny < 0) ? 0.0f : AS_FLOAT(PINFBITPATT_SP32), inty == 1 ? x : 0.0f); if ((x < 0.0f && inty != 1) || ny == 0) - ret = AS_FLOAT(QNANBITPATT_SP32); + ret = QNAN_F32; #else if (x < 0.0f && !inty) - ret = AS_FLOAT(QNANBITPATT_SP32); + ret = QNAN_F32; if (BUILTIN_ISINF_F32(ay)) ret = ax == 1.0f ? ax : (samesign(y, ax - 1.0f) ? ay : 0.0f); @@ -122,7 +122,7 @@ MATH_MANGLE(pow)(float x, float y) ret = BUILTIN_COPYSIGN_F32((x == 0.0f) ^ (y < 0.0f) ? 0.0f : AS_FLOAT(PINFBITPATT_SP32), inty == 1 ? x : 0.0f); if (BUILTIN_ISNAN_F32(x) || BUILTIN_ISNAN_F32(y)) - ret = AS_FLOAT(QNANBITPATT_SP32); + ret = QNAN_F32; if (x == 1.0f || y == 0.0f) ret = 1.0f; diff --git a/ocml/src/powH_base.h b/ocml/src/powH_base.h index 0d07ca15..c3055dd0 100644 --- a/ocml/src/powH_base.h +++ b/ocml/src/powH_base.h @@ -70,18 +70,18 @@ MATH_MANGLE(pow)(half x, half y) ret = (ax_lt_1 & y_eq_ninf) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; ret = (ax_lt_1 & y_eq_pinf) ? 0.0h : ret; ret = (ax_eq_1 & ay_lt_inf) ? 1.0h : ret; - ret = (ax_eq_1 & ay_eq_pinf) ? AS_HALF((ushort)QNANBITPATT_HP16) : ret; + ret = (ax_eq_1 & ay_eq_pinf) ? QNAN_F16 : ret; ret = (ax_gt_1 & y_eq_ninf) ? 0.0h : ret; ret = (ax_gt_1 & y_eq_pinf) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; ret = (ax_lt_pinf & ay_eq_0) ? 1.0h : ret; ret = (ax_eq_pinf & !y_pos) ? 0.0h : ret; ret = (ax_eq_pinf & y_pos) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; ret = (ax_eq_pinf & y_eq_pinf) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; - ret = (ax_eq_pinf & ay_eq_0) ? AS_HALF((ushort)QNANBITPATT_HP16) : ret; + ret = (ax_eq_pinf & ay_eq_0) ? QNAN_F16 : ret; ret = (ax_eq_0 & !y_pos) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; ret = (ax_eq_0 & y_pos) ? 0.0h : ret; - ret = (ax_eq_0 & ay_eq_0) ? AS_HALF((ushort)QNANBITPATT_HP16) : ret; - ret = (ax_ne_0 & !x_pos) ? AS_HALF((ushort)QNANBITPATT_HP16) : ret; + ret = (ax_eq_0 & ay_eq_0) ? QNAN_F16 : ret; + ret = (ax_ne_0 & !x_pos) ? QNAN_F16 : ret; ret = ax_eq_nan ? x : ret; ret = ay_eq_nan ? y : ret; } else { @@ -130,7 +130,7 @@ MATH_MANGLE(pow)(half x, half y) bool y_pos = ny >= 0; if (!FINITE_ONLY_OPT()) { - ret = (!x_pos & (inty == 2)) ? AS_HALF((ushort)QNANBITPATT_HP16) : ret; + ret = (!x_pos & (inty == 2)) ? QNAN_F16 : ret; half xinf = BUILTIN_COPYSIGN_F16(AS_HALF((ushort)PINFBITPATT_HP16), x); ret = (ax_eq_0 & !y_pos & (inty == 1)) ? xinf : ret; ret = (ax_eq_0 & !y_pos & (inty == 2)) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; @@ -142,7 +142,7 @@ MATH_MANGLE(pow)(half x, half y) ret = (x_eq_pinf & !y_pos) ? 0.0h : ret; ret = (x_eq_pinf & y_pos) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; ret = ax_eq_nan ? x : ret; - ret = ny == 0 ? AS_HALF((ushort)QNANBITPATT_HP16) : ret; + ret = ny == 0 ? QNAN_F16 : ret; } else { half xzero = BUILTIN_COPYSIGN_F16(0.0h, x); ret = (ax_eq_0 & y_pos & (inty == 1)) ? xzero : ret; @@ -169,7 +169,7 @@ MATH_MANGLE(pow)(half x, half y) bool y_pos = BUILTIN_CLASS_F16(y, CLASS_PZER|CLASS_PSUB|CLASS_PNOR|CLASS_PINF); if (!FINITE_ONLY_OPT()) { - ret = (!x_pos & (inty == 0)) ? AS_HALF((ushort)QNANBITPATT_HP16) : ret; + ret = (!x_pos & (inty == 0)) ? QNAN_F16 : ret; ret = (ax_lt_1 & y_eq_ninf) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; ret = (ax_gt_1 & y_eq_ninf) ? 0.0h : ret; ret = (ax_lt_1 & y_eq_pinf) ? 
0.0h : ret; diff --git a/ocml/src/remainderD_base.h b/ocml/src/remainderD_base.h index 0266be8a..d0a3ace8 100644 --- a/ocml/src/remainderD_base.h +++ b/ocml/src/remainderD_base.h @@ -121,13 +121,13 @@ MATH_MANGLE(remainder)(double x, double y) } if (!FINITE_ONLY_OPT()) { - ret = y == 0.0 ? AS_DOUBLE(QNANBITPATT_DP64) : ret; + ret = y == 0.0 ? QNAN_F64 : ret; #if defined(COMPILING_REMQUO) q7 = y == 0.0 ? 0 : q7; #endif bool c = !BUILTIN_ISNAN_F64(y) && BUILTIN_ISFINITE_F64(x); - ret = c ? ret : AS_DOUBLE(QNANBITPATT_DP64); + ret = c ? ret : QNAN_F64; #if defined(COMPILING_REMQUO) q7 = c ? q7 : 0; #endif diff --git a/ocml/src/remainderF_base.h b/ocml/src/remainderF_base.h index cf433851..b1fb5b22 100644 --- a/ocml/src/remainderF_base.h +++ b/ocml/src/remainderF_base.h @@ -147,13 +147,13 @@ MATH_MANGLE(remainder)(float x, float y) } if (!FINITE_ONLY_OPT()) { - ret = y == 0.0f ? AS_FLOAT(QNANBITPATT_SP32) : ret; + ret = y == 0.0f ? QNAN_F32 : ret; #if defined(COMPILING_REMQUO) q7 = y == 0.0f ? 0 : q7; #endif bool c = !BUILTIN_ISNAN_F32(y) && BUILTIN_ISFINITE_F32(x); - ret = c ? ret : AS_FLOAT(QNANBITPATT_SP32); + ret = c ? ret : QNAN_F32; #if defined(COMPILING_REMQUO) q7 = c ? q7 : 0; #endif diff --git a/ocml/src/remainderH_base.h b/ocml/src/remainderH_base.h index e1b0f259..3912a24b 100644 --- a/ocml/src/remainderH_base.h +++ b/ocml/src/remainderH_base.h @@ -123,13 +123,13 @@ MATH_MANGLE(remainder)(half x, half y) } if (!FINITE_ONLY_OPT()) { - ret = y == 0.0h ? AS_HALF((short)QNANBITPATT_HP16) : ret; + ret = y == 0.0h ? QNAN_F16 : ret; #if defined(COMPILING_REMQUO) q7 = y == 0.0h ? 0 : q7; #endif bool c = !BUILTIN_ISNAN_F16(y) && BUILTIN_ISFINITE_F16(x); - ret = c ? ret : AS_HALF((short)QNANBITPATT_HP16); + ret = c ? ret : QNAN_F16; #if defined(COMPILING_REMQUO) q7 = c ? q7 : 0; #endif diff --git a/ocml/src/rhypotD.cl b/ocml/src/rhypotD.cl index 8fb40242..d21521eb 100644 --- a/ocml/src/rhypotD.cl +++ b/ocml/src/rhypotD.cl @@ -26,7 +26,7 @@ MATH_MANGLE(rhypot)(double x, double y) ret = t == 0.0 ? AS_DOUBLE(PINFBITPATT_DP64) : ret; ret = (BUILTIN_ISNAN_F64(x) | BUILTIN_ISNAN_F64(y)) ? - AS_DOUBLE(QNANBITPATT_DP64) : ret; + QNAN_F64 : ret; ret = (BUILTIN_ISINF_F64(x) | BUILTIN_ISINF_F64(y)) ? 0.0 : ret; } diff --git a/ocml/src/rlen3D.cl b/ocml/src/rlen3D.cl index bf3feba3..267a78da 100644 --- a/ocml/src/rlen3D.cl +++ b/ocml/src/rlen3D.cl @@ -39,7 +39,7 @@ MATH_MANGLE(rlen3)(double x, double y, double z) ret = (BUILTIN_ISNAN_F64(x) | BUILTIN_ISNAN_F64(y) | - BUILTIN_ISNAN_F64(z)) ? AS_DOUBLE(QNANBITPATT_DP64) : ret; + BUILTIN_ISNAN_F64(z)) ? QNAN_F64 : ret; ret = (BUILTIN_ISINF_F64(x) | BUILTIN_ISINF_F64(y) | diff --git a/ocml/src/rlen4D.cl b/ocml/src/rlen4D.cl index 3b149643..7d98a3fc 100644 --- a/ocml/src/rlen4D.cl +++ b/ocml/src/rlen4D.cl @@ -48,7 +48,7 @@ MATH_MANGLE(rlen4)(double x, double y, double z, double w) ret = (BUILTIN_ISNAN_F64(x) | BUILTIN_ISNAN_F64(y) | BUILTIN_ISNAN_F64(z) | - BUILTIN_ISNAN_F64(w)) ? AS_DOUBLE(QNANBITPATT_DP64) : ret; + BUILTIN_ISNAN_F64(w)) ? QNAN_F64 : ret; ret = (BUILTIN_ISINF_F64(x) | BUILTIN_ISINF_F64(y) | diff --git a/ocml/src/scalbD.cl b/ocml/src/scalbD.cl index ec1f9aca..1269103f 100644 --- a/ocml/src/scalbD.cl +++ b/ocml/src/scalbD.cl @@ -14,9 +14,9 @@ MATH_MANGLE(scalb)(double x, double y) double ret = MATH_MANGLE(ldexp)(x, (int)BUILTIN_RINT_F64(t)); if (!FINITE_ONLY_OPT()) { - ret = (BUILTIN_ISNAN_F64(x) | BUILTIN_ISNAN_F64(y)) ? AS_DOUBLE(QNANBITPATT_DP64) : ret; - ret = (BUILTIN_ISZERO_F64(x) & BUILTIN_CLASS_F64(y, CLASS_PINF)) ? 
AS_DOUBLE(QNANBITPATT_DP64) : ret; - ret = (BUILTIN_ISINF_F64(x) & BUILTIN_CLASS_F64(y, CLASS_NINF)) ? AS_DOUBLE(QNANBITPATT_DP64) : ret; + ret = (BUILTIN_ISNAN_F64(x) | BUILTIN_ISNAN_F64(y)) ? QNAN_F64 : ret; + ret = (BUILTIN_ISZERO_F64(x) & BUILTIN_CLASS_F64(y, CLASS_PINF)) ? QNAN_F64 : ret; + ret = (BUILTIN_ISINF_F64(x) & BUILTIN_CLASS_F64(y, CLASS_NINF)) ? QNAN_F64 : ret; } return ret; diff --git a/ocml/src/scalbF.cl b/ocml/src/scalbF.cl index 16b4ec0d..89e2ddd3 100644 --- a/ocml/src/scalbF.cl +++ b/ocml/src/scalbF.cl @@ -14,9 +14,9 @@ MATH_MANGLE(scalb)(float x, float y) float ret = MATH_MANGLE(ldexp)(x, (int)BUILTIN_RINT_F32(t)); if (!FINITE_ONLY_OPT()) { - ret = (BUILTIN_ISNAN_F32(x) | BUILTIN_ISNAN_F32(y)) ? AS_FLOAT(QNANBITPATT_SP32) : ret; - ret = (BUILTIN_ISINF_F32(x) & BUILTIN_CLASS_F32(y, CLASS_PINF)) ? AS_FLOAT(QNANBITPATT_SP32) : ret; - ret = (BUILTIN_ISINF_F32(x) & BUILTIN_CLASS_F32(y, CLASS_NINF)) ? AS_FLOAT(QNANBITPATT_SP32) : ret; + ret = (BUILTIN_ISNAN_F32(x) | BUILTIN_ISNAN_F32(y)) ? QNAN_F32 : ret; + ret = (BUILTIN_ISINF_F32(x) & BUILTIN_CLASS_F32(y, CLASS_PINF)) ? QNAN_F32 : ret; + ret = (BUILTIN_ISINF_F32(x) & BUILTIN_CLASS_F32(y, CLASS_NINF)) ? QNAN_F32 : ret; } return ret; diff --git a/ocml/src/scalbH.cl b/ocml/src/scalbH.cl index edbe14c7..ed90be6e 100644 --- a/ocml/src/scalbH.cl +++ b/ocml/src/scalbH.cl @@ -16,9 +16,9 @@ MATH_MANGLE(scalb)(half x, half y) half ret = MATH_MANGLE(ldexp)(x, (int)BUILTIN_RINT_F16(t)); if (!FINITE_ONLY_OPT()) { - ret = (BUILTIN_ISNAN_F16(x) | BUILTIN_ISNAN_F16(y)) ? AS_HALF((short)QNANBITPATT_HP16) : ret; - ret = (BUILTIN_ISZERO_F16(x) & BUILTIN_CLASS_F16(y, CLASS_PINF)) ? AS_HALF((short)QNANBITPATT_HP16) : ret; - ret = (BUILTIN_ISINF_F16(x) & BUILTIN_CLASS_F16(y, CLASS_NINF)) ? AS_HALF((short)QNANBITPATT_HP16) : ret; + ret = (BUILTIN_ISNAN_F16(x) | BUILTIN_ISNAN_F16(y)) ? QNAN_F16 : ret; + ret = (BUILTIN_ISZERO_F16(x) & BUILTIN_CLASS_F16(y, CLASS_PINF)) ? QNAN_F16 : ret; + ret = (BUILTIN_ISINF_F16(x) & BUILTIN_CLASS_F16(y, CLASS_NINF)) ? QNAN_F16 : ret; } return ret; diff --git a/ocml/src/sinF.cl b/ocml/src/sinF.cl index fb5be583..d7ddef47 100644 --- a/ocml/src/sinF.cl +++ b/ocml/src/sinF.cl @@ -26,7 +26,7 @@ MATH_MANGLE(sin)(float x) (AS_INT(x) ^ AS_INT(ax))); if (!FINITE_ONLY_OPT()) { - s = BUILTIN_ISFINITE_F32(ax) ? s : AS_FLOAT(QNANBITPATT_SP32); + s = BUILTIN_ISFINITE_F32(ax) ? s : QNAN_F32; } return s; diff --git a/ocml/src/sincosF.cl b/ocml/src/sincosF.cl index 1c14d914..89710618 100644 --- a/ocml/src/sincosF.cl +++ b/ocml/src/sincosF.cl @@ -31,8 +31,8 @@ MATH_MANGLE(sincos)(float x, __private float *cp) if (!FINITE_ONLY_OPT()) { bool finite = BUILTIN_ISFINITE_F32(ax); - c = finite ? c : AS_FLOAT(QNANBITPATT_SP32); - s = finite ? s : AS_FLOAT(QNANBITPATT_SP32); + c = finite ? c : QNAN_F32; + s = finite ? s : QNAN_F32; } *cp = c; diff --git a/ocml/src/sincospiF.cl b/ocml/src/sincospiF.cl index c219990f..9be5c85a 100644 --- a/ocml/src/sincospiF.cl +++ b/ocml/src/sincospiF.cl @@ -26,8 +26,8 @@ MATH_MANGLE(sincospi)(float x, __private float *cp) if (!FINITE_ONLY_OPT()) { bool finite = BUILTIN_ISFINITE_F32(ax); - c = finite ? c : AS_FLOAT(QNANBITPATT_SP32); - s = finite ? s : AS_FLOAT(QNANBITPATT_SP32); + c = finite ? c : QNAN_F32; + s = finite ? s : QNAN_F32; } *cp = c; diff --git a/ocml/src/sinpiF.cl b/ocml/src/sinpiF.cl index 1509fd1c..c2149e48 100644 --- a/ocml/src/sinpiF.cl +++ b/ocml/src/sinpiF.cl @@ -19,7 +19,7 @@ MATH_MANGLE(sinpi)(float x) s = AS_FLOAT(AS_INT(s) ^ (r.i > 1 ? 
0x80000000 : 0) ^ (AS_INT(x) ^ AS_INT(ax))); if (!FINITE_ONLY_OPT()) { - s = BUILTIN_ISFINITE_F32(ax) ? s : AS_FLOAT(QNANBITPATT_SP32); + s = BUILTIN_ISFINITE_F32(ax) ? s : QNAN_F32; } return s; diff --git a/ocml/src/tanF.cl b/ocml/src/tanF.cl index aec2c3f9..57517252 100644 --- a/ocml/src/tanF.cl +++ b/ocml/src/tanF.cl @@ -24,7 +24,7 @@ MATH_MANGLE(tan)(float x) t = AS_FLOAT(AS_INT(t) ^ (AS_INT(x) ^ AS_INT(ax))); if (!FINITE_ONLY_OPT()) { - t = BUILTIN_ISFINITE_F32(ax) ? t : AS_FLOAT(QNANBITPATT_SP32); + t = BUILTIN_ISFINITE_F32(ax) ? t : QNAN_F32; } return t; diff --git a/ocml/src/tgammaD.cl b/ocml/src/tgammaD.cl index 1318fb7d..d85e253e 100644 --- a/ocml/src/tgammaD.cl +++ b/ocml/src/tgammaD.cl @@ -98,7 +98,7 @@ MATH_MANGLE(tgamma)(double x) ret = etonegz * zpow * MATH_DIV(num,den); } else { ret = MATH_DIV(den*pi, negadj*etonegz*zpow*num); - ret = BUILTIN_FRACTION_F64(x) == 0.0 ? AS_DOUBLE(QNANBITPATT_DP64) : ret; + ret = BUILTIN_FRACTION_F64(x) == 0.0 ? QNAN_F64 : ret; } } else { const double c0 = 2.5066282746310007; @@ -141,7 +141,7 @@ MATH_MANGLE(tgamma)(double x) } else { ret = BUILTIN_COPYSIGN_F64(0.0, negadj); } - ret = BUILTIN_FRACTION_F64(x) == 0.0 ? AS_DOUBLE(QNANBITPATT_DP64) : ret; + ret = BUILTIN_FRACTION_F64(x) == 0.0 ? QNAN_F64 : ret; } else { ret = x; } diff --git a/ocml/src/tgammaF.cl b/ocml/src/tgammaF.cl index a4c2e8fd..7671d20d 100644 --- a/ocml/src/tgammaF.cl +++ b/ocml/src/tgammaF.cl @@ -45,7 +45,7 @@ MATH_MANGLE(tgamma)(float x) float p = s*x*t2*t1*t1; ret = MATH_DIV(-sqrtpiby2*d, MATH_MAD(p, pt, p)); ret = x < -42.0f ? 0.0f : ret; - ret = BUILTIN_FRACTION_F32(x) == 0.0f ? AS_FLOAT(QNANBITPATT_SP32) : ret; + ret = BUILTIN_FRACTION_F32(x) == 0.0f ? QNAN_F32 : ret; } } else { ret = MATH_MAD(x, MATH_MAD(x, MATH_MAD(x, diff --git a/ocml/src/y1D.cl b/ocml/src/y1D.cl index b8a6275a..b6a5b664 100644 --- a/ocml/src/y1D.cl +++ b/ocml/src/y1D.cl @@ -145,7 +145,7 @@ MATH_MANGLE(y1)(double x) ret = MATH_DIV(-twobypi, BUILTIN_ABS_F64(x)); else ret = MATH_MAD(ret, x, twobypi*(MATH_MANGLE(j1)(x) * MATH_MANGLE(log)(x) - MATH_RCP(x))); - ret = x < 0.0 ? AS_DOUBLE(QNANBITPATT_DP64) : ret; + ret = x < 0.0 ? QNAN_F64 : ret; } } else { double r = MATH_RCP(x); diff --git a/ocml/src/y1F.cl b/ocml/src/y1F.cl index 64297af7..8145e0ad 100644 --- a/ocml/src/y1F.cl +++ b/ocml/src/y1F.cl @@ -142,7 +142,7 @@ MATH_MANGLE(y1)(float x) ret = MATH_DIV(-twobypi, BUILTIN_ABS_F32(x)); else ret = MATH_MAD(ret, x, twobypi*(MATH_MANGLE(j1)(x) * MATH_MANGLE(log)(x) - MATH_RCP(x))); - ret = x < 0.0f ? AS_FLOAT(QNANBITPATT_SP32) : ret; + ret = x < 0.0f ? QNAN_F32 : ret; } } else { float r = MATH_RCP(x); From 566678bc1514ee577cfd44e1b6981809bfff3de3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 16 Dec 2022 14:51:03 -0500 Subject: [PATCH 02/22] Use PINF_*/NINF_* macros Regex and clang-format. 
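
For reference, the PINF_*/NINF_*/QNAN_* names are assumed here to expand to plain
floating-point constants rather than reinterpreted integer bit patterns. A minimal
sketch, illustrative only (the real definitions live in the ocml headers and may
differ):

    // Assumed definitions, for illustration only.
    #define QNAN_F32 __builtin_nanf("")
    #define PINF_F32 __builtin_inff()
    #define NINF_F32 (-__builtin_inff())

    // Effect of the mechanical rewrite, e.g. in logbF.cl:
    //   before:  ret = x == 0.0f ? AS_FLOAT(NINFBITPATT_SP32) : ret;
    //   after:   ret = x == 0.0f ? NINF_F32 : ret;

Either way the resulting value is the same; the macro form just avoids spelling the
constant as an integer bit pattern cast back to float.
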
Change-Id: I0f5acd49f90f5a36181d670562ff8f72abcb76f7 --- ocml/src/atanhD.cl | 2 +- ocml/src/atanhF.cl | 2 +- ocml/src/atanhH.cl | 2 +- ocml/src/cacoshD.cl | 2 +- ocml/src/cacoshF.cl | 2 +- ocml/src/casinhD.cl | 2 +- ocml/src/casinhF.cl | 2 +- ocml/src/catanhD.cl | 2 +- ocml/src/catanhF.cl | 2 +- ocml/src/ccoshD.cl | 4 ++-- ocml/src/ccoshF.cl | 4 ++-- ocml/src/cdivD.cl | 6 +++--- ocml/src/cdivF.cl | 6 +++--- ocml/src/cexpD.cl | 2 +- ocml/src/cexpF.cl | 2 +- ocml/src/clogD.cl | 4 ++-- ocml/src/clogF.cl | 4 ++-- ocml/src/coshD.cl | 2 +- ocml/src/coshF.cl | 2 +- ocml/src/csinhD.cl | 4 ++-- ocml/src/csinhF.cl | 4 ++-- ocml/src/erfcinvD.cl | 4 ++-- ocml/src/erfcinvF.cl | 4 ++-- ocml/src/erfcxD.cl | 4 ++-- ocml/src/erfcxF.cl | 4 ++-- ocml/src/erfinvD.cl | 2 +- ocml/src/erfinvF.cl | 2 +- ocml/src/expD_base.h | 2 +- ocml/src/expF_base.h | 8 ++++---- ocml/src/expepD.cl | 2 +- ocml/src/expepF.cl | 2 +- ocml/src/expm1D.cl | 2 +- ocml/src/expm1F.cl | 2 +- ocml/src/hypotD.cl | 2 +- ocml/src/hypotF.cl | 2 +- ocml/src/hypotH.cl | 2 +- ocml/src/len3D.cl | 2 +- ocml/src/len3F.cl | 2 +- ocml/src/len3H.cl | 2 +- ocml/src/len4D.cl | 2 +- ocml/src/len4F.cl | 2 +- ocml/src/len4H.cl | 2 +- ocml/src/lgamma_rD.cl | 4 ++-- ocml/src/lgamma_rF.cl | 4 ++-- ocml/src/log1pD.cl | 2 +- ocml/src/log1pF.cl | 2 +- ocml/src/logD_base.h | 2 +- ocml/src/logbD.cl | 2 +- ocml/src/logbF.cl | 2 +- ocml/src/logbH.cl | 2 +- ocml/src/powD_base.h | 13 +++++++----- ocml/src/powF_base.h | 13 +++++++----- ocml/src/powH_base.h | 48 +++++++++++++++++++++---------------------- ocml/src/rcbrtD.cl | 2 +- ocml/src/rhypotD.cl | 2 +- ocml/src/rlen3D.cl | 2 +- ocml/src/rlen4D.cl | 2 +- ocml/src/sinhD.cl | 2 +- ocml/src/sinhF.cl | 2 +- ocml/src/tgammaD.cl | 2 +- ocml/src/tgammaF.cl | 2 +- 61 files changed, 117 insertions(+), 111 deletions(-) diff --git a/ocml/src/atanhD.cl b/ocml/src/atanhD.cl index fdbeb7c0..990f1899 100644 --- a/ocml/src/atanhD.cl +++ b/ocml/src/atanhD.cl @@ -22,7 +22,7 @@ MATH_MANGLE(atanh)(double x) if (!FINITE_ONLY_OPT()) { z = y > 1.0 ? QNAN_F64 : z; - z = y == 1.0 ? AS_DOUBLE(PINFBITPATT_DP64) : z; + z = y == 1.0 ? PINF_F64 : z; } return BUILTIN_COPYSIGN_F64(z, x); diff --git a/ocml/src/atanhF.cl b/ocml/src/atanhF.cl index 98b4fc9e..c0e0ce01 100644 --- a/ocml/src/atanhF.cl +++ b/ocml/src/atanhF.cl @@ -22,7 +22,7 @@ MATH_MANGLE(atanh)(float x) if (!FINITE_ONLY_OPT()) { z = y > 1.0f ? QNAN_F32 : z; - z = y == 1.0f ? AS_FLOAT(PINFBITPATT_SP32) : z; + z = y == 1.0f ? PINF_F32 : z; } return BUILTIN_COPYSIGN_F32(z, x); diff --git a/ocml/src/atanhH.cl b/ocml/src/atanhH.cl index 16534ac7..b23dc295 100644 --- a/ocml/src/atanhH.cl +++ b/ocml/src/atanhH.cl @@ -19,7 +19,7 @@ MATH_MANGLE(atanh)(half hx) ret = x < 0x1.0p-7f ? x : ret; if (!FINITE_ONLY_OPT()) { - ret = x == 1.0f ? AS_HALF((short)PINFBITPATT_HP16) : ret; + ret = x == 1.0f ? PINF_F16 : ret; ret = (x > 1.0f) | BUILTIN_ISNAN_F32(x) ? QNAN_F16 : ret; } diff --git a/ocml/src/cacoshD.cl b/ocml/src/cacoshD.cl index cbb10cd6..8300169d 100644 --- a/ocml/src/cacoshD.cl +++ b/ocml/src/cacoshD.cl @@ -56,7 +56,7 @@ MATH_MANGLE(cacosh)(double2 z) double ri = MATH_MANGLE(atan2)(t.y, t.x); if (!FINITE_ONLY_OPT()) { - rr = (BUILTIN_ISINF_F64(z.x) | BUILTIN_ISINF_F64(z.y)) ? AS_DOUBLE(PINFBITPATT_DP64) : rr; + rr = (BUILTIN_ISINF_F64(z.x) | BUILTIN_ISINF_F64(z.y)) ? 
PINF_F64 : rr; } return (double2)(rr, ri); diff --git a/ocml/src/cacoshF.cl b/ocml/src/cacoshF.cl index 14c151b9..429f58a8 100644 --- a/ocml/src/cacoshF.cl +++ b/ocml/src/cacoshF.cl @@ -56,7 +56,7 @@ MATH_MANGLE(cacosh)(float2 z) float ri = MATH_MANGLE(atan2)(t.y, t.x); if (!FINITE_ONLY_OPT()) { - rr = (BUILTIN_ISINF_F32(z.x) | BUILTIN_ISINF_F32(z.y)) ? AS_FLOAT(PINFBITPATT_SP32) : rr; + rr = (BUILTIN_ISINF_F32(z.x) | BUILTIN_ISINF_F32(z.y)) ? PINF_F32 : rr; } return (float2)(rr, ri); diff --git a/ocml/src/casinhD.cl b/ocml/src/casinhD.cl index 6d6b096d..b90b3ee7 100644 --- a/ocml/src/casinhD.cl +++ b/ocml/src/casinhD.cl @@ -56,7 +56,7 @@ MATH_MANGLE(casinh)(double2 z) double ri = BUILTIN_COPYSIGN_F64(MATH_MANGLE(atan2)(t.y, t.x), z.y); if (!FINITE_ONLY_OPT()) { - double i = BUILTIN_COPYSIGN_F64(AS_DOUBLE(PINFBITPATT_DP64), z.x); + double i = BUILTIN_COPYSIGN_F64(PINF_F64, z.x); rr = (BUILTIN_ISINF_F64(z.x) | BUILTIN_ISINF_F64(z.y)) ? i : rr; } diff --git a/ocml/src/casinhF.cl b/ocml/src/casinhF.cl index 64624329..71d8dddd 100644 --- a/ocml/src/casinhF.cl +++ b/ocml/src/casinhF.cl @@ -56,7 +56,7 @@ MATH_MANGLE(casinh)(float2 z) float ri = BUILTIN_COPYSIGN_F32(MATH_MANGLE(atan2)(t.y, t.x), z.y); if (!FINITE_ONLY_OPT()) { - float i = BUILTIN_COPYSIGN_F32(AS_FLOAT(PINFBITPATT_SP32), z.x); + float i = BUILTIN_COPYSIGN_F32(PINF_F32, z.x); rr = (BUILTIN_ISINF_F32(z.x) | BUILTIN_ISINF_F32(z.y)) ? i : rr; } diff --git a/ocml/src/catanhD.cl b/ocml/src/catanhD.cl index 06f7a229..c1680409 100644 --- a/ocml/src/catanhD.cl +++ b/ocml/src/catanhD.cl @@ -43,7 +43,7 @@ MATH_MANGLE(catanh)(double2 z) } if (!FINITE_ONLY_OPT()) { - rr = ((x == 1.0) & (y == 0.0)) ? AS_DOUBLE(PINFBITPATT_DP64) : rr; + rr = ((x == 1.0) & (y == 0.0)) ? PINF_F64 : rr; rr = x == 0.0 ? 0.0 : rr; rr = BUILTIN_ISINF_F64(x) ? 0.0 : rr; rr = (BUILTIN_ISNAN_F64(x) & BUILTIN_ISINF_F64(y)) ? 0.0 : rr; diff --git a/ocml/src/catanhF.cl b/ocml/src/catanhF.cl index 47704bbb..62947fc6 100644 --- a/ocml/src/catanhF.cl +++ b/ocml/src/catanhF.cl @@ -43,7 +43,7 @@ MATH_MANGLE(catanh)(float2 z) } if (!FINITE_ONLY_OPT()) { - rr = ((x == 1.0f) & (y == 0.0f)) ? AS_FLOAT(PINFBITPATT_SP32) : rr; + rr = ((x == 1.0f) & (y == 0.0f)) ? PINF_F32 : rr; rr = x == 0.0f ? 0.0f : rr; rr = BUILTIN_ISINF_F32(x) ? 0.0f : rr; rr = (BUILTIN_ISNAN_F32(x) & BUILTIN_ISINF_F32(y)) ? 0.0f : rr; diff --git a/ocml/src/ccoshD.cl b/ocml/src/ccoshD.cl index 0c0697f0..def3dd01 100644 --- a/ocml/src/ccoshD.cl +++ b/ocml/src/ccoshD.cl @@ -30,8 +30,8 @@ MATH_MANGLE(ccosh)(double2 z) sxhi = sx.hi; } else { bool b = x >= 0x1.6395a2079b70cp+9; - cxhi = b ? AS_DOUBLE(PINFBITPATT_DP64) : cx.hi; - sxhi = b ? AS_DOUBLE(PINFBITPATT_DP64) : sx.hi; + cxhi = b ? PINF_F64 : cx.hi; + sxhi = b ? PINF_F64 : sx.hi; } double rr = BUILTIN_FLDEXP_F64(cxhi * cy, 1); diff --git a/ocml/src/ccoshF.cl b/ocml/src/ccoshF.cl index 03755eb6..a2d6ae3d 100644 --- a/ocml/src/ccoshF.cl +++ b/ocml/src/ccoshF.cl @@ -30,8 +30,8 @@ MATH_MANGLE(ccosh)(float2 z) sxhi = sx.hi; } else { bool b = x >= 0x1.686fc0p+6f; - cxhi = b ? AS_FLOAT(PINFBITPATT_SP32) : cx.hi; - sxhi = b ? AS_FLOAT(PINFBITPATT_SP32) : sx.hi; + cxhi = b ? PINF_F32 : cx.hi; + sxhi = b ? 
PINF_F32 : sx.hi; } float rr = BUILTIN_FLDEXP_F32(cxhi * cy, 1); diff --git a/ocml/src/cdivD.cl b/ocml/src/cdivD.cl index a479795c..77750750 100644 --- a/ocml/src/cdivD.cl +++ b/ocml/src/cdivD.cl @@ -48,15 +48,15 @@ MATH_MANGLE(cdiv)(double2 zn, double2 zd) if (!FINITE_ONLY_OPT()) { if (BUILTIN_ISNAN_F64(rr) && BUILTIN_ISNAN_F64(ri)) { if (d2 == 0.0 && (!BUILTIN_ISNAN_F64(zn.x) || !BUILTIN_ISNAN_F64(zn.y))) { - double i = BUILTIN_COPYSIGN_F64(AS_DOUBLE(PINFBITPATT_DP64), zd.x); + double i = BUILTIN_COPYSIGN_F64(PINF_F64, zd.x); rr = i * zn.x; ri = i * zn.y; } else if ((BUILTIN_ISINF_F64(zn.x) || BUILTIN_ISINF_F64(zn.y)) && (BUILTIN_ISFINITE_F64(zd.x) && BUILTIN_ISFINITE_F64(zd.y))) { double znx = BUILTIN_COPYSIGN_F64(BUILTIN_ISINF_F64(zn.x) ? 1.0 : 0.0, zn.x); double zny = BUILTIN_COPYSIGN_F64(BUILTIN_ISINF_F64(zn.y) ? 1.0 : 0.0, zn.y); - rr = AS_DOUBLE(PINFBITPATT_DP64) * MATH_MAD(znx, zd.x, zny * zd.y); - ri = AS_DOUBLE(PINFBITPATT_DP64) * MATH_MAD(zny, zd.x, -znx * zd.y); + rr = PINF_F64 * MATH_MAD(znx, zd.x, zny * zd.y); + ri = PINF_F64 * MATH_MAD(zny, zd.x, -znx * zd.y); } else if ((BUILTIN_ISINF_F64(zd.x) || BUILTIN_ISINF_F64(zd.y)) && (BUILTIN_ISFINITE_F64(zn.x) && BUILTIN_ISFINITE_F64(zn.y))) { zdx = BUILTIN_COPYSIGN_F64(BUILTIN_ISINF_F64(zd.x) ? 1.0 : 0.0, zd.x); diff --git a/ocml/src/cdivF.cl b/ocml/src/cdivF.cl index 9eaa57c4..739d2767 100644 --- a/ocml/src/cdivF.cl +++ b/ocml/src/cdivF.cl @@ -48,15 +48,15 @@ MATH_MANGLE(cdiv)(float2 zn, float2 zd) if (!FINITE_ONLY_OPT()) { if (BUILTIN_ISNAN_F32(rr) && BUILTIN_ISNAN_F32(ri)) { if (d2 == 0.0f && (!BUILTIN_ISNAN_F32(zn.x) || !BUILTIN_ISNAN_F32(zn.y))) { - float i = BUILTIN_COPYSIGN_F32(AS_FLOAT(PINFBITPATT_SP32), zd.x); + float i = BUILTIN_COPYSIGN_F32(PINF_F32, zd.x); rr = i * zn.x; ri = i * zn.y; } else if ((BUILTIN_ISINF_F32(zn.x) || BUILTIN_ISINF_F32(zn.y)) && (BUILTIN_ISFINITE_F32(zd.x) && BUILTIN_ISFINITE_F32(zd.y))) { float znx = BUILTIN_COPYSIGN_F32(BUILTIN_ISINF_F32(zn.x) ? 1.0f : 0.0f, zn.x); float zny = BUILTIN_COPYSIGN_F32(BUILTIN_ISINF_F32(zn.y) ? 1.0f : 0.0f, zn.y); - rr = AS_FLOAT(PINFBITPATT_SP32) * MATH_MAD(znx, zd.x, zny * zd.y); - ri = AS_FLOAT(PINFBITPATT_SP32) * MATH_MAD(zny, zd.x, -znx * zd.y); + rr = PINF_F32 * MATH_MAD(znx, zd.x, zny * zd.y); + ri = PINF_F32 * MATH_MAD(zny, zd.x, -znx * zd.y); } else if ((BUILTIN_ISINF_F32(zd.x) || BUILTIN_ISINF_F32(zd.y)) && (BUILTIN_ISFINITE_F32(zn.x) && BUILTIN_ISFINITE_F32(zn.y))) { zdx = BUILTIN_COPYSIGN_F32(BUILTIN_ISINF_F32(zd.x) ? 1.0f : 0.0f, zd.x); diff --git a/ocml/src/cexpD.cl b/ocml/src/cexpD.cl index 858955b4..0c77a52e 100644 --- a/ocml/src/cexpD.cl +++ b/ocml/src/cexpD.cl @@ -29,7 +29,7 @@ MATH_MANGLE(cexp)(double2 z) ri = isfinite ? ri : 0.0; } if (BUILTIN_CLASS_F64(x, CLASS_PINF)) { - rr = isfinite ? rr : AS_DOUBLE(PINFBITPATT_DP64); + rr = isfinite ? rr : PINF_F64; ri = isfinite ? ri : QNAN_F64; ri = y == 0.0 ? y : ri; } diff --git a/ocml/src/cexpF.cl b/ocml/src/cexpF.cl index 8a98eb3f..85f64154 100644 --- a/ocml/src/cexpF.cl +++ b/ocml/src/cexpF.cl @@ -29,7 +29,7 @@ MATH_MANGLE(cexp)(float2 z) ri = finite ? ri : 0.0f; } if (BUILTIN_CLASS_F32(x, CLASS_PINF)) { - rr = finite ? rr : AS_FLOAT(PINFBITPATT_SP32); + rr = finite ? rr : PINF_F32; ri = finite ? ri : QNAN_F32; ri = y == 0.0f ? y : ri; } diff --git a/ocml/src/clogD.cl b/ocml/src/clogD.cl index 2662994f..f47a1082 100644 --- a/ocml/src/clogD.cl +++ b/ocml/src/clogD.cl @@ -28,8 +28,8 @@ MATH_MANGLE(clog)(double2 z) if (!FINITE_ONLY_OPT()) { - rr = ((x == 0.0) & (y == 0.0)) ? 
AS_DOUBLE(NINFBITPATT_DP64) : rr; - rr = (BUILTIN_ISINF_F64(x) | BUILTIN_ISINF_F64(y)) ? AS_DOUBLE(PINFBITPATT_DP64) : rr; + rr = ((x == 0.0) & (y == 0.0)) ? NINF_F64 : rr; + rr = (BUILTIN_ISINF_F64(x) | BUILTIN_ISINF_F64(y)) ? PINF_F64 : rr; } return (double2)(rr, ri); diff --git a/ocml/src/clogF.cl b/ocml/src/clogF.cl index 618fc9fb..2cf791b1 100644 --- a/ocml/src/clogF.cl +++ b/ocml/src/clogF.cl @@ -28,8 +28,8 @@ MATH_MANGLE(clog)(float2 z) if (!FINITE_ONLY_OPT()) { - rr = ((x == 0.0f) & (y == 0.0f)) ? AS_FLOAT(NINFBITPATT_SP32) : rr; - rr = (BUILTIN_ISINF_F32(x) | BUILTIN_ISINF_F32(y)) ? AS_FLOAT(PINFBITPATT_SP32) : rr; + rr = ((x == 0.0f) & (y == 0.0f)) ? NINF_F32 : rr; + rr = (BUILTIN_ISINF_F32(x) | BUILTIN_ISINF_F32(y)) ? PINF_F32 : rr; } return (float2)(rr, ri); diff --git a/ocml/src/coshD.cl b/ocml/src/coshD.cl index da1c54a4..dcef7ecc 100644 --- a/ocml/src/coshD.cl +++ b/ocml/src/coshD.cl @@ -21,7 +21,7 @@ MATH_MANGLE(cosh)(double x) double z = c.hi; if (!FINITE_ONLY_OPT()) { - z = x >= 0x1.633ce8fb9f87ep+9 ? AS_DOUBLE(PINFBITPATT_DP64) : z; + z = x >= 0x1.633ce8fb9f87ep+9 ? PINF_F64 : z; } return z; diff --git a/ocml/src/coshF.cl b/ocml/src/coshF.cl index a395c701..a0d0a199 100644 --- a/ocml/src/coshF.cl +++ b/ocml/src/coshF.cl @@ -21,7 +21,7 @@ MATH_MANGLE(cosh)(float x) float z = c.hi; if (!FINITE_ONLY_OPT()) { - z = x > 0x1.65a9f8p+6f ? AS_FLOAT(PINFBITPATT_SP32) : z; + z = x > 0x1.65a9f8p+6f ? PINF_F32 : z; } return z; diff --git a/ocml/src/csinhD.cl b/ocml/src/csinhD.cl index 83eb30ba..c0cbbc5b 100644 --- a/ocml/src/csinhD.cl +++ b/ocml/src/csinhD.cl @@ -29,8 +29,8 @@ MATH_MANGLE(csinh)(double2 z) if (!FINITE_ONLY_OPT()) { bool b = x >= 0x1.6395a2079b70cp+9; - cxhi = b ? AS_DOUBLE(PINFBITPATT_DP64) : cxhi; - sxhi = b ? AS_DOUBLE(PINFBITPATT_DP64) : sxhi; + cxhi = b ? PINF_F64 : cxhi; + sxhi = b ? PINF_F64 : sxhi; } bool s = x >= 0x1.0p-27; diff --git a/ocml/src/csinhF.cl b/ocml/src/csinhF.cl index 06c92bd8..622c58f0 100644 --- a/ocml/src/csinhF.cl +++ b/ocml/src/csinhF.cl @@ -29,8 +29,8 @@ MATH_MANGLE(csinh)(float2 z) if (!FINITE_ONLY_OPT()) { bool b = x >= 0x1.686fc0p+6f; - cxhi = b ? AS_FLOAT(PINFBITPATT_SP32) : cxhi; - sxhi = b ? AS_FLOAT(PINFBITPATT_SP32) : sxhi; + cxhi = b ? PINF_F32 : cxhi; + sxhi = b ? PINF_F32 : sxhi; } bool s = x >= 0x1.0p-12f; diff --git a/ocml/src/erfcinvD.cl b/ocml/src/erfcinvD.cl index 008a585e..0fc466b7 100644 --- a/ocml/src/erfcinvD.cl +++ b/ocml/src/erfcinvD.cl @@ -87,8 +87,8 @@ MATH_MANGLE(erfcinv)(double y) if (!FINITE_ONLY_OPT()) { ret = ((y < 0.0) | (y > 2.0)) ? QNAN_F64 : ret; - ret = y == 0.0 ? AS_DOUBLE(PINFBITPATT_DP64) : ret; - ret = y == 2.0 ? AS_DOUBLE(NINFBITPATT_DP64) : ret; + ret = y == 0.0 ? PINF_F64 : ret; + ret = y == 2.0 ? NINF_F64 : ret; } return ret; diff --git a/ocml/src/erfcinvF.cl b/ocml/src/erfcinvF.cl index 27cd16b3..2a953a5b 100644 --- a/ocml/src/erfcinvF.cl +++ b/ocml/src/erfcinvF.cl @@ -43,8 +43,8 @@ MATH_MANGLE(erfcinv)(float y) if (!FINITE_ONLY_OPT()) { ret = ((y < 0.0f) | (y > 2.0f)) ? QNAN_F32 : ret; - ret = y == 0.0f ? AS_FLOAT(PINFBITPATT_SP32) : ret; - ret = y == 2.0f ? AS_FLOAT(NINFBITPATT_SP32) : ret; + ret = y == 0.0f ? PINF_F32 : ret; + ret = y == 2.0f ? 
NINF_F32 : ret; } return ret; diff --git a/ocml/src/erfcxD.cl b/ocml/src/erfcxD.cl index b6494560..ab463d63 100644 --- a/ocml/src/erfcxD.cl +++ b/ocml/src/erfcxD.cl @@ -67,7 +67,7 @@ MATH_MANGLE(erfcx)(double x) double x2l = MATH_MAD(x, x, -x2h); double e = MATH_MANGLE(exp)(x2h); ret = MATH_MAD(2.0, MATH_MAD(e, x2l, e), -ret); - ret = x < -0x1.aa0f4d2e063cep+4 ? AS_DOUBLE(PINFBITPATT_DP64) : ret; + ret = x < -0x1.aa0f4d2e063cep+4 ? PINF_F64 : ret; } return ret; @@ -132,7 +132,7 @@ MATH_MANGLE(erfcx)(double x) double x2h = ax * ax; double x2l = BUILTIN_FMA_F64(ax, ax, -x2h); ret = MATH_MANGLE(exp)(x2h) * MATH_MANGLE(exp)(x2l) * 2.0 - ret; - ret = x < -27.0 ? AS_DOUBLE(PINFBITPATT_DP64) : ret; + ret = x < -27.0 ? PINF_F64 : ret; } return ret; diff --git a/ocml/src/erfcxF.cl b/ocml/src/erfcxF.cl index b37e3013..eafcdad5 100644 --- a/ocml/src/erfcxF.cl +++ b/ocml/src/erfcxF.cl @@ -51,7 +51,7 @@ MATH_MANGLE(erfcx)(float x) float x2l = BUILTIN_FMA_F32(x, x, -x2h); float e = MATH_MANGLE(exp)(x2h); ret = BUILTIN_FMA_F32(2.0f, BUILTIN_FMA_F32(e, x2l, e), -ret); - ret = x < -0x1.2d6abcp+3f ? AS_FLOAT(PINFBITPATT_SP32) : ret; + ret = x < -0x1.2d6abcp+3f ? PINF_F32 : ret; } return ret; @@ -108,7 +108,7 @@ MATH_MANGLE(erfcx)(float x) } ret = MATH_MANGLE(exp)(x2h) * MATH_MANGLE(exp)(x2l) * 2.0f - ret; - ret = x < -10.0f ? AS_FLOAT(PINFBITPATT_SP32) : ret; + ret = x < -10.0f ? PINF_F32 : ret; } return ret; diff --git a/ocml/src/erfinvD.cl b/ocml/src/erfinvD.cl index a32c43f4..24da7560 100644 --- a/ocml/src/erfinvD.cl +++ b/ocml/src/erfinvD.cl @@ -91,7 +91,7 @@ MATH_MANGLE(erfinv)(double x) if (!FINITE_ONLY_OPT()) { ret = ax > 1.0 ? QNAN_F64 : ret; - ret = ax == 1.0 ? AS_DOUBLE(PINFBITPATT_DP64) : ret; + ret = ax == 1.0 ? PINF_F64 : ret; } return BUILTIN_COPYSIGN_F64(ret, x); diff --git a/ocml/src/erfinvF.cl b/ocml/src/erfinvF.cl index 3dea33d8..8dc9e953 100644 --- a/ocml/src/erfinvF.cl +++ b/ocml/src/erfinvF.cl @@ -49,7 +49,7 @@ MATH_MANGLE(erfinv)(float x) if (!FINITE_ONLY_OPT()) { ret = ax > 1.0f ? QNAN_F32 : ret; - ret = ax == 1.0f ? AS_FLOAT(PINFBITPATT_SP32) : ret; + ret = ax == 1.0f ? PINF_F32 : ret; } return BUILTIN_COPYSIGN_F32(ret, x); diff --git a/ocml/src/expD_base.h b/ocml/src/expD_base.h index 5c15c3df..b98ec841 100644 --- a/ocml/src/expD_base.h +++ b/ocml/src/expD_base.h @@ -40,7 +40,7 @@ MATH_MANGLE(exp)(double x) double z = BUILTIN_FLDEXP_F64(p, (int)dn); if (!FINITE_ONLY_OPT()) { - z = x > 1024.0 ? AS_DOUBLE(PINFBITPATT_DP64) : z; + z = x > 1024.0 ? PINF_F64 : z; } z = x < -1075.0 ? 0.0 : z; diff --git a/ocml/src/expF_base.h b/ocml/src/expF_base.h index ae69eecb..e330c9b8 100644 --- a/ocml/src/expF_base.h +++ b/ocml/src/expF_base.h @@ -86,10 +86,10 @@ MATH_MANGLE(exp)(float x) #if defined COMPILING_EXP r = x < -0x1.5d58a0p+6f ? 0.0f : r; - r = x > 0x1.62e430p+6f ? AS_FLOAT(PINFBITPATT_SP32) : r; + r = x > 0x1.62e430p+6f ? PINF_F32 : r; #else r = x < -0x1.2f7030p+5f ? 0.0f : r; - r = x > 0x1.344136p+5f ? AS_FLOAT(PINFBITPATT_SP32): r; + r = x > 0x1.344136p+5f ? PINF_F32 : r; #endif return r; #endif @@ -146,10 +146,10 @@ MATH_MANGLE(exp)(float x) #if defined COMPILING_EXP r = x < -0x1.9d1da0p+6f ? 0.0f : r; - r = x > 0x1.62e430p+6f ? AS_FLOAT(PINFBITPATT_SP32) : r; + r = x > 0x1.62e430p+6f ? PINF_F32 : r; #else r = x < -0x1.66d3e8p+5f ? 0.0f : r; - r = x > 0x1.344136p+5f ? AS_FLOAT(PINFBITPATT_SP32): r; + r = x > 0x1.344136p+5f ? 
PINF_F32 : r; #endif return r; #endif diff --git a/ocml/src/expepD.cl b/ocml/src/expepD.cl index 6e0e5781..75230030 100644 --- a/ocml/src/expepD.cl +++ b/ocml/src/expepD.cl @@ -30,7 +30,7 @@ MATH_PRIVATE(expep)(double2 x) z = BUILTIN_FLDEXP_F64(z, (int)dn); - z = x.hi > 710.0 ? AS_DOUBLE(PINFBITPATT_DP64) : z; + z = x.hi > 710.0 ? PINF_F64 : z; z = x.hi < -745.0 ? 0.0 : z; #else double z = MATH_MANGLE(exp)(x.hi); diff --git a/ocml/src/expepF.cl b/ocml/src/expepF.cl index ad8e79ec..3a675626 100644 --- a/ocml/src/expepF.cl +++ b/ocml/src/expepF.cl @@ -27,7 +27,7 @@ MATH_PRIVATE(expep)(float2 x) z = BUILTIN_FLDEXP_F32(z, (int)fn); - z = x.hi > 89.0f ? AS_FLOAT(PINFBITPATT_SP32) : z; + z = x.hi > 89.0f ? PINF_F32 : z; z = x.hi < -104.0f ? 0.0f : z; #else float d = x.hi == 0x1.62e430p+6f ? 0x1.0p-17f : 0.0f; diff --git a/ocml/src/expm1D.cl b/ocml/src/expm1D.cl index 21e385d4..17376f1b 100644 --- a/ocml/src/expm1D.cl +++ b/ocml/src/expm1D.cl @@ -40,7 +40,7 @@ MATH_MANGLE(expm1)(double x) #endif if (!FINITE_ONLY_OPT()) { - z = x > 0x1.62e42fefa39efp+9 ? AS_DOUBLE(PINFBITPATT_DP64) : z; + z = x > 0x1.62e42fefa39efp+9 ? PINF_F64 : z; } z = x < -37.0 ? -1.0 : z; diff --git a/ocml/src/expm1F.cl b/ocml/src/expm1F.cl index 477d57ea..75df38f9 100644 --- a/ocml/src/expm1F.cl +++ b/ocml/src/expm1F.cl @@ -32,7 +32,7 @@ MATH_MANGLE(expm1)(float x) #endif if (!FINITE_ONLY_OPT()) { - z = x > 0x1.62e42ep+6f ? AS_FLOAT(PINFBITPATT_SP32) : z; + z = x > 0x1.62e42ep+6f ? PINF_F32 : z; } z = x < -17.0f ? -1.0f : z; diff --git a/ocml/src/hypotD.cl b/ocml/src/hypotD.cl index b12022f2..ef9b5819 100644 --- a/ocml/src/hypotD.cl +++ b/ocml/src/hypotD.cl @@ -23,7 +23,7 @@ MATH_MANGLE(hypot)(double x, double y) QNAN_F64 : ret; ret = (BUILTIN_ISINF_F64(x) | BUILTIN_ISINF_F64(y)) ? - AS_DOUBLE(PINFBITPATT_DP64) : ret; + PINF_F64 : ret; } return ret; diff --git a/ocml/src/hypotF.cl b/ocml/src/hypotF.cl index 5e09461f..258e2482 100644 --- a/ocml/src/hypotF.cl +++ b/ocml/src/hypotF.cl @@ -20,7 +20,7 @@ MATH_MANGLE(hypot)(float x, float y) if (!FINITE_ONLY_OPT()) { ret = BUILTIN_ISINF_F32(t) ? - AS_FLOAT(PINFBITPATT_SP32) : ret; + PINF_F32 : ret; } return ret; diff --git a/ocml/src/hypotH.cl b/ocml/src/hypotH.cl index b5cb85a8..154930b3 100644 --- a/ocml/src/hypotH.cl +++ b/ocml/src/hypotH.cl @@ -20,7 +20,7 @@ MATH_MANGLE(hypot)(half x, half y) if (!FINITE_ONLY_OPT()) { ret = (BUILTIN_ISINF_F16(x) | BUILTIN_ISINF_F16(y)) ? - AS_HALF((ushort)PINFBITPATT_HP16) : ret; + PINF_F16 : ret; } return ret; diff --git a/ocml/src/len3D.cl b/ocml/src/len3D.cl index 7d1e991c..7884ccc8 100644 --- a/ocml/src/len3D.cl +++ b/ocml/src/len3D.cl @@ -36,7 +36,7 @@ MATH_MANGLE(len3)(double x, double y, double z) BUILTIN_ISNAN_F64(z)) ? QNAN_F64 : ret; ret = (BUILTIN_ISINF_F64(x) | BUILTIN_ISINF_F64(y) | - BUILTIN_ISINF_F64(z)) ? AS_DOUBLE(PINFBITPATT_DP64) : ret; + BUILTIN_ISINF_F64(z)) ? PINF_F64 : ret; } return ret; diff --git a/ocml/src/len3F.cl b/ocml/src/len3F.cl index 69b2d8e0..cee0e377 100644 --- a/ocml/src/len3F.cl +++ b/ocml/src/len3F.cl @@ -33,7 +33,7 @@ MATH_MANGLE(len3)(float x, float y, float z) if (!FINITE_ONLY_OPT()) { ret = (BUILTIN_ISINF_F32(x) | BUILTIN_ISINF_F32(y) | - BUILTIN_ISINF_F32(z)) ? AS_FLOAT(PINFBITPATT_SP32) : ret; + BUILTIN_ISINF_F32(z)) ? 
PINF_F32 : ret; } return ret; diff --git a/ocml/src/len3H.cl b/ocml/src/len3H.cl index 9a8d610f..5ea27309 100644 --- a/ocml/src/len3H.cl +++ b/ocml/src/len3H.cl @@ -21,7 +21,7 @@ MATH_MANGLE(len3)(half x, half y, half z) if (!FINITE_ONLY_OPT()) { ret = (BUILTIN_ISINF_F16(x) | BUILTIN_ISINF_F16(y) | - BUILTIN_ISINF_F16(z)) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; + BUILTIN_ISINF_F16(z)) ? PINF_F16 : ret; } return ret; diff --git a/ocml/src/len4D.cl b/ocml/src/len4D.cl index bd4ec4d1..1b8f5c87 100644 --- a/ocml/src/len4D.cl +++ b/ocml/src/len4D.cl @@ -44,7 +44,7 @@ MATH_MANGLE(len4)(double x, double y, double z, double w) ret = (BUILTIN_ISINF_F64(x) | BUILTIN_ISINF_F64(y) | BUILTIN_ISINF_F64(z) | - BUILTIN_ISINF_F64(w)) ? AS_DOUBLE(PINFBITPATT_DP64) : ret; + BUILTIN_ISINF_F64(w)) ? PINF_F64 : ret; } return ret; diff --git a/ocml/src/len4F.cl b/ocml/src/len4F.cl index d2b42a62..d0a352f4 100644 --- a/ocml/src/len4F.cl +++ b/ocml/src/len4F.cl @@ -42,7 +42,7 @@ MATH_MANGLE(len4)(float x, float y, float z, float w) ret = (BUILTIN_ISINF_F32(x) | BUILTIN_ISINF_F32(y) | BUILTIN_ISINF_F32(z) | - BUILTIN_ISINF_F32(w)) ? AS_FLOAT(PINFBITPATT_SP32) : ret; + BUILTIN_ISINF_F32(w)) ? PINF_F32 : ret; } return ret; diff --git a/ocml/src/len4H.cl b/ocml/src/len4H.cl index a828b39d..1a386188 100644 --- a/ocml/src/len4H.cl +++ b/ocml/src/len4H.cl @@ -23,7 +23,7 @@ MATH_MANGLE(len4)(half x, half y, half z, half w) ret = (BUILTIN_ISINF_F16(x) | BUILTIN_ISINF_F16(y) | BUILTIN_ISINF_F16(z) | - BUILTIN_ISINF_F16(w)) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; + BUILTIN_ISINF_F16(w)) ? PINF_F16 : ret; } return ret; diff --git a/ocml/src/lgamma_rD.cl b/ocml/src/lgamma_rD.cl index df948c7a..c7514120 100644 --- a/ocml/src/lgamma_rD.cl +++ b/ocml/src/lgamma_rD.cl @@ -276,7 +276,7 @@ MATH_MANGLE(lgamma_r_impl)(double x) double negadj = MATH_MANGLE(log)(MATH_DIV(pi, BUILTIN_ABS_F64(t * x))); ret = negadj - ret; bool z = BUILTIN_FRACTION_F64(x) == 0.0; - ret = z ? AS_DOUBLE(PINFBITPATT_DP64) : ret; + ret = z ? PINF_F64 : ret; s = t < 0.0 ? -1 : 1; s = z ? 0 : s; } else { @@ -286,7 +286,7 @@ MATH_MANGLE(lgamma_r_impl)(double x) if (!FINITE_ONLY_OPT()) { // Handle negative integer, Inf, NaN - ret = BUILTIN_CLASS_F64(ax, CLASS_NZER|CLASS_PZER|CLASS_PINF) | (x < 0.0f & hax >= 0x43300000) ? AS_DOUBLE(PINFBITPATT_DP64) : ret; + ret = BUILTIN_CLASS_F64(ax, CLASS_NZER|CLASS_PZER|CLASS_PINF) | (x < 0.0f & hax >= 0x43300000) ? PINF_F64 : ret; ret = BUILTIN_ISNAN_F64(x) ? x : ret; } diff --git a/ocml/src/lgamma_rF.cl b/ocml/src/lgamma_rF.cl index 1ad2375e..c822b66c 100644 --- a/ocml/src/lgamma_rF.cl +++ b/ocml/src/lgamma_rF.cl @@ -267,7 +267,7 @@ MATH_MANGLE(lgamma_r_impl)(float x) float negadj = MATH_MANGLE(log)(MATH_DIV(pi, BUILTIN_ABS_F32(t * x))); ret = negadj - ret; bool z = BUILTIN_FRACTION_F32(x) == 0.0f; - ret = z ? AS_FLOAT(PINFBITPATT_SP32) : ret; + ret = z ? PINF_F32 : ret; s = t < 0.0f ? -1 : 1; s = z ? 0 : s; } else { @@ -277,7 +277,7 @@ MATH_MANGLE(lgamma_r_impl)(float x) if (!FINITE_ONLY_OPT()) { ret = ((ax != 0.0f) && !BUILTIN_ISINF_F32(ax) && - ((x >= 0.0f) || (ax < 0x1.0p+23f))) ? ret : AS_FLOAT(PINFBITPATT_SP32); + ((x >= 0.0f) || (ax < 0x1.0p+23f))) ? ret : PINF_F32; ret = BUILTIN_ISNAN_F32(x) ? x : ret; } diff --git a/ocml/src/log1pD.cl b/ocml/src/log1pD.cl index e86e5787..86d135ee 100644 --- a/ocml/src/log1pD.cl +++ b/ocml/src/log1pD.cl @@ -20,7 +20,7 @@ MATH_MANGLE(log1p)(double x) if (!FINITE_ONLY_OPT()) { z = BUILTIN_CLASS_F64(x, CLASS_PINF) ? x : z; z = x < -1.0 ? QNAN_F64 : z; - z = x == -1.0 ? 
AS_DOUBLE(NINFBITPATT_DP64) : z; + z = x == -1.0 ? NINF_F64 : z; } return z; diff --git a/ocml/src/log1pF.cl b/ocml/src/log1pF.cl index 8b4e11a2..7e2b1a3d 100644 --- a/ocml/src/log1pF.cl +++ b/ocml/src/log1pF.cl @@ -20,7 +20,7 @@ MATH_MANGLE(log1p)(float x) if (!FINITE_ONLY_OPT()) { z = BUILTIN_CLASS_F32(x, CLASS_PINF) ? x : z; z = x < -1.0f ? QNAN_F32 : z; - z = x == -1.0f ? AS_FLOAT(NINFBITPATT_SP32) : z; + z = x == -1.0f ? NINF_F32 : z; } return BUILTIN_ABS_F32(x) < 0x1.0p-24f ? x : z; diff --git a/ocml/src/logD_base.h b/ocml/src/logD_base.h index 2d918c7e..ff5b916d 100644 --- a/ocml/src/logD_base.h +++ b/ocml/src/logD_base.h @@ -46,7 +46,7 @@ MATH_MANGLE(log)(double a) if (!FINITE_ONLY_OPT()) { ret = BUILTIN_ISINF_F64(a) ? a : ret; ret = a < 0.0 ? QNAN_F64 : ret; - ret = a == 0.0 ? AS_DOUBLE(NINFBITPATT_DP64) : ret; + ret = a == 0.0 ? NINF_F64 : ret; } return ret; diff --git a/ocml/src/logbD.cl b/ocml/src/logbD.cl index 4ec287fe..e5eb431f 100644 --- a/ocml/src/logbD.cl +++ b/ocml/src/logbD.cl @@ -15,7 +15,7 @@ MATH_MANGLE(logb)(double x) if (!FINITE_ONLY_OPT()) { double ax = BUILTIN_ABS_F64(x); ret = BUILTIN_ISFINITE_F64(ax) ? ret : ax; - ret = x == 0.0 ? AS_DOUBLE(NINFBITPATT_DP64) : ret; + ret = x == 0.0 ? NINF_F64 : ret; } return ret; diff --git a/ocml/src/logbF.cl b/ocml/src/logbF.cl index ca55923f..d8a42425 100644 --- a/ocml/src/logbF.cl +++ b/ocml/src/logbF.cl @@ -15,7 +15,7 @@ MATH_MANGLE(logb)(float x) if (!FINITE_ONLY_OPT()) { float ax = BUILTIN_ABS_F32(x); ret = BUILTIN_ISFINITE_F32(ax) ? ret : ax; - ret = x == 0.0f ? AS_FLOAT(NINFBITPATT_SP32) : ret; + ret = x == 0.0f ? NINF_F32 : ret; } return ret; diff --git a/ocml/src/logbH.cl b/ocml/src/logbH.cl index 3e2af75b..f56ebe7e 100644 --- a/ocml/src/logbH.cl +++ b/ocml/src/logbH.cl @@ -17,7 +17,7 @@ MATH_MANGLE(logb)(half x) if (!FINITE_ONLY_OPT()) { half ax = BUILTIN_ABS_F16(x); ret = BUILTIN_ISFINITE_F16(ax) ? ret : ax; - ret = x == 0.0h ? AS_HALF((short)NINFBITPATT_HP16) : ret; + ret = x == 0.0h ? NINF_F16 : ret; } return ret; diff --git a/ocml/src/powD_base.h b/ocml/src/powD_base.h index bc34e522..072e7183 100644 --- a/ocml/src/powD_base.h +++ b/ocml/src/powD_base.h @@ -58,8 +58,8 @@ MATH_MANGLE(pow)(double x, double y) // Now all the edge cases #if defined COMPILING_POWR - double iz = y < 0.0 ? AS_DOUBLE(PINFBITPATT_DP64) : 0.0; - double zi = y < 0.0 ? 0.0 : AS_DOUBLE(PINFBITPATT_DP64); + double iz = y < 0.0 ? PINF_F64 : 0.0; + double zi = y < 0.0 ? 0.0 : PINF_F64; if (x == 0.0) ret = iz; @@ -80,7 +80,8 @@ MATH_MANGLE(pow)(double x, double y) ret = QNAN_F64; #elif defined COMPILING_POWN if (BUILTIN_ISINF_F64(ax) || x == 0.0) - ret = BUILTIN_COPYSIGN_F64((x == 0.0) ^ (ny < 0) ? 0.0 : AS_DOUBLE(PINFBITPATT_DP64), inty == 1 ? x : 0.0); + ret = BUILTIN_COPYSIGN_F64((x == 0.0) ^ (ny < 0) ? 0.0 : PINF_F64, + inty == 1 ? x : 0.0); if (BUILTIN_ISNAN_F64(x)) ret = QNAN_F64; @@ -89,7 +90,8 @@ MATH_MANGLE(pow)(double x, double y) ret = 1.0; #elif defined COMPILING_ROOTN if (BUILTIN_ISINF_F64(ax) || x == 0.0) - ret = BUILTIN_COPYSIGN_F64((x == 0.0) ^ (ny < 0) ? 0.0 : AS_DOUBLE(PINFBITPATT_DP64), inty == 1 ? x : 0.0); + ret = BUILTIN_COPYSIGN_F64((x == 0.0) ^ (ny < 0) ? 0.0 : PINF_F64, + inty == 1 ? x : 0.0); if ((x < 0.0 && inty != 1) || ny == 0) ret = QNAN_F64; @@ -101,7 +103,8 @@ MATH_MANGLE(pow)(double x, double y) ret = ax == 1.0 ? ax : (samesign(y, ax - 1.0) ? ay : 0.0); if (BUILTIN_ISINF_F64(ax) || x == 0.0) - ret = BUILTIN_COPYSIGN_F64((x == 0.0) ^ (y < 0.0) ? 0.0 : AS_DOUBLE(PINFBITPATT_DP64), inty == 1 ? 
x : 0.0); + ret = BUILTIN_COPYSIGN_F64((x == 0.0) ^ (y < 0.0) ? 0.0 : PINF_F64, + inty == 1 ? x : 0.0); if (BUILTIN_ISNAN_F64(x) || BUILTIN_ISNAN_F64(y)) ret = QNAN_F64; diff --git a/ocml/src/powF_base.h b/ocml/src/powF_base.h index 2fff63f6..0a2e8b29 100644 --- a/ocml/src/powF_base.h +++ b/ocml/src/powF_base.h @@ -76,8 +76,8 @@ MATH_MANGLE(pow)(float x, float y) // Now all the edge cases #if defined COMPILING_POWR - float iz = y < 0.0f ? AS_FLOAT(PINFBITPATT_SP32) : 0.0f; - float zi = y < 0.0f ? 0.0f : AS_FLOAT(PINFBITPATT_SP32); + float iz = y < 0.0f ? PINF_F32 : 0.0f; + float zi = y < 0.0f ? 0.0f : PINF_F32; if (x == 0.0f) ret = iz; @@ -98,7 +98,8 @@ MATH_MANGLE(pow)(float x, float y) ret = QNAN_F32; #elif defined COMPILING_POWN if (BUILTIN_ISINF_F32(ax) || x == 0.0f) - ret = BUILTIN_COPYSIGN_F32((x == 0.0f) ^ (ny < 0) ? 0.0f : AS_FLOAT(PINFBITPATT_SP32), inty == 1 ? x : 0.0f); + ret = BUILTIN_COPYSIGN_F32((x == 0.0f) ^ (ny < 0) ? 0.0f : PINF_F32, + inty == 1 ? x : 0.0f); if (BUILTIN_ISNAN_F32(x)) ret = QNAN_F32; @@ -107,7 +108,8 @@ MATH_MANGLE(pow)(float x, float y) ret = 1.0f; #elif defined COMPILING_ROOTN if (BUILTIN_ISINF_F32(ax) || x == 0.0f) - ret = BUILTIN_COPYSIGN_F32((x == 0.0f) ^ (ny < 0) ? 0.0f : AS_FLOAT(PINFBITPATT_SP32), inty == 1 ? x : 0.0f); + ret = BUILTIN_COPYSIGN_F32((x == 0.0f) ^ (ny < 0) ? 0.0f : PINF_F32, + inty == 1 ? x : 0.0f); if ((x < 0.0f && inty != 1) || ny == 0) ret = QNAN_F32; @@ -119,7 +121,8 @@ MATH_MANGLE(pow)(float x, float y) ret = ax == 1.0f ? ax : (samesign(y, ax - 1.0f) ? ay : 0.0f); if (BUILTIN_ISINF_F32(ax) || x == 0.0f) - ret = BUILTIN_COPYSIGN_F32((x == 0.0f) ^ (y < 0.0f) ? 0.0f : AS_FLOAT(PINFBITPATT_SP32), inty == 1 ? x : 0.0f); + ret = BUILTIN_COPYSIGN_F32((x == 0.0f) ^ (y < 0.0f) ? 0.0f : PINF_F32, + inty == 1 ? x : 0.0f); if (BUILTIN_ISNAN_F32(x) || BUILTIN_ISNAN_F32(y)) ret = QNAN_F32; diff --git a/ocml/src/powH_base.h b/ocml/src/powH_base.h index c3055dd0..ac47f6eb 100644 --- a/ocml/src/powH_base.h +++ b/ocml/src/powH_base.h @@ -67,18 +67,18 @@ MATH_MANGLE(pow)(half x, half y) bool y_pos = BUILTIN_CLASS_F16(y, CLASS_PZER|CLASS_PSUB|CLASS_PNOR|CLASS_PINF); if (!FINITE_ONLY_OPT()) { - ret = (ax_lt_1 & y_eq_ninf) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; + ret = (ax_lt_1 & y_eq_ninf) ? PINF_F16 : ret; ret = (ax_lt_1 & y_eq_pinf) ? 0.0h : ret; ret = (ax_eq_1 & ay_lt_inf) ? 1.0h : ret; ret = (ax_eq_1 & ay_eq_pinf) ? QNAN_F16 : ret; ret = (ax_gt_1 & y_eq_ninf) ? 0.0h : ret; - ret = (ax_gt_1 & y_eq_pinf) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; + ret = (ax_gt_1 & y_eq_pinf) ? PINF_F16 : ret; ret = (ax_lt_pinf & ay_eq_0) ? 1.0h : ret; ret = (ax_eq_pinf & !y_pos) ? 0.0h : ret; - ret = (ax_eq_pinf & y_pos) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; - ret = (ax_eq_pinf & y_eq_pinf) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; + ret = (ax_eq_pinf & y_pos) ? PINF_F16 : ret; + ret = (ax_eq_pinf & y_eq_pinf) ? PINF_F16 : ret; ret = (ax_eq_pinf & ay_eq_0) ? QNAN_F16 : ret; - ret = (ax_eq_0 & !y_pos) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; + ret = (ax_eq_0 & !y_pos) ? PINF_F16 : ret; ret = (ax_eq_0 & y_pos) ? 0.0h : ret; ret = (ax_eq_0 & ay_eq_0) ? QNAN_F16 : ret; ret = (ax_ne_0 & !x_pos) ? QNAN_F16 : ret; @@ -100,18 +100,18 @@ MATH_MANGLE(pow)(half x, half y) bool y_pos = ny >= 0; if (!FINITE_ONLY_OPT()) { - half xinf = BUILTIN_COPYSIGN_F16(AS_HALF((ushort)PINFBITPATT_HP16), x); + half xinf = BUILTIN_COPYSIGN_F16(PINF_F16, x); ret = (ax_eq_0 & !y_pos & (inty == 1)) ? xinf : ret; - ret = (ax_eq_0 & !y_pos & (inty == 2)) ? 
AS_HALF((ushort)PINFBITPATT_HP16) : ret; + ret = (ax_eq_0 & !y_pos & (inty == 2)) ? PINF_F16 : ret; ret = (ax_eq_0 & y_pos & (inty == 2)) ? 0.0h : ret; half xzero = BUILTIN_COPYSIGN_F16(0.0h, x); ret = (ax_eq_0 & y_pos & (inty == 1)) ? xzero : ret; ret = (x_eq_ninf & !y_pos & (inty == 1)) ? -0.0h : ret; ret = (x_eq_ninf & !y_pos & (inty != 1)) ? 0.0h : ret; - ret = (x_eq_ninf & y_pos & (inty == 1)) ? AS_HALF((ushort)NINFBITPATT_HP16) : ret; - ret = (x_eq_ninf & y_pos & (inty != 1)) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; + ret = (x_eq_ninf & y_pos & (inty == 1)) ? NINF_F16 : ret; + ret = (x_eq_ninf & y_pos & (inty != 1)) ? PINF_F16 : ret; ret = (x_eq_pinf & !y_pos) ? 0.0h : ret; - ret = (x_eq_pinf & y_pos) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; + ret = (x_eq_pinf & y_pos) ? PINF_F16 : ret; ret = ax_eq_nan ? x : ret; } else { half xzero = BUILTIN_COPYSIGN_F16(0.0h, x); @@ -131,16 +131,16 @@ MATH_MANGLE(pow)(half x, half y) if (!FINITE_ONLY_OPT()) { ret = (!x_pos & (inty == 2)) ? QNAN_F16 : ret; - half xinf = BUILTIN_COPYSIGN_F16(AS_HALF((ushort)PINFBITPATT_HP16), x); + half xinf = BUILTIN_COPYSIGN_F16(PINF_F16, x); ret = (ax_eq_0 & !y_pos & (inty == 1)) ? xinf : ret; - ret = (ax_eq_0 & !y_pos & (inty == 2)) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; + ret = (ax_eq_0 & !y_pos & (inty == 2)) ? PINF_F16 : ret; ret = (ax_eq_0 & y_pos & (inty == 2)) ? 0.0h : ret; half xzero = BUILTIN_COPYSIGN_F16(0.0h, x); ret = (ax_eq_0 & y_pos & (inty == 1)) ? xzero : ret; - ret = (x_eq_ninf & y_pos & (inty == 1)) ? AS_HALF((ushort)NINFBITPATT_HP16) : ret; + ret = (x_eq_ninf & y_pos & (inty == 1)) ? NINF_F16 : ret; ret = (x_eq_ninf & !y_pos & (inty == 1)) ? -0.0h : ret; ret = (x_eq_pinf & !y_pos) ? 0.0h : ret; - ret = (x_eq_pinf & y_pos) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; + ret = (x_eq_pinf & y_pos) ? PINF_F16 : ret; ret = ax_eq_nan ? x : ret; ret = ny == 0 ? QNAN_F16 : ret; } else { @@ -170,31 +170,31 @@ MATH_MANGLE(pow)(half x, half y) if (!FINITE_ONLY_OPT()) { ret = (!x_pos & (inty == 0)) ? QNAN_F16 : ret; - ret = (ax_lt_1 & y_eq_ninf) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; + ret = (ax_lt_1 & y_eq_ninf) ? PINF_F16 : ret; ret = (ax_gt_1 & y_eq_ninf) ? 0.0h : ret; ret = (ax_lt_1 & y_eq_pinf) ? 0.0h : ret; - ret = (ax_gt_1 & y_eq_pinf) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; - half xinf = BUILTIN_COPYSIGN_F16(AS_HALF((ushort)PINFBITPATT_HP16), x); + ret = (ax_gt_1 & y_eq_pinf) ? PINF_F16 : ret; + half xinf = BUILTIN_COPYSIGN_F16(PINF_F16, x); ret = (ax_eq_0 & !y_pos & (inty == 1)) ? xinf : ret; - ret = (ax_eq_0 & !y_pos & (inty != 1)) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; + ret = (ax_eq_0 & !y_pos & (inty != 1)) ? PINF_F16 : ret; half xzero = BUILTIN_COPYSIGN_F16(0.0h, x); ret = (ax_eq_0 & y_pos & (inty == 1)) ? xzero : ret; ret = (ax_eq_0 & y_pos & (inty != 1)) ? 0.0h : ret; - ret = (ax_eq_0 & y_eq_ninf) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; + ret = (ax_eq_0 & y_eq_ninf) ? PINF_F16 : ret; ret = ((x == -1.0h) & ay_eq_pinf) ? 1.0h : ret; ret = (x_eq_ninf & !y_pos & (inty == 1)) ? -0.0h : ret; ret = (x_eq_ninf & !y_pos & (inty != 1)) ? 0.0h : ret; - ret = (x_eq_ninf & y_pos & (inty == 1)) ? AS_HALF((ushort)NINFBITPATT_HP16) : ret; - ret = (x_eq_ninf & y_pos & (inty != 1)) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; + ret = (x_eq_ninf & y_pos & (inty == 1)) ? NINF_F16 : ret; + ret = (x_eq_ninf & y_pos & (inty != 1)) ? PINF_F16 : ret; ret = (x_eq_pinf & !y_pos) ? 0.0h : ret; - ret = (x_eq_pinf & y_pos) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; + ret = (x_eq_pinf & y_pos) ? 
PINF_F16 : ret; ret = ax_eq_nan ? x : ret; ret = ay_eq_nan ? y : ret; } else { // XXX work around conformance test incorrectly checking these cases - half xinf = BUILTIN_COPYSIGN_F16(AS_HALF((ushort)PINFBITPATT_HP16), x); + half xinf = BUILTIN_COPYSIGN_F16(PINF_F16, x); ret = (ax_eq_0 & !y_pos & (inty == 1)) ? xinf : ret; - ret = (ax_eq_0 & !y_pos & (inty != 1)) ? AS_HALF((ushort)PINFBITPATT_HP16) : ret; + ret = (ax_eq_0 & !y_pos & (inty != 1)) ? PINF_F16 : ret; half xzero = BUILTIN_COPYSIGN_F16(0.0h, x); ret = (ax_eq_0 & y_pos & (inty == 1)) ? xzero : ret; diff --git a/ocml/src/rcbrtD.cl b/ocml/src/rcbrtD.cl index 403c3dc3..902f2f7f 100644 --- a/ocml/src/rcbrtD.cl +++ b/ocml/src/rcbrtD.cl @@ -29,7 +29,7 @@ MATH_MANGLE(rcbrt)(double x) if (!FINITE_ONLY_OPT()) { c = BUILTIN_CLASS_F64(a, CLASS_PINF) ? 0.0 : c; - c = x == 0.0 ? AS_DOUBLE(PINFBITPATT_DP64) : c; + c = x == 0.0 ? PINF_F64 : c; } return BUILTIN_COPYSIGN_F64(c, x); diff --git a/ocml/src/rhypotD.cl b/ocml/src/rhypotD.cl index d21521eb..126b01d3 100644 --- a/ocml/src/rhypotD.cl +++ b/ocml/src/rhypotD.cl @@ -23,7 +23,7 @@ MATH_MANGLE(rhypot)(double x, double y) double ret = BUILTIN_FLDEXP_F64(z, -e); if (!FINITE_ONLY_OPT()) { - ret = t == 0.0 ? AS_DOUBLE(PINFBITPATT_DP64) : ret; + ret = t == 0.0 ? PINF_F64 : ret; ret = (BUILTIN_ISNAN_F64(x) | BUILTIN_ISNAN_F64(y)) ? QNAN_F64 : ret; diff --git a/ocml/src/rlen3D.cl b/ocml/src/rlen3D.cl index 267a78da..dcb5b053 100644 --- a/ocml/src/rlen3D.cl +++ b/ocml/src/rlen3D.cl @@ -35,7 +35,7 @@ MATH_MANGLE(rlen3)(double x, double y, double z) double ret = BUILTIN_FLDEXP_F64(v, -e); if (!FINITE_ONLY_OPT()) { - ret = a == 0.0 ? AS_DOUBLE(PINFBITPATT_DP64) : ret; + ret = a == 0.0 ? PINF_F64 : ret; ret = (BUILTIN_ISNAN_F64(x) | BUILTIN_ISNAN_F64(y) | diff --git a/ocml/src/rlen4D.cl b/ocml/src/rlen4D.cl index 7d98a3fc..2fe9bf4b 100644 --- a/ocml/src/rlen4D.cl +++ b/ocml/src/rlen4D.cl @@ -43,7 +43,7 @@ MATH_MANGLE(rlen4)(double x, double y, double z, double w) double ret = BUILTIN_FLDEXP_F64(v, -e); if (!FINITE_ONLY_OPT()) { - ret = a == 0.0 ? AS_DOUBLE(PINFBITPATT_DP64) : ret; + ret = a == 0.0 ? PINF_F64 : ret; ret = (BUILTIN_ISNAN_F64(x) | BUILTIN_ISNAN_F64(y) | diff --git a/ocml/src/sinhD.cl b/ocml/src/sinhD.cl index 0bab018b..83e87611 100644 --- a/ocml/src/sinhD.cl +++ b/ocml/src/sinhD.cl @@ -21,7 +21,7 @@ MATH_MANGLE(sinh)(double x) double z = s.hi; if (!FINITE_ONLY_OPT()) { - z = y >= 0x1.633ce8fb9f87ep+9 ? AS_DOUBLE(PINFBITPATT_DP64) : z; + z = y >= 0x1.633ce8fb9f87ep+9 ? PINF_F64 : z; } z = y < 0x1.0p-27 ? y : z; diff --git a/ocml/src/sinhF.cl b/ocml/src/sinhF.cl index 9ea55fc9..e2174103 100644 --- a/ocml/src/sinhF.cl +++ b/ocml/src/sinhF.cl @@ -21,7 +21,7 @@ MATH_MANGLE(sinh)(float x) float z = s.hi; if (!FINITE_ONLY_OPT()) { - z = y > 0x1.65a9f8p+6f ? AS_FLOAT(PINFBITPATT_SP32) : z; + z = y > 0x1.65a9f8p+6f ? PINF_F32 : z; } z = y < 0x1.0p-12f ? y : z; diff --git a/ocml/src/tgammaD.cl b/ocml/src/tgammaD.cl index d85e253e..d2550dfa 100644 --- a/ocml/src/tgammaD.cl +++ b/ocml/src/tgammaD.cl @@ -132,7 +132,7 @@ MATH_MANGLE(tgamma)(double x) double zpow = MATH_MANGLE(powr)(z, MATH_MAD(0.5, z, -0.25)); if (x >= 0.0) { ret = MATH_DIV(etonegz*zpow*zpow*poly, dterm); - ret = x > 0x1.573fae561f647p+7 ? AS_DOUBLE(PINFBITPATT_DP64) : ret; + ret = x > 0x1.573fae561f647p+7 ? 
PINF_F64 : ret; } else if (x < 0.0) { if (x >= -170.5) { ret = MATH_DIV(pi*dterm, etonegz*zpow*zpow*poly*negadj); diff --git a/ocml/src/tgammaF.cl b/ocml/src/tgammaF.cl index 7671d20d..6aa2a377 100644 --- a/ocml/src/tgammaF.cl +++ b/ocml/src/tgammaF.cl @@ -39,7 +39,7 @@ MATH_MANGLE(tgamma)(float x) if (x > 0.0f) { float p = sqrt2pi*t2*t1*t1 * MATH_FAST_RCP(d); ret = MATH_MAD(p, pt, p); - ret = x > 0x1.18521ep+5f ? AS_FLOAT(PINFBITPATT_SP32) : ret; + ret = x > 0x1.18521ep+5f ? PINF_F32 : ret; } else { float s = MATH_MANGLE(sinpi)(x); float p = s*x*t2*t1*t1; From abc2f1069ad64ba2d8d4075acb4e0be993e53ef3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 16 Dec 2022 15:35:22 -0500 Subject: [PATCH 03/22] Use BUILTIN_ISINF_F64 Change-Id: Ieb26c1270a90b2d9fdc0e6fe7edc151f1ca29c81 --- ocml/src/fractD.cl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocml/src/fractD.cl b/ocml/src/fractD.cl index 0248e21d..95c46617 100644 --- a/ocml/src/fractD.cl +++ b/ocml/src/fractD.cl @@ -17,7 +17,7 @@ MATH_MANGLE(fract)(double x, __private double *ip) f = BUILTIN_MIN_F64(x - i, 0x1.fffffffffffffp-1); if (!FINITE_ONLY_OPT()) { f = BUILTIN_ISNAN_F64(x) ? x : f; - f = BUILTIN_CLASS_F64(x, CLASS_PINF|CLASS_NINF) ? 0.0 : f; + f = BUILTIN_ISINF_F64(x) ? 0.0 : f; } } else { f = BUILTIN_FRACTION_F64(x); From 116ed9ac0cadf557d6cdcf25ce84c54bf16bbf87 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 8 Dec 2022 16:02:22 -0500 Subject: [PATCH 04/22] Use new __builtin_elementwise_* functions instead of IR wrappers These were added last year and work with any scalar or vector type (so we don't really need to have the different _type suffixed macros). Not all the builtins are covered. Currently roundeven does exist, which I believe is equivalent to round and rint. I'm confused by having 3 names for the same operation, so leave the most likely to break for a later change. I have a patch out to add copysign and canonicalize. fma will be a bit more work because the code was written only for 2 operands and has some promotion rules to consider. 
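As a rough sketch of what the macro change buys (not part of this patch; the function name below is invented for illustration), a single clang builtin now covers the half2 case that previously needed a dedicated per-type asm wrapper:

    // Hypothetical example only: __builtin_elementwise_min/max accept any
    // floating-point scalar or vector type, so half2 works directly and the
    // per-type __llvm_minnum_2f16/__llvm_maxnum_2f16 wrappers are unnecessary.
    static half2 example_clamp(half2 v, half2 lo, half2 hi)
    {
        half2 t = __builtin_elementwise_max(v, lo);   // was __llvm_maxnum_2f16(v, lo)
        return __builtin_elementwise_min(t, hi);      // was __llvm_minnum_2f16(t, hi)
    }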
Change-Id: I8acf49ba718f32dd9a97380c78c5f95029b5d795 --- irif/inc/irif.h | 6 ------ ocml/src/builtins.h | 16 ++++++++-------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/irif/inc/irif.h b/irif/inc/irif.h index 7059e775..b848d2bf 100644 --- a/irif/inc/irif.h +++ b/irif/inc/irif.h @@ -17,13 +17,7 @@ extern __attribute__((const)) float2 __llvm_fma_2f32(float2, float2, float2) __asm("llvm.fma.v2f32"); extern __attribute__((const)) half2 __llvm_fma_2f16(half2, half2, half2) __asm("llvm.fma.v2f16"); -extern __attribute__((const)) half2 __llvm_fabs_2f16(half2) __asm("llvm.fabs.v2f16"); -extern __attribute__((const)) half2 __llvm_minnum_2f16(half2, half2) __asm("llvm.minnum.v2f16"); -extern __attribute__((const)) half2 __llvm_maxnum_2f16(half2, half2) __asm("llvm.maxnum.v2f16"); extern __attribute__((const)) half2 __llvm_copysign_2f16(half2, half2) __asm("llvm.copysign.v2f16"); -extern __attribute__((const)) half2 __llvm_floor_2f16(half2) __asm("llvm.floor.v2f16"); -extern __attribute__((const)) half2 __llvm_ceil_2f16(half2) __asm("llvm.ceil.v2f16"); -extern __attribute__((const)) half2 __llvm_trunc_2f16(half2) __asm("llvm.trunc.v2f16"); extern __attribute__((const)) half2 __llvm_round_2f16(half2) __asm("llvm.round.v2f16"); extern __attribute__((const)) half2 __llvm_rint_2f16(half2) __asm("llvm.rint.v2f16"); extern __attribute__((const)) half2 __llvm_canonicalize_2f16(half2) __asm("llvm.canonicalize.v2f16"); diff --git a/ocml/src/builtins.h b/ocml/src/builtins.h index 77e9cc34..085b8365 100644 --- a/ocml/src/builtins.h +++ b/ocml/src/builtins.h @@ -39,14 +39,14 @@ #define BUILTIN_ABS_F32 __builtin_fabsf #define BUILTIN_ABS_F64 __builtin_fabs #define BUILTIN_ABS_F16 __builtin_fabsf16 -#define BUILTIN_ABS_2F16 __llvm_fabs_2f16 +#define BUILTIN_ABS_2F16 __builtin_elementwise_abs #define BUILTIN_BITALIGN_B32 __builtin_amdgcn_alignbit #define BUILTIN_CEIL_F32 __builtin_ceilf #define BUILTIN_CEIL_F64 __builtin_ceil #define BUILTIN_CEIL_F16 __builtin_ceilf16 -#define BUILTIN_CEIL_2F16 __llvm_ceil_2f16 +#define BUILTIN_CEIL_2F16 __builtin_elementwise_ceil #define BUILTIN_CLASS_F32 __builtin_amdgcn_classf #define BUILTIN_CLASS_F64 __builtin_amdgcn_class @@ -88,7 +88,7 @@ #define BUILTIN_FLOOR_F32 __builtin_floorf #define BUILTIN_FLOOR_F64 __builtin_floor #define BUILTIN_FLOOR_F16 __builtin_floorf16 -#define BUILTIN_FLOOR_2F16 __llvm_floor_2f16 +#define BUILTIN_FLOOR_2F16 __builtin_elementwise_floor #define BUILTIN_FRACTION_F32(X) ({ \ float _fract_x = X; \ @@ -114,7 +114,7 @@ #define BUILTIN_MAX_F32 __builtin_fmaxf #define BUILTIN_MAX_F64 __builtin_fmax #define BUILTIN_MAX_F16 __builtin_fmaxf16 -#define BUILTIN_MAX_2F16 __llvm_maxnum_2f16 +#define BUILTIN_MAX_2F16 __builtin_elementwise_max #define BUILTIN_MAX_S32(A,B) ((A) < (B) ? (B) : (A)) #define BUILTIN_MAX_U32(A,B) ((A) < (B) ? (B) : (A)) @@ -122,7 +122,7 @@ #define BUILTIN_MIN_F32 __builtin_fminf #define BUILTIN_MIN_F64 __builtin_fmin #define BUILTIN_MIN_F16 __builtin_fminf16 -#define BUILTIN_MIN_2F16 __llvm_minnum_2f16 +#define BUILTIN_MIN_2F16 __builtin_elementwise_min #define BUILTIN_MIN_S32(A,B) ((A) < (B) ? (A) : (B)) #define BUILTIN_MIN_U32(A,B) ((A) < (B) ? 
(A) : (B)) @@ -164,7 +164,7 @@ #define BUILTIN_TRUNC_F32 __builtin_truncf #define BUILTIN_TRUNC_F64 __builtin_trunc #define BUILTIN_TRUNC_F16 __builtin_truncf16 -#define BUILTIN_TRUNC_2F16 __llvm_trunc_2f16 +#define BUILTIN_TRUNC_2F16 __builtin_elementwise_trunc #define BUILTIN_ROUND_F32 __builtin_roundf #define BUILTIN_ROUND_F64 __builtin_round @@ -213,12 +213,12 @@ #define BUILTIN_CMAX_F32 __builtin_fmaxf #define BUILTIN_CMAX_F64 __builtin_fmax #define BUILTIN_CMAX_F16 __builtin_fmaxf16 -#define BUILTIN_CMAX_2F16 __llvm_maxnum_2f16 +#define BUILTIN_CMAX_2F16 __builtin_elementwise_max #define BUILTIN_CMIN_F32 __builtin_fminf #define BUILTIN_CMIN_F64 __builtin_fmin #define BUILTIN_CMIN_F16 __builtin_fminf16 -#define BUILTIN_CMIN_2F16 __llvm_minnum_2f16 +#define BUILTIN_CMIN_2F16 __builtin_elementwise_min #define BUILTIN_TRIG_PREOP_F64 __builtin_amdgcn_trig_preop From 432efdc8b984ebbd1a59adc071afcd7f5428dba5 Mon Sep 17 00:00:00 2001 From: Brian Sumner Date: Tue, 3 Jan 2023 12:40:38 -0800 Subject: [PATCH 05/22] Update edge cases for half pow Change-Id: I8c0c14ee7e1a4469a50bc45f0b48505d08bb8638 --- ocml/src/powH_base.h | 206 ++++++++++++------------------------------- 1 file changed, 56 insertions(+), 150 deletions(-) diff --git a/ocml/src/powH_base.h b/ocml/src/powH_base.h index ac47f6eb..e6c8908f 100644 --- a/ocml/src/powH_base.h +++ b/ocml/src/powH_base.h @@ -5,6 +5,12 @@ * License. See LICENSE.TXT for details. *===------------------------------------------------------------------------*/ +static bool +samesign(half x, half y) +{ + return ((AS_USHORT(x) ^ AS_USHORT(y)) & (ushort)0x8000) == (ushort)0; +} + REQUIRES_16BIT_INSTS CONSTATTR half #if defined(COMPILING_POWR) MATH_MANGLE(powr)(half x, half y) @@ -49,159 +55,59 @@ MATH_MANGLE(pow)(half x, half y) // Now all the edge cases #if defined COMPILING_POWR - bool ax_eq_0 = ax == 0.0h; - bool ax_ne_0 = ax != 0.0h; - bool ax_lt_1 = ax < 1.0h; - bool ax_eq_1 = ax == 1.0h; - bool ax_gt_1 = ax > 1.0h; - bool ax_lt_pinf = BUILTIN_CLASS_F16(x, CLASS_PNOR|CLASS_PSUB); - bool ax_eq_pinf = BUILTIN_CLASS_F16(x, CLASS_PINF); - bool ax_eq_nan = BUILTIN_ISNAN_F16(x); - bool x_pos = BUILTIN_CLASS_F16(x, CLASS_PZER|CLASS_PSUB|CLASS_PNOR|CLASS_PINF); - bool ay_eq_0 = ay == 0.0h; - bool ay_eq_pinf = BUILTIN_CLASS_F16(ay, CLASS_PINF); - bool ay_eq_nan = BUILTIN_ISNAN_F16(ay); - bool y_eq_ninf = BUILTIN_CLASS_F16(y, CLASS_NINF); - bool y_eq_pinf = BUILTIN_CLASS_F16(y, CLASS_PINF); - bool ay_lt_inf = BUILTIN_CLASS_F16(y, CLASS_PNOR|CLASS_PSUB); - bool y_pos = BUILTIN_CLASS_F16(y, CLASS_PZER|CLASS_PSUB|CLASS_PNOR|CLASS_PINF); - - if (!FINITE_ONLY_OPT()) { - ret = (ax_lt_1 & y_eq_ninf) ? PINF_F16 : ret; - ret = (ax_lt_1 & y_eq_pinf) ? 0.0h : ret; - ret = (ax_eq_1 & ay_lt_inf) ? 1.0h : ret; - ret = (ax_eq_1 & ay_eq_pinf) ? QNAN_F16 : ret; - ret = (ax_gt_1 & y_eq_ninf) ? 0.0h : ret; - ret = (ax_gt_1 & y_eq_pinf) ? PINF_F16 : ret; - ret = (ax_lt_pinf & ay_eq_0) ? 1.0h : ret; - ret = (ax_eq_pinf & !y_pos) ? 0.0h : ret; - ret = (ax_eq_pinf & y_pos) ? PINF_F16 : ret; - ret = (ax_eq_pinf & y_eq_pinf) ? PINF_F16 : ret; - ret = (ax_eq_pinf & ay_eq_0) ? QNAN_F16 : ret; - ret = (ax_eq_0 & !y_pos) ? PINF_F16 : ret; - ret = (ax_eq_0 & y_pos) ? 0.0h : ret; - ret = (ax_eq_0 & ay_eq_0) ? QNAN_F16 : ret; - ret = (ax_ne_0 & !x_pos) ? QNAN_F16 : ret; - ret = ax_eq_nan ? x : ret; - ret = ay_eq_nan ? y : ret; - } else { - ret = ax_eq_1 ? 1.0h : ret; - ret = ay_eq_0 ? 1.0h : ret; - ret = (ax_eq_0 & y_pos) ? 0.0h : ret; - } + half iz = y < 0.0h ? 
PINF_F16 : 0.0h; + half zi = y < 0.0h ? 0.0h : PINF_F16; + + if (x == 0.0h) + ret = iz; + + if (BUILTIN_ISINF_F16(x)) + ret = zi; + + if (BUILTIN_ISINF_F16(y)) + ret = ax < 1.0h ? iz : zi; + + if (y == 0.0h) + ret = x == 0.0h || BUILTIN_ISINF_F16(x) ? QNAN_F16 : 1.0h; + + if (x == 1.0h) + ret = BUILTIN_ISINF_F16(y) ? QNAN_F16 : 1.0h; + + if (x < 0.0h || BUILTIN_ISNAN_F16(x) || BUILTIN_ISNAN_F16(y)) + ret = QNAN_F16; #elif defined COMPILING_POWN - bool ax_eq_0 = ax == 0.0h; - bool x_eq_ninf = BUILTIN_CLASS_F16(x, CLASS_NINF); - bool x_eq_pinf = BUILTIN_CLASS_F16(x, CLASS_PINF); - bool ax_lt_pinf = BUILTIN_CLASS_F16(x, CLASS_PNOR|CLASS_PSUB); - bool ax_eq_pinf = BUILTIN_CLASS_F16(x, CLASS_PINF); - bool ax_eq_nan = BUILTIN_ISNAN_F16(x); - bool x_pos = BUILTIN_CLASS_F16(x, CLASS_PZER|CLASS_PSUB|CLASS_PNOR|CLASS_PINF); - bool y_pos = ny >= 0; - - if (!FINITE_ONLY_OPT()) { - half xinf = BUILTIN_COPYSIGN_F16(PINF_F16, x); - ret = (ax_eq_0 & !y_pos & (inty == 1)) ? xinf : ret; - ret = (ax_eq_0 & !y_pos & (inty == 2)) ? PINF_F16 : ret; - ret = (ax_eq_0 & y_pos & (inty == 2)) ? 0.0h : ret; - half xzero = BUILTIN_COPYSIGN_F16(0.0h, x); - ret = (ax_eq_0 & y_pos & (inty == 1)) ? xzero : ret; - ret = (x_eq_ninf & !y_pos & (inty == 1)) ? -0.0h : ret; - ret = (x_eq_ninf & !y_pos & (inty != 1)) ? 0.0h : ret; - ret = (x_eq_ninf & y_pos & (inty == 1)) ? NINF_F16 : ret; - ret = (x_eq_ninf & y_pos & (inty != 1)) ? PINF_F16 : ret; - ret = (x_eq_pinf & !y_pos) ? 0.0h : ret; - ret = (x_eq_pinf & y_pos) ? PINF_F16 : ret; - ret = ax_eq_nan ? x : ret; - } else { - half xzero = BUILTIN_COPYSIGN_F16(0.0h, x); - ret = (ax_eq_0 & y_pos & (inty == 1)) ? xzero : ret; - ret = (ax_eq_0 & y_pos & (inty == 2)) ? 0.0h : ret; - } - ret = ny == 0 ? 1.0h : ret; + if (BUILTIN_ISINF_F16(ax) || x == 0.0h) + ret = BUILTIN_COPYSIGN_F16((x == 0.0h) ^ (ny < 0) ? 0.0h : PINF_F16, + inty == 1 ? x : 0.0h); + + if (BUILTIN_ISNAN_F16(x)) + ret = QNAN_F16; + + if (ny == 0) + ret = 1.0h; #elif defined COMPILING_ROOTN - bool ax_eq_0 = ax == 0.0h; - bool x_eq_ninf = BUILTIN_CLASS_F16(x, CLASS_NINF); - bool x_eq_pinf = BUILTIN_CLASS_F16(x, CLASS_PINF); - bool ax_lt_pinf = BUILTIN_CLASS_F16(x, CLASS_PNOR|CLASS_PSUB); - bool ax_eq_pinf = BUILTIN_CLASS_F16(x, CLASS_PINF); - bool ax_eq_nan = BUILTIN_ISNAN_F16(x); - bool x_pos = BUILTIN_CLASS_F16(x, CLASS_PZER|CLASS_PSUB|CLASS_PNOR|CLASS_PINF); - bool y_pos = ny >= 0; - - if (!FINITE_ONLY_OPT()) { - ret = (!x_pos & (inty == 2)) ? QNAN_F16 : ret; - half xinf = BUILTIN_COPYSIGN_F16(PINF_F16, x); - ret = (ax_eq_0 & !y_pos & (inty == 1)) ? xinf : ret; - ret = (ax_eq_0 & !y_pos & (inty == 2)) ? PINF_F16 : ret; - ret = (ax_eq_0 & y_pos & (inty == 2)) ? 0.0h : ret; - half xzero = BUILTIN_COPYSIGN_F16(0.0h, x); - ret = (ax_eq_0 & y_pos & (inty == 1)) ? xzero : ret; - ret = (x_eq_ninf & y_pos & (inty == 1)) ? NINF_F16 : ret; - ret = (x_eq_ninf & !y_pos & (inty == 1)) ? -0.0h : ret; - ret = (x_eq_pinf & !y_pos) ? 0.0h : ret; - ret = (x_eq_pinf & y_pos) ? PINF_F16 : ret; - ret = ax_eq_nan ? x : ret; - ret = ny == 0 ? QNAN_F16 : ret; - } else { - half xzero = BUILTIN_COPYSIGN_F16(0.0h, x); - ret = (ax_eq_0 & y_pos & (inty == 1)) ? xzero : ret; - ret = (ax_eq_0 & y_pos & (inty == 2)) ? 0.0h : ret; - } + if (BUILTIN_ISINF_F16(ax) || x == 0.0h) + ret = BUILTIN_COPYSIGN_F16((x == 0.0h) ^ (ny < 0) ? 0.0h : PINF_F16, + inty == 1 ? 
x : 0.0h); + + if ((x < 0.0h && inty != 1) || ny == 0) + ret = QNAN_F16; #else - bool ax_eq_0 = ax == 0.0h; - bool ax_ne_0 = ax != 0.0h; - bool ax_lt_1 = ax < 1.0h; - bool ax_eq_1 = ax == 1.0h; - bool ax_gt_1 = ax > 1.0h; - bool ax_lt_pinf = BUILTIN_CLASS_F16(x, CLASS_PNOR|CLASS_PSUB); - bool ax_eq_pinf = BUILTIN_CLASS_F16(x, CLASS_PINF); - bool ax_eq_nan = BUILTIN_ISNAN_F16(x); - bool x_pos = BUILTIN_CLASS_F16(x, CLASS_PZER|CLASS_PSUB|CLASS_PNOR|CLASS_PINF); - bool x_eq_ninf = BUILTIN_CLASS_F16(x, CLASS_NINF); - bool x_eq_pinf = BUILTIN_CLASS_F16(x, CLASS_PINF); - bool ay_eq_0 = ay == 0.0h; - bool ay_eq_pinf = BUILTIN_CLASS_F16(ay, CLASS_PINF); - bool ay_eq_nan = BUILTIN_ISNAN_F16(ay); - bool y_eq_ninf = BUILTIN_CLASS_F16(y, CLASS_NINF); - bool y_eq_pinf = BUILTIN_CLASS_F16(y, CLASS_PINF); - bool ay_lt_inf = BUILTIN_CLASS_F16(y, CLASS_PNOR|CLASS_PSUB); - bool y_pos = BUILTIN_CLASS_F16(y, CLASS_PZER|CLASS_PSUB|CLASS_PNOR|CLASS_PINF); - - if (!FINITE_ONLY_OPT()) { - ret = (!x_pos & (inty == 0)) ? QNAN_F16 : ret; - ret = (ax_lt_1 & y_eq_ninf) ? PINF_F16 : ret; - ret = (ax_gt_1 & y_eq_ninf) ? 0.0h : ret; - ret = (ax_lt_1 & y_eq_pinf) ? 0.0h : ret; - ret = (ax_gt_1 & y_eq_pinf) ? PINF_F16 : ret; - half xinf = BUILTIN_COPYSIGN_F16(PINF_F16, x); - ret = (ax_eq_0 & !y_pos & (inty == 1)) ? xinf : ret; - ret = (ax_eq_0 & !y_pos & (inty != 1)) ? PINF_F16 : ret; - half xzero = BUILTIN_COPYSIGN_F16(0.0h, x); - ret = (ax_eq_0 & y_pos & (inty == 1)) ? xzero : ret; - ret = (ax_eq_0 & y_pos & (inty != 1)) ? 0.0h : ret; - ret = (ax_eq_0 & y_eq_ninf) ? PINF_F16 : ret; - ret = ((x == -1.0h) & ay_eq_pinf) ? 1.0h : ret; - ret = (x_eq_ninf & !y_pos & (inty == 1)) ? -0.0h : ret; - ret = (x_eq_ninf & !y_pos & (inty != 1)) ? 0.0h : ret; - ret = (x_eq_ninf & y_pos & (inty == 1)) ? NINF_F16 : ret; - ret = (x_eq_ninf & y_pos & (inty != 1)) ? PINF_F16 : ret; - ret = (x_eq_pinf & !y_pos) ? 0.0h : ret; - ret = (x_eq_pinf & y_pos) ? PINF_F16 : ret; - ret = ax_eq_nan ? x : ret; - ret = ay_eq_nan ? y : ret; - } else { - // XXX work around conformance test incorrectly checking these cases - half xinf = BUILTIN_COPYSIGN_F16(PINF_F16, x); - ret = (ax_eq_0 & !y_pos & (inty == 1)) ? xinf : ret; - ret = (ax_eq_0 & !y_pos & (inty != 1)) ? PINF_F16 : ret; - - half xzero = BUILTIN_COPYSIGN_F16(0.0h, x); - ret = (ax_eq_0 & y_pos & (inty == 1)) ? xzero : ret; - ret = (ax_eq_0 & y_pos & (inty != 1)) ? 0.0h : ret; - } - ret = ay == 0.0h ? 1.0h : ret; - ret = x == 1.0h ? 1.0h : ret; + if (x < 0.0h && !inty) + ret = QNAN_F16; + + if (BUILTIN_ISINF_F16(ay)) + ret = ax == 1.0h ? ax : (samesign(y, ax - 1.0h) ? ay : 0.0h); + + if (BUILTIN_ISINF_F16(ax) || x == 0.0h) + ret = BUILTIN_COPYSIGN_F16((x == 0.0h) ^ (y < 0.0h) ? 0.0h : PINF_F16, + inty == 1 ? x : 0.0h); + + if (BUILTIN_ISNAN_F16(x) || BUILTIN_ISNAN_F16(y)) + ret = QNAN_F16; + + if (x == 1.0h || y == 0.0h) + ret = 1.0h; #endif return ret; From c199c4878c60784ab11d8d05abb0984cccc0721a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 23 Dec 2022 09:19:43 -0500 Subject: [PATCH 06/22] Remove unnecessary explicit convergent attributes All functions are assumed convergent by default for OpenCL and it was a mistake to surface this as a visible attribute. The intrinsics also directly read convergence as a fundamental property (plus all asm declarations are doubly infected with convergence). 
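As a minimal sketch of why the attribute is redundant (not from this change; the function names are invented), clang already treats every function as convergent when compiling OpenCL unless it can prove otherwise, so the two definitions below are handled identically:

    // Hypothetical example: under OpenCL, convergent is assumed by default,
    // so spelling the attribute out adds nothing.
    __attribute__((convergent)) void sync_explicit(void) { __builtin_amdgcn_s_barrier(); }
    void sync_implicit(void) { __builtin_amdgcn_s_barrier(); }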
Change-Id: I05a5570a70f5ea6bf14f2b143848dde6bcc39826 --- irif/inc/irif.h | 16 ++++++++-------- ockl/src/cg.cl | 8 ++++---- ockl/src/readuplane.cl | 8 +++----- ockl/src/wgred.cl | 2 +- 4 files changed, 16 insertions(+), 18 deletions(-) diff --git a/irif/inc/irif.h b/irif/inc/irif.h index b848d2bf..ca74a41e 100644 --- a/irif/inc/irif.h +++ b/irif/inc/irif.h @@ -33,14 +33,14 @@ extern __attribute__((const)) half2 __llvm_canonicalize_2f16(half2) __asm("llvm. #define BUILTIN_CTZ_U64(x) (ulong)(x == 0u ? 64 : __builtin_ctzl(x)) // AMDGPU intrinsics -extern __attribute__((const, convergent)) ulong __llvm_amdgcn_icmp_i64_i32(uint, uint, uint) __asm("llvm.amdgcn.icmp.i64.i32"); -extern __attribute__((const, convergent)) ulong __llvm_amdgcn_icmp_i64_i64(ulong, ulong, uint) __asm("llvm.amdgcn.icmp.i64.i64"); -extern __attribute__((const, convergent)) ulong __llvm_amdgcn_fcmp_i64_f32(float, float, uint) __asm("llvm.amdgcn.fcmp.i64.f32"); -extern __attribute__((const, convergent)) ulong __llvm_amdgcn_fcmp_i64_f64(double, double, uint) __asm("llvm.amdgcn.fcmp.i64.f64"); -extern __attribute__((const, convergent)) uint __llvm_amdgcn_icmp_i32_i32(uint, uint, uint) __asm("llvm.amdgcn.icmp.i32.i32"); -extern __attribute__((const, convergent)) uint __llvm_amdgcn_icmp_i32_i64(ulong, ulong, uint) __asm("llvm.amdgcn.icmp.i32.i64"); -extern __attribute__((const, convergent)) uint __llvm_amdgcn_fcmp_i32_f32(float, float, uint) __asm("llvm.amdgcn.fcmp.i32.f32"); -extern __attribute__((const, convergent)) uint __llvm_amdgcn_fcmp_i32_f64(double, double, uint) __asm("llvm.amdgcn.fcmp.i32.f64"); +extern __attribute__((const)) ulong __llvm_amdgcn_icmp_i64_i32(uint, uint, uint) __asm("llvm.amdgcn.icmp.i64.i32"); +extern __attribute__((const)) ulong __llvm_amdgcn_icmp_i64_i64(ulong, ulong, uint) __asm("llvm.amdgcn.icmp.i64.i64"); +extern __attribute__((const)) ulong __llvm_amdgcn_fcmp_i64_f32(float, float, uint) __asm("llvm.amdgcn.fcmp.i64.f32"); +extern __attribute__((const)) ulong __llvm_amdgcn_fcmp_i64_f64(double, double, uint) __asm("llvm.amdgcn.fcmp.i64.f64"); +extern __attribute__((const)) uint __llvm_amdgcn_icmp_i32_i32(uint, uint, uint) __asm("llvm.amdgcn.icmp.i32.i32"); +extern __attribute__((const)) uint __llvm_amdgcn_icmp_i32_i64(ulong, ulong, uint) __asm("llvm.amdgcn.icmp.i32.i64"); +extern __attribute__((const)) uint __llvm_amdgcn_fcmp_i32_f32(float, float, uint) __asm("llvm.amdgcn.fcmp.i32.f32"); +extern __attribute__((const)) uint __llvm_amdgcn_fcmp_i32_f64(double, double, uint) __asm("llvm.amdgcn.fcmp.i32.f64"); // Buffer Load/Store extern __attribute__((pure)) float4 __llvm_amdgcn_struct_buffer_load_format_v4f32(uint4 rsrc, uint vindex, uint voffset, uint soffset, uint cachepolicy) __asm("llvm.amdgcn.struct.buffer.load.format.v4f32"); diff --git a/ockl/src/cg.cl b/ockl/src/cg.cl index 8cadf278..99289a58 100644 --- a/ockl/src/cg.cl +++ b/ockl/src/cg.cl @@ -85,13 +85,13 @@ multi_grid_sync(__global struct mg_sync *s, uint members) } } -__attribute__((convergent)) void +void __ockl_gws_init(uint nwm1, uint rid) { __builtin_amdgcn_ds_gws_init(nwm1, rid); } -__attribute__((convergent)) void +void __ockl_gws_barrier(uint nwm1, uint rid) { __builtin_amdgcn_ds_gws_barrier(nwm1, rid); @@ -103,7 +103,7 @@ __ockl_grid_is_valid(void) return get_mg_info_arg() != 0UL; } -__attribute__((convergent)) void +void __ockl_grid_sync(void) { __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent"); @@ -157,7 +157,7 @@ __ockl_multi_grid_is_valid(void) } } -__attribute__((convergent)) void +void __ockl_multi_grid_sync(void) { 
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, ""); diff --git a/ockl/src/readuplane.cl b/ockl/src/readuplane.cl index d398735b..9e845157 100644 --- a/ockl/src/readuplane.cl +++ b/ockl/src/readuplane.cl @@ -7,9 +7,7 @@ #include "ockl.h" -#define WAVESIZE 64 - -#define ATTR __attribute__((convergent)) +#define WAVESIZE 64 // Function to exchange data between different lanes @@ -17,7 +15,7 @@ // offset: To be added to the lane id to obtain final index // return a int value correspoding to the lane -ATTR int +int __ockl_readuplane_i32(int var, int offset) { @@ -33,7 +31,7 @@ __ockl_readuplane_i32(int var, int offset) // offset: To be added to the lane id to obtain final index // return a long value correspoding to the lane -ATTR long +long __ockl_readuplane_i64(long var, int offset) { int lane_id = __ockl_lane_u32(); int index = lane_id + offset; diff --git a/ockl/src/wgred.cl b/ockl/src/wgred.cl index b8c3ffe2..d6187541 100644 --- a/ockl/src/wgred.cl +++ b/ockl/src/wgred.cl @@ -49,7 +49,7 @@ my_barrier(void) } #define AGEN(T,OP) \ -__attribute__((convergent)) T \ +T \ C(__ockl_wgred_,C(OP,T##_suf))(int a) \ { \ uint n = my_num_sub_groups(); \ From af782defddff2b5b4375151e027c4762779521b4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 4 Jan 2023 08:35:05 -0500 Subject: [PATCH 07/22] Update datalayout in image intrinsic wrappers Change-Id: I16e7c3191a746db644e82d89a176f35af8a4aec6 --- irif/src/imintrin.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/irif/src/imintrin.ll b/irif/src/imintrin.ll index 39a9f924..24c9c917 100644 --- a/irif/src/imintrin.ll +++ b/irif/src/imintrin.ll @@ -1,4 +1,4 @@ -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" ; Function Attrs: alwaysinline nounwind readonly From 2fcf63d53355c7c8e7888a1acc190e531e599ebd Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 4 Jan 2023 08:51:57 -0500 Subject: [PATCH 08/22] Remove dead cttz IR wrappers Change-Id: Iaea4220d89d4f9f8dc668c78517ea54fae5018a3 --- irif/src/cz.ll | 37 ------------------------------------- 1 file changed, 37 deletions(-) delete mode 100644 irif/src/cz.ll diff --git a/irif/src/cz.ll b/irif/src/cz.ll deleted file mode 100644 index a6d047d1..00000000 --- a/irif/src/cz.ll +++ /dev/null @@ -1,37 +0,0 @@ -; ===-------------------------------------------------------------------------- -; ROCm Device Libraries -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. 
-; ===-------------------------------------------------------------------------- - -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" -target triple = "amdgcn-amd-amdhsa" - -declare i8 @llvm.cttz.i8(i8, i1) #0 -declare i16 @llvm.cttz.i16(i16, i1) #0 -declare i32 @llvm.cttz.i32(i32, i1) #0 -declare i64 @llvm.cttz.i64(i64, i1) #0 - -define protected i8 @__llvm_cttz_i8(i8) #1 { - %2 = call i8 @llvm.cttz.i8(i8 %0, i1 false) - ret i8 %2 -} - -define protected i16 @__llvm_cttz_i16(i16) #1 { - %2 = call i16 @llvm.cttz.i16(i16 %0, i1 false) - ret i16 %2 -} - -define protected i32 @__llvm_cttz_i32(i32) #1 { - %2 = call i32 @llvm.cttz.i32(i32 %0, i1 false) - ret i32 %2 -} - -define protected i64 @__llvm_cttz_i64(i64) #1 { - %2 = call i64 @llvm.cttz.i64(i64 %0, i1 false) - ret i64 %2 -} - -attributes #0 = { nounwind readnone speculatable } -attributes #1 = { alwaysinline norecurse nounwind readnone } From d2c3a04ce0a1258e1f176b12dc9292f29c75e541 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 4 Jan 2023 08:16:10 -0500 Subject: [PATCH 09/22] Drop required target features for is_shared/is_private These are no longer enforced and they codegen without flat support anyway. Change-Id: I8310b09ce2b77ffd081fb57c51784eaad50adcb8 --- ockl/src/toas.cl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ockl/src/toas.cl b/ockl/src/toas.cl index e190cb5f..a121c1c2 100644 --- a/ockl/src/toas.cl +++ b/ockl/src/toas.cl @@ -6,14 +6,15 @@ *===------------------------------------------------------------------------*/ #include "ockl.h" - -__attribute__((const, target("flat-address-space"))) bool +__attribute__((const)) +bool OCKL_MANGLE_T(is_local,addr)(const void *a) { return __builtin_amdgcn_is_shared(a); } -__attribute__((const, target("flat-address-space"))) bool +__attribute__((const)) +bool OCKL_MANGLE_T(is_private,addr)(const void *a) { return __builtin_amdgcn_is_private(a); From 261fcbd75f599933c99a6f9113a1c1536ffc5f1d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 4 Jan 2023 08:41:29 -0500 Subject: [PATCH 10/22] Remove optnone noinline from hostcall function This is implemented correctly now and we detect the access of the specific hostcall pointer, not the call itself. Use cold to hint the inliner that it usually shouldn't inline. Change-Id: I4217ce37f43b2a8b2b82931ee146234cf32f2574 --- ockl/src/hostcall.cl | 1 + ockl/src/hostcall_impl.cl | 11 ----------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/ockl/src/hostcall.cl b/ockl/src/hostcall.cl index 40844519..5021d9ea 100644 --- a/ockl/src/hostcall.cl +++ b/ockl/src/hostcall.cl @@ -14,6 +14,7 @@ * code. Application kernels must only use __ockl_hostcall_preview() * defined below. */ +__attribute__((cold)) extern long2 __ockl_hostcall_internal(void *buffer, uint service_id, ulong arg0, ulong arg1, ulong arg2, ulong arg3, diff --git a/ockl/src/hostcall_impl.cl b/ockl/src/hostcall_impl.cl index 20af2086..1a70538f 100644 --- a/ockl/src/hostcall_impl.cl +++ b/ockl/src/hostcall_impl.cl @@ -283,18 +283,7 @@ get_return_value(__global header_t *header, __global payload_t *payload, * code. Application kernels must only use __ockl_hostcall_preview() * defined elsewhere. * - * The function is marked noinline to preserve all calls in the - * kernel. 
This is required because the compiler backend includes a - * check for the presence of this function as a way to determine that - * hostcall is used. - * - * FIXME: Additionally, the optnone attribute is required to ensure - * that the SelectAcceleratorCode pass in HCC does not forcibly - * inline this function. This should be removed when the SAC pass or - * HCC itself is removed. */ -__attribute__((noinline)) -__attribute__((optnone)) long2 __ockl_hostcall_internal(void *_buffer, uint service_id, ulong arg0, ulong arg1, ulong arg2, ulong arg3, ulong arg4, ulong arg5, From 089a4790ad661492ac7b62df193275666e35503d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 17 Jun 2020 22:33:26 -0400 Subject: [PATCH 11/22] Use new amdgcn.ballot builtins Eliminates more asm intrinsic declarations. Change-Id: I5eaf1a7f51da01135861a420d2359fa62ac2c0e5 --- irif/inc/irif.h | 12 ++------ ockl/src/dm.cl | 73 +++++++++++++++++++++++++++++++---------------- ockl/src/wfaas.cl | 58 ++++++++++++++++++++++++------------- 3 files changed, 88 insertions(+), 55 deletions(-) diff --git a/irif/inc/irif.h b/irif/inc/irif.h index ca74a41e..a093c290 100644 --- a/irif/inc/irif.h +++ b/irif/inc/irif.h @@ -12,6 +12,8 @@ #define REQUIRES_16BIT_INSTS __attribute__((target("16-bit-insts"))) #define REQUIRES_GFX9_INSTS __attribute__((target("gfx9-insts"))) +#define REQUIRES_WAVE32 __attribute__((target("wavefrontsize32"))) +#define REQUIRES_WAVE64 __attribute__((target("wavefrontsize64"))) // Generic intrinsics extern __attribute__((const)) float2 __llvm_fma_2f32(float2, float2, float2) __asm("llvm.fma.v2f32"); @@ -32,16 +34,6 @@ extern __attribute__((const)) half2 __llvm_canonicalize_2f16(half2) __asm("llvm. #define BUILTIN_CTZ_U32(x) (uint)(x == 0u ? 32 : __builtin_ctz(x)) #define BUILTIN_CTZ_U64(x) (ulong)(x == 0u ? 
64 : __builtin_ctzl(x)) -// AMDGPU intrinsics -extern __attribute__((const)) ulong __llvm_amdgcn_icmp_i64_i32(uint, uint, uint) __asm("llvm.amdgcn.icmp.i64.i32"); -extern __attribute__((const)) ulong __llvm_amdgcn_icmp_i64_i64(ulong, ulong, uint) __asm("llvm.amdgcn.icmp.i64.i64"); -extern __attribute__((const)) ulong __llvm_amdgcn_fcmp_i64_f32(float, float, uint) __asm("llvm.amdgcn.fcmp.i64.f32"); -extern __attribute__((const)) ulong __llvm_amdgcn_fcmp_i64_f64(double, double, uint) __asm("llvm.amdgcn.fcmp.i64.f64"); -extern __attribute__((const)) uint __llvm_amdgcn_icmp_i32_i32(uint, uint, uint) __asm("llvm.amdgcn.icmp.i32.i32"); -extern __attribute__((const)) uint __llvm_amdgcn_icmp_i32_i64(ulong, ulong, uint) __asm("llvm.amdgcn.icmp.i32.i64"); -extern __attribute__((const)) uint __llvm_amdgcn_fcmp_i32_f32(float, float, uint) __asm("llvm.amdgcn.fcmp.i32.f32"); -extern __attribute__((const)) uint __llvm_amdgcn_fcmp_i32_f64(double, double, uint) __asm("llvm.amdgcn.fcmp.i32.f64"); - // Buffer Load/Store extern __attribute__((pure)) float4 __llvm_amdgcn_struct_buffer_load_format_v4f32(uint4 rsrc, uint vindex, uint voffset, uint soffset, uint cachepolicy) __asm("llvm.amdgcn.struct.buffer.load.format.v4f32"); extern __attribute__((pure)) half4 __llvm_amdgcn_struct_buffer_load_format_v4f16(uint4 rsrc, uint vindex, uint voffset, uint soffset, uint cachepolicy) __asm("llvm.amdgcn.struct.buffer.load.format.v4f16"); diff --git a/ockl/src/dm.cl b/ockl/src/dm.cl index 49bbaa0b..2dbbdac0 100644 --- a/ockl/src/dm.cl +++ b/ockl/src/dm.cl @@ -14,9 +14,6 @@ extern ulong __ockl_devmem_request(ulong addr, ulong size); -// XXX from llvm/include/llvm/IR/InstrTypes.h -#define ICMP_NE 33 - // Define this to track user requested non-slab (i.e. "large") in-use // allocations. This adds the definition of a query function nna() that // returns a snapshot of the current value. @@ -296,40 +293,66 @@ first(__global void * v) return __builtin_astype(w2, __global void *); } +REQUIRES_WAVE64 +static uint +elect_uint_wave64(int pred, uint val, uint none) { + uint ret = none; + + ulong mask = __builtin_amdgcn_ballot_w64(pred != 0); + if (mask != 0UL) { + uint l = __ockl_ctz_u64(mask); + ret = __builtin_amdgcn_ds_bpermute(l << 2, val); + } + + return ret; +} + +REQUIRES_WAVE32 +static uint +elect_uint_wave32(int pred, uint val, uint none) { + uint ret = none; + uint mask = __builtin_amdgcn_ballot_w32(pred != 0); + if (mask != 0U) { + uint l = __ockl_ctz_u32(mask); + ret = __builtin_amdgcn_ds_bpermute(l << 2, val); + } + + return ret; +} + // Read val from one active lane whose predicate is one. // If no lanes have the predicate set, return none // This is like first, except that first may not have its predicate set static uint elect_uint(int pred, uint val, uint none) { - uint ret = none; - if (__oclc_wavefrontsize64) { - ulong mask = __llvm_amdgcn_icmp_i64_i32(pred, 0, ICMP_NE); - if (mask != 0UL) { - uint l = __ockl_ctz_u64(mask); - ret = __builtin_amdgcn_ds_bpermute(l << 2, val); - } - } else { - uint mask = __llvm_amdgcn_icmp_i32_i32(pred, 0, ICMP_NE); - if (mask != 0U) { - uint l = __ockl_ctz_u32(mask); - ret = __builtin_amdgcn_ds_bpermute(l << 2, val); - } - } - return ret; + return __oclc_wavefrontsize64 ? 
+ elect_uint_wave64(pred, val, none) : + elect_uint_wave32(pred, val, none); +} + +REQUIRES_WAVE64 +static uint +countnz_wave64(ulong a) +{ + ulong mask = __builtin_amdgcn_ballot_w64(a != 0UL); + return __builtin_popcountl(mask); +} + +REQUIRES_WAVE32 +static uint +countnz_wave32(ulong a) +{ + uint mask = __builtin_amdgcn_ballot_w32(a != 0UL); + return __builtin_popcount(mask); } // Count the number of nonzero arguments across the wave static uint countnz(ulong a) { - if (__oclc_wavefrontsize64) { - ulong mask = __llvm_amdgcn_icmp_i64_i64(a, 0UL, ICMP_NE); - return __builtin_popcountl(mask); - } else { - uint mask = __llvm_amdgcn_icmp_i32_i64(a, 0UL, ICMP_NE); - return __builtin_popcount(mask); - } + return __oclc_wavefrontsize64 ? + countnz_wave64(a) : countnz_wave32(a); } // The kind of the smallest block that can hold sz bytes diff --git a/ockl/src/wfaas.cl b/ockl/src/wfaas.cl index 9bac8ddb..d657d508 100644 --- a/ockl/src/wfaas.cl +++ b/ockl/src/wfaas.cl @@ -11,9 +11,6 @@ #define ATTR __attribute__((always_inline)) -// XXX from llvm/include/llvm/IR/InstrTypes.h -#define ICMP_NE 33 - // Hack to prevent incorrect hoisting of the operation. There // currently is no proper way in llvm to prevent hoisting of // operations control flow dependent results. @@ -25,39 +22,60 @@ static int optimizationBarrierHack(int in_val) return out_val; } +REQUIRES_WAVE32 +static bool wfany_impl_w32(int e) { + return __builtin_amdgcn_ballot_w32(e) != 0; +} + +REQUIRES_WAVE64 +static bool wfany_impl_w64(int e) { + return __builtin_amdgcn_ballot_w64(e) != 0; +} + ATTR bool OCKL_MANGLE_I32(wfany)(int e) { e = optimizationBarrierHack(e); - if (__oclc_wavefrontsize64) { - return __llvm_amdgcn_icmp_i64_i32(e, 0, ICMP_NE) != 0UL; - } else { - return __llvm_amdgcn_icmp_i32_i32(e, 0, ICMP_NE) != 0U; - } + return __oclc_wavefrontsize64 ? + wfany_impl_w64(e) : wfany_impl_w32(e); +} + +REQUIRES_WAVE32 +static bool wfall_impl_w32(int e) { + return __builtin_amdgcn_ballot_w32(e) == __builtin_amdgcn_read_exec_lo(); +} + +REQUIRES_WAVE64 +static bool wfall_impl_w64(int e) { + return __builtin_amdgcn_ballot_w64(e) == __builtin_amdgcn_read_exec(); } ATTR bool OCKL_MANGLE_I32(wfall)(int e) { e = optimizationBarrierHack(e); - if (__oclc_wavefrontsize64) { - return __llvm_amdgcn_icmp_i64_i32(e, 0, ICMP_NE) == __builtin_amdgcn_read_exec(); - } else { - return __llvm_amdgcn_icmp_i32_i32(e, 0, ICMP_NE) == __builtin_amdgcn_read_exec_lo(); - } + return __oclc_wavefrontsize64 ? + wfall_impl_w64(e) : wfall_impl_w32(e); +} + + +REQUIRES_WAVE32 +static bool wfsame_impl_w32(int e) { + uint u = __builtin_amdgcn_ballot_w32(e); + return (u == 0) | (u == __builtin_amdgcn_read_exec_lo()); } +REQUIRES_WAVE64 +static bool wfsame_impl_w64(int e) { + ulong u = __builtin_amdgcn_ballot_w64(e); + return (u == 0UL) | (u == __builtin_amdgcn_read_exec()); +} ATTR bool OCKL_MANGLE_I32(wfsame)(int e) { e = optimizationBarrierHack(e); - if (__oclc_wavefrontsize64) { - ulong u = __llvm_amdgcn_icmp_i64_i32(e, 0, ICMP_NE) != 0UL; - return (u == 0UL) | (u == __builtin_amdgcn_read_exec()); - } else { - uint u = __llvm_amdgcn_icmp_i32_i32(e, 0, ICMP_NE) != 0U; - return (u == 0UL) | (u == __builtin_amdgcn_read_exec_lo()); - } + return __oclc_wavefrontsize64 ? 
+ wfsame_impl_w64(e) : wfsame_impl_w32(e); } From 39114521beece1e647d274084ef8958913cb91f3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 4 Jan 2023 08:36:20 -0500 Subject: [PATCH 12/22] Remove alwaysinline from image intrinsic wrappers I'd hope attribute/feature incompatibility would win over alwaysinline, but might as well remove this just to be sure. Also remove call site attributes, and run attributor to consolidate the attribute groups. Change-Id: I60ac797daf4760ea5a17beea2f7ace9c1ac81283 --- irif/src/imintrin.ll | 1104 +++++++++++++++++++++--------------------- 1 file changed, 553 insertions(+), 551 deletions(-) diff --git a/irif/src/imintrin.ll b/irif/src/imintrin.ll index 24c9c917..b57ff17e 100644 --- a/irif/src/imintrin.ll +++ b/irif/src/imintrin.ll @@ -1,1288 +1,1290 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn-amd-amdhsa" -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_load_1d_v4f32_i32(i32 %arg1, <8 x i32> %arg2) local_unnamed_addr #0 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %arg1, <8 x i32> %arg2, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 noundef 15, i32 %arg1, <8 x i32> %arg2, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_load_2d_v4f32_i32(i32 %arg1, i32 %arg2, <8 x i32> %arg3) local_unnamed_addr #0 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %arg1, i32 %arg2, <8 x i32> %arg3, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 noundef 15, i32 %arg1, i32 %arg2, <8 x i32> %arg3, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_load_3d_v4f32_i32(i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4) local_unnamed_addr #0 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 noundef 15, i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +; Function Attrs: nocallback 
nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_load_cube_v4f32_i32(i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4) local_unnamed_addr #0 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 15, i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 noundef 15, i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_load_1darray_v4f32_i32(i32 %arg1, i32 %arg2, <8 x i32> %arg3) local_unnamed_addr #0 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32 15, i32 %arg1, i32 %arg2, <8 x i32> %arg3, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32 noundef 15, i32 %arg1, i32 %arg2, <8 x i32> %arg3, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_load_2darray_v4f32_i32(i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4) local_unnamed_addr #0 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 15, i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 noundef 15, i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind 
willreturn memory(read)
+declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1

-; Function Attrs: alwaysinline nounwind readonly
+; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read)
 define protected <4 x float> @__llvm_amdgcn_image_load_mip_1d_v4f32_i32(i32 %arg1, i32 %arg2, <8 x i32> %arg3) local_unnamed_addr #0 {
 bb:
-  %tmp = tail call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %arg1, i32 %arg2, <8 x i32> %arg3, i32 0, i32 0)
+  %tmp = tail call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 noundef 15, i32 %arg1, i32 %arg2, <8 x i32> %arg3, i32 noundef 0, i32 noundef 0)
   ret <4 x float> %tmp
 }

-; Function Attrs: nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1

-; Function Attrs: alwaysinline nounwind readonly
+; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read)
 define protected <4 x float> @__llvm_amdgcn_image_load_mip_2d_v4f32_i32(i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4) local_unnamed_addr #0 {
 bb:
-  %tmp = tail call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 0, i32 0)
+  %tmp = tail call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 noundef 15, i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 noundef 0, i32 noundef 0)
   ret <4 x float> %tmp
 }

-; Function Attrs: nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1

-; Function Attrs: alwaysinline nounwind readonly
+; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read)
 define protected <4 x float> @__llvm_amdgcn_image_load_mip_3d_v4f32_i32(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5) local_unnamed_addr #0 {
 bb:
-  %tmp = tail call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 0, i32 0)
+  %tmp = tail call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 noundef 15, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 noundef 0, i32 noundef 0)
   ret <4 x float> %tmp
 }

-; Function Attrs: nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1

-; Function Attrs: alwaysinline nounwind readonly
+; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read)
 define protected <4 x float> @__llvm_amdgcn_image_load_mip_cube_v4f32_i32(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5) local_unnamed_addr #0 {
 bb:
-  %tmp = tail call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 0, i32 0)
+  %tmp = tail call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 noundef 15, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 noundef 0, i32 noundef 0)
   ret <4 x float> %tmp
 }

-; Function Attrs: nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1

-; Function Attrs: alwaysinline nounwind readonly
+; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read)
 define protected <4 x float> @__llvm_amdgcn_image_load_mip_1darray_v4f32_i32(i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4) local_unnamed_addr #0 {
 bb:
-  %tmp = tail call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 0, i32 0)
+  %tmp = tail call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 noundef 15, i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 noundef 0, i32 noundef 0)
   ret <4 x float> %tmp
 }

-; Function Attrs: nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1

-; Function Attrs: alwaysinline nounwind readonly
+; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read)
 define protected <4 x float> @__llvm_amdgcn_image_load_mip_2darray_v4f32_i32(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5) local_unnamed_addr #0 {
 bb:
-  %tmp = tail call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 0, i32 0)
+  %tmp = tail call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 noundef 15, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 noundef 0, i32 noundef 0)
   ret <4 x float> %tmp
 }

-; Function Attrs: nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1

-; Function Attrs: alwaysinline nounwind readonly
+; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read)
 define protected <4 x half> @__llvm_amdgcn_image_load_1d_v4f16_i32(i32 %arg1, <8 x i32> %arg2) local_unnamed_addr #0 {
 bb:
-  %tmp = tail call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 15, i32 %arg1, <8 x i32> %arg2, i32 0, i32 0)
+  %tmp = tail call <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 noundef 15, i32 %arg1, <8 x i32> %arg2, i32 noundef 0, i32 noundef 0)
   ret <4 x half> %tmp
 }

-; Function Attrs: nounwind readonly
-declare <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32, i32, <8 x i32>, i32, i32) #1
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1

-; Function Attrs: alwaysinline nounwind readonly
+; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read)
 define protected <4 x half> @__llvm_amdgcn_image_load_2d_v4f16_i32(i32 %arg1, i32 %arg2, <8 x i32> %arg3) local_unnamed_addr #0 {
 bb:
-  %tmp = tail call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 15, i32 %arg1, i32 %arg2, <8 x i32> %arg3, i32 0, i32 0)
+  %tmp = tail call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 noundef 15, i32 %arg1, i32 %arg2, <8 x i32> %arg3, i32 noundef 0, i32 noundef 0)
   ret <4 x half> %tmp
 }

-; Function Attrs: nounwind readonly
-declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1

-; Function Attrs: alwaysinline nounwind readonly
+; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read)
 define protected <4 x half> @__llvm_amdgcn_image_load_3d_v4f16_i32(i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4) local_unnamed_addr #0 {
 bb:
-  %tmp = tail call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i32(i32 15, i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 0, i32 0)
+  %tmp = tail call <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i32(i32 noundef 15, i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 noundef 0, i32 noundef 0)
   ret <4 x half> %tmp
 }

-; Function Attrs: nounwind readonly
-declare <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1

-; Function Attrs: alwaysinline nounwind readonly
+; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read)
 define protected <4 x half> @__llvm_amdgcn_image_load_cube_v4f16_i32(i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4) local_unnamed_addr #0 {
 bb:
-  %tmp = tail call <4 x half> @llvm.amdgcn.image.load.cube.v4f16.i32(i32 15, i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 0, i32 0)
+  %tmp = tail call <4 x half> @llvm.amdgcn.image.load.cube.v4f16.i32(i32 noundef 15, i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 noundef 0, i32 noundef 0)
   ret <4 x half> %tmp
 }

-; Function Attrs: nounwind readonly
-declare <4 x half> @llvm.amdgcn.image.load.cube.v4f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <4 x half> @llvm.amdgcn.image.load.cube.v4f16.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1

-; Function Attrs: alwaysinline nounwind readonly
+; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read)
 define protected <4 x half> @__llvm_amdgcn_image_load_1darray_v4f16_i32(i32 %arg1, i32 %arg2, <8 x i32> %arg3) local_unnamed_addr #0 {
 bb:
-  %tmp = tail call <4 x half> @llvm.amdgcn.image.load.1darray.v4f16.i32(i32 15, i32 %arg1, i32 %arg2, <8 x i32> %arg3, i32 0, i32 0)
+  %tmp = tail call <4 x half> @llvm.amdgcn.image.load.1darray.v4f16.i32(i32 noundef 15, i32 %arg1, i32 %arg2, <8 x i32> %arg3, i32 noundef 0, i32 noundef 0)
   ret <4 x half> %tmp
 }

-; Function Attrs: nounwind readonly
-declare <4 x half> @llvm.amdgcn.image.load.1darray.v4f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <4 x half> @llvm.amdgcn.image.load.1darray.v4f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1

-; Function Attrs: alwaysinline nounwind readonly
+; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read)
 define protected <4 x half> @__llvm_amdgcn_image_load_2darray_v4f16_i32(i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4) local_unnamed_addr #0 {
 bb:
-  %tmp = tail call <4 x half> @llvm.amdgcn.image.load.2darray.v4f16.i32(i32 15, i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 0, i32 0)
+  %tmp = tail call <4 x half> @llvm.amdgcn.image.load.2darray.v4f16.i32(i32 noundef 15, i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 noundef 0, i32 noundef 0)
   ret <4 x half> %tmp
 }

-; Function Attrs: nounwind readonly
-declare <4 x half> @llvm.amdgcn.image.load.2darray.v4f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <4 x half> @llvm.amdgcn.image.load.2darray.v4f16.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1

-; Function Attrs: nounwind readonly
-declare <4 x half> @llvm.amdgcn.image.load.2dmsaa.v4f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #5
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <4 x half> @llvm.amdgcn.image.load.2dmsaa.v4f16.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1

-; Function Attrs: nounwind readonly
-declare <4 x half> @llvm.amdgcn.image.load.2darraymsaa.v4f16.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #5
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <4 x half> @llvm.amdgcn.image.load.2darraymsaa.v4f16.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1

-; Function Attrs: alwaysinline nounwind readonly
+; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read)
 define protected <4 x half> @__llvm_amdgcn_image_load_mip_1d_v4f16_i32(i32 %arg1, i32 %arg2, <8 x i32> %arg3) local_unnamed_addr #0 {
 bb:
-  %tmp = tail call <4 x half> @llvm.amdgcn.image.load.mip.1d.v4f16.i32(i32 15, i32 %arg1, i32 %arg2, <8 x i32> %arg3, i32 0, i32 0)
+  %tmp = tail call <4 x half> @llvm.amdgcn.image.load.mip.1d.v4f16.i32(i32 noundef 15, i32 %arg1, i32 %arg2, <8 x i32> %arg3, i32 noundef 0, i32 noundef 0)
   ret <4 x half> %tmp
 }

-; Function Attrs: nounwind readonly
-declare <4 x half> @llvm.amdgcn.image.load.mip.1d.v4f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <4 x half> @llvm.amdgcn.image.load.mip.1d.v4f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1

-; Function Attrs: alwaysinline nounwind readonly
+; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read)
 define protected <4 x half> @__llvm_amdgcn_image_load_mip_2d_v4f16_i32(i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4) local_unnamed_addr #0 {
 bb:
-  %tmp = tail call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32(i32 15, i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 0, i32 0)
+  %tmp = tail call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32(i32 noundef 15, i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 noundef 0, i32 noundef 0)
   ret <4 x half> %tmp
 }

-; Function Attrs: nounwind readonly
-declare <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1

-; Function Attrs: alwaysinline nounwind readonly
+;
Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_load_mip_3d_v4f16_i32(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5) local_unnamed_addr #0 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.load.mip.3d.v4f16.i32(i32 15, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.load.mip.3d.v4f16.i32(i32 noundef 15, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.load.mip.3d.v4f16.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.load.mip.3d.v4f16.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_load_mip_cube_v4f16_i32(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5) local_unnamed_addr #0 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.load.mip.cube.v4f16.i32(i32 15, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.load.mip.cube.v4f16.i32(i32 noundef 15, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.load.mip.cube.v4f16.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.load.mip.cube.v4f16.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_load_mip_1darray_v4f16_i32(i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4) local_unnamed_addr #0 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.load.mip.1darray.v4f16.i32(i32 15, i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.load.mip.1darray.v4f16.i32(i32 noundef 15, i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.load.mip.1darray.v4f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.load.mip.1darray.v4f16.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_load_mip_2darray_v4f16_i32(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5) local_unnamed_addr #0 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.load.mip.2darray.v4f16.i32(i32 15, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.load.mip.2darray.v4f16.i32(i32 noundef 15, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 noundef 0, i32 noundef 
0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.load.mip.2darray.v4f16.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.load.mip.2darray.v4f16.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.load.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.load.1d.f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected float @__llvm_amdgcn_image_load_2d_f32_i32(i32 %arg1, i32 %arg2, <8 x i32> %arg3) local_unnamed_addr #0 { bb: - %tmp = tail call float @llvm.amdgcn.image.load.2d.f32.i32(i32 1, i32 %arg1, i32 %arg2, <8 x i32> %arg3, i32 0, i32 0) + %tmp = tail call float @llvm.amdgcn.image.load.2d.f32.i32(i32 noundef 1, i32 %arg1, i32 %arg2, <8 x i32> %arg3, i32 noundef 0, i32 noundef 0) ret float %tmp } -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.load.2d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.load.2d.f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.load.3d.f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.load.3d.f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.load.cube.f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.load.cube.f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.load.1darray.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.load.1darray.f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected float @__llvm_amdgcn_image_load_2darray_f32_i32(i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4) local_unnamed_addr #0 { bb: - %tmp = tail call float @llvm.amdgcn.image.load.2darray.f32.i32(i32 1, i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 0, i32 0) + %tmp = tail call float @llvm.amdgcn.image.load.2darray.f32.i32(i32 noundef 1, i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 noundef 0, i32 noundef 0) ret float %tmp } -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.load.2darray.f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.load.2darray.f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32, i32, i32, i32, <8 x i32>, i32, 
i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.load.2darraymsaa.f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.load.2darraymsaa.f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.load.mip.1d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.load.mip.1d.f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected float @__llvm_amdgcn_image_load_mip_2d_f32_i32(i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4) local_unnamed_addr #0 { bb: - %tmp = tail call float @llvm.amdgcn.image.load.mip.2d.f32.i32(i32 1, i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 0, i32 0) + %tmp = tail call float @llvm.amdgcn.image.load.mip.2d.f32.i32(i32 noundef 1, i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 noundef 0, i32 noundef 0) ret float %tmp } -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.load.mip.2d.f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.load.mip.2d.f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.load.mip.3d.f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.load.mip.3d.f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.load.mip.cube.f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.load.mip.cube.f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.load.mip.1darray.f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.load.mip.1darray.f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected float @__llvm_amdgcn_image_load_mip_2darray_f32_i32(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5) local_unnamed_addr #0 { bb: - %tmp = tail call float @llvm.amdgcn.image.load.mip.2darray.f32.i32(i32 1, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 0, i32 0) + %tmp = tail call float @llvm.amdgcn.image.load.mip.2darray.f32.i32(i32 noundef 1, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 noundef 0, i32 noundef 0) ret float %tmp } -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.load.mip.2darray.f32.i32(i32, i32, i32, 
i32, i32, <8 x i32>, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.load.mip.2darray.f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_1d_v4f32_i32(<4 x float> %arg, i32 %arg2, <8 x i32> %arg3) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %arg, i32 15, i32 %arg2, <8 x i32> %arg3, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %arg, i32 noundef 15, i32 %arg2, <8 x i32> %arg3, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_2d_v4f32_i32(<4 x float> %arg, i32 %arg2, i32 %arg3, <8 x i32> %arg4) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %arg, i32 15, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %arg, i32 noundef 15, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_3d_v4f32_i32(<4 x float> %arg, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> %arg, i32 15, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> %arg, i32 noundef 15, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_cube_v4f32_i32(<4 x float> %arg, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> %arg, i32 15, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> %arg, i32 noundef 15, i32 %arg2, i32 
%arg3, i32 %arg4, <8 x i32> %arg5, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float>, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_1darray_v4f32_i32(<4 x float> %arg, i32 %arg2, i32 %arg3, <8 x i32> %arg4) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float> %arg, i32 15, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float> %arg, i32 noundef 15, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_2darray_v4f32_i32(<4 x float> %arg, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float> %arg, i32 15, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float> %arg, i32 noundef 15, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float>, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #7 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float>, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #7 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float>, i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_mip_1d_v4f32_i32(<4 x float> %arg, i32 %arg2, i32 %arg3, <8 x i32> %arg4) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %arg, i32 15, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 0, i32 0) + tail call void 
@llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %arg, i32 noundef 15, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_mip_2d_v4f32_i32(<4 x float> %arg, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %arg, i32 15, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %arg, i32 noundef 15, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_mip_3d_v4f32_i32(<4 x float> %arg, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, <8 x i32> %arg6) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %arg, i32 15, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, <8 x i32> %arg6, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %arg, i32 noundef 15, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, <8 x i32> %arg6, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_mip_cube_v4f32_i32(<4 x float> %arg, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, <8 x i32> %arg6) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %arg, i32 15, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, <8 x i32> %arg6, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %arg, i32 noundef 15, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, <8 x i32> %arg6, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 
immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_mip_1darray_v4f32_i32(<4 x float> %arg, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %arg, i32 15, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %arg, i32 noundef 15, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_mip_2darray_v4f32_i32(<4 x float> %arg, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, <8 x i32> %arg6) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %arg, i32 15, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, <8 x i32> %arg6, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %arg, i32 noundef 15, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, <8 x i32> %arg6, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_1d_v4f16_i32(<4 x half> %arg, i32 %arg2, <8 x i32> %arg3) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.1d.v4f16.i32(<4 x half> %arg, i32 15, i32 %arg2, <8 x i32> %arg3, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.1d.v4f16.i32(<4 x half> %arg, i32 noundef 15, i32 %arg2, <8 x i32> %arg3, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.1d.v4f16.i32(<4 x half>, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.1d.v4f16.i32(<4 x half>, i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_2d_v4f16_i32(<4 x half> %arg, i32 %arg2, i32 %arg3, <8 x i32> %arg4) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half> %arg, i32 15, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half> %arg, i32 noundef 15, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind 
writeonly -declare void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half>, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_3d_v4f16_i32(<4 x half> %arg, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.3d.v4f16.i32(<4 x half> %arg, i32 15, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.3d.v4f16.i32(<4 x half> %arg, i32 noundef 15, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.3d.v4f16.i32(<4 x half>, i32, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.3d.v4f16.i32(<4 x half>, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_cube_v4f16_i32(<4 x half> %arg, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.cube.v4f16.i32(<4 x half> %arg, i32 15, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.cube.v4f16.i32(<4 x half> %arg, i32 noundef 15, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.cube.v4f16.i32(<4 x half>, i32, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.cube.v4f16.i32(<4 x half>, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_1darray_v4f16_i32(<4 x half> %arg, i32 %arg2, i32 %arg3, <8 x i32> %arg4) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.1darray.v4f16.i32(<4 x half> %arg, i32 15, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.1darray.v4f16.i32(<4 x half> %arg, i32 noundef 15, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.1darray.v4f16.i32(<4 x half>, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.1darray.v4f16.i32(<4 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_2darray_v4f16_i32(<4 x half> %arg, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.2darray.v4f16.i32(<4 x half> 
%arg, i32 15, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.2darray.v4f16.i32(<4 x half> %arg, i32 noundef 15, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.2darray.v4f16.i32(<4 x half>, i32, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.2darray.v4f16.i32(<4 x half>, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.2dmsaa.v4f16.i32(<4 x half>, i32, i32, i32, i32, <8 x i32>, i32, i32) #7 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.2dmsaa.v4f16.i32(<4 x half>, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.2darraymsaa.v4f16.i32(<4 x half>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #7 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.2darraymsaa.v4f16.i32(<4 x half>, i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_mip_1d_v4f16_i32(<4 x half> %arg, i32 %arg2, i32 %arg3, <8 x i32> %arg4) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.mip.1d.v4f16.i32(<4 x half> %arg, i32 15, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.mip.1d.v4f16.i32(<4 x half> %arg, i32 noundef 15, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.mip.1d.v4f16.i32(<4 x half>, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.mip.1d.v4f16.i32(<4 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_mip_2d_v4f16_i32(<4 x half> %arg, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.mip.2d.v4f16.i32(<4 x half> %arg, i32 15, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.mip.2d.v4f16.i32(<4 x half> %arg, i32 noundef 15, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.mip.2d.v4f16.i32(<4 x half>, i32, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.mip.2d.v4f16.i32(<4 x half>, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_mip_3d_v4f16_i32(<4 x half> %arg, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, <8 x i32> 
%arg6) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.mip.3d.v4f16.i32(<4 x half> %arg, i32 15, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, <8 x i32> %arg6, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.mip.3d.v4f16.i32(<4 x half> %arg, i32 noundef 15, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, <8 x i32> %arg6, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.mip.3d.v4f16.i32(<4 x half>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.mip.3d.v4f16.i32(<4 x half>, i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_mip_cube_v4f16_i32(<4 x half> %arg, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, <8 x i32> %arg6) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.mip.cube.v4f16.i32(<4 x half> %arg, i32 15, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, <8 x i32> %arg6, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.mip.cube.v4f16.i32(<4 x half> %arg, i32 noundef 15, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, <8 x i32> %arg6, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.mip.cube.v4f16.i32(<4 x half>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.mip.cube.v4f16.i32(<4 x half>, i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_mip_1darray_v4f16_i32(<4 x half> %arg, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.mip.1darray.v4f16.i32(<4 x half> %arg, i32 15, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.mip.1darray.v4f16.i32(<4 x half> %arg, i32 noundef 15, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.mip.1darray.v4f16.i32(<4 x half>, i32, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.mip.1darray.v4f16.i32(<4 x half>, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_mip_2darray_v4f16_i32(<4 x half> %arg, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, <8 x i32> %arg6) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.mip.2darray.v4f16.i32(<4 x half> %arg, i32 15, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, <8 x i32> %arg6, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.mip.2darray.v4f16.i32(<4 x half> %arg, i32 noundef 15, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, <8 x i32> %arg6, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void 
@llvm.amdgcn.image.store.mip.2darray.v4f16.i32(<4 x half>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.mip.2darray.v4f16.i32(<4 x half>, i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_2d_f32_i32(float %arg, i32 %arg2, i32 %arg3, <8 x i32> %arg4) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.2d.f32.i32(float %arg, i32 15, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.2d.f32.i32(float %arg, i32 noundef 15, i32 %arg2, i32 %arg3, <8 x i32> %arg4, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.2d.f32.i32(float, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.2d.f32.i32(float, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.3d.f32.i32(float, i32, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.3d.f32.i32(float, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.cube.f32.i32(float, i32, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.cube.f32.i32(float, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.1darray.f32.i32(float, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.1darray.f32.i32(float, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_2darray_f32_i32(float %arg, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.2darray.f32.i32(float %arg, i32 1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.2darray.f32.i32(float %arg, i32 noundef 1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.2darray.f32.i32(float, i32, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.2darray.f32.i32(float, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: nounwind writeonly -declare void 
@llvm.amdgcn.image.store.2dmsaa.f32.i32(float, i32, i32, i32, i32, <8 x i32>, i32, i32) #7 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.2dmsaa.f32.i32(float, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.2darraymsaa.f32.i32(float, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #7 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.2darraymsaa.f32.i32(float, i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -declare void @llvm.amdgcn.image.store.mip.1d.f32.i32(float, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.mip.1d.f32.i32(float, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_mip_2d_f32_i32(float %arg, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.mip.2d.f32.i32(float %arg, i32 1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.mip.2d.f32.i32(float %arg, i32 noundef 1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.mip.2d.f32.i32(float, i32, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.mip.2d.f32.i32(float, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.mip.3d.f32.i32(float, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.mip.3d.f32.i32(float, i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.mip.cube.f32.i32(float, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.mip.cube.f32.i32(float, i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.mip.1darray.f32.i32(float, i32, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.mip.1darray.f32.i32(float, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind writeonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_mip_2darray_f32_i32(float %arg, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, <8 x i32> %arg6) local_unnamed_addr #2 { bb: - tail call void @llvm.amdgcn.image.store.mip.2darray.f32.i32(float %arg, i32 1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, <8 x i32> %arg6, i32 0, i32 0) + tail call void @llvm.amdgcn.image.store.mip.2darray.f32.i32(float %arg, i32 noundef 1, i32 %arg2, i32 %arg3, i32 
%arg4, i32 %arg5, <8 x i32> %arg6, i32 noundef 0, i32 noundef 0) ret void } -; Function Attrs: nounwind writeonly -declare void @llvm.amdgcn.image.store.mip.2darray.f32.i32(float, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #3 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) +declare void @llvm.amdgcn.image.store.mip.2darray.f32.i32(float, i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_1d_v4f32_f32(float %arg1, <8 x i32> %arg2, <4 x i32> %arg3) local_unnamed_addr #0 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %arg1, <8 x i32> %arg2, <4 x i32> %arg3, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 noundef 15, float %arg1, <8 x i32> %arg2, <4 x i32> %arg3, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_lz_1d_v4f32_f32(float %arg1, <8 x i32> %arg2, <4 x i32> %arg3) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %arg1, <8 x i32> %arg2, <4 x i32> %arg3, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 noundef 15, float %arg1, <8 x i32> %arg2, <4 x i32> %arg3, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_l_1d_v4f32_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: 
nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_d_1d_v4f32_f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_2d_v4f32_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #0 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_lz_2d_v4f32_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_l_2d_v4f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %arg1, float 
%arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_d_2d_v4f32_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, <8 x i32> %arg7, <4 x i32> %arg8) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, <8 x i32> %arg7, <4 x i32> %arg8, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, <8 x i32> %arg7, <4 x i32> %arg8, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_3d_v4f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_lz_3d_v4f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.lz.3d.v4f32.f32(i32 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 false, i32 0, 
i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.lz.3d.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.lz.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.lz.3d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_l_3d_v4f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.l.3d.v4f32.f32(i32 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.l.3d.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.l.3d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.l.3d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_d_3d_v4f32_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, float %arg8, float %arg9, <8 x i32> %arg10, <4 x i32> %arg11) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32 15, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, float %arg8, float %arg9, <8 x i32> %arg10, <4 x i32> %arg11, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, float %arg8, float %arg9, <8 x i32> %arg10, <4 x i32> %arg11, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32 immarg, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_cube_v4f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { bb: - %tmp = 
tail call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_lz_cube_v4f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.lz.cube.v4f32.f32(i32 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.lz.cube.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.lz.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.lz.cube.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_l_cube_v4f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.l.cube.v4f32.f32(i32 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.l.cube.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.l.cube.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.l.cube.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32(i32 immarg, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; 
Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_1darray_v4f32_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #0 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_lz_1darray_v4f32_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.lz.1darray.v4f32.f32(i32 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.lz.1darray.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.lz.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.lz.1darray.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_l_1darray_v4f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.l.1darray.v4f32.f32(i32 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.l.1darray.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.l.1darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.l.1darray.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_d_1darray_v4f32_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x 
i32> %arg6) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.d.1darray.v4f32.f32.f32(i32 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.d.1darray.v4f32.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.d.1darray.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.d.1darray.v4f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_2darray_v4f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_lz_2darray_v4f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.lz.2darray.v4f32.f32(i32 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.lz.2darray.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.lz.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.lz.2darray.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_l_2darray_v4f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x float> 
@llvm.amdgcn.image.sample.l.2darray.v4f32.f32(i32 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.l.2darray.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.l.2darray.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.l.2darray.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_d_2darray_v4f32_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, <8 x i32> %arg8, <4 x i32> %arg9) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.d.2darray.v4f32.f32.f32(i32 15, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, <8 x i32> %arg8, <4 x i32> %arg9, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.d.2darray.v4f32.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, <8 x i32> %arg8, <4 x i32> %arg9, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.sample.d.2darray.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.d.2darray.v4f32.f32.f32(i32 immarg, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_1d_v4f16_f32(float %arg1, <8 x i32> %arg2, <4 x i32> %arg3) local_unnamed_addr #0 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.1d.v4f16.f32(i32 15, float %arg1, <8 x i32> %arg2, <4 x i32> %arg3, i1 false, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.1d.v4f16.f32(i32 noundef 15, float %arg1, <8 x i32> %arg2, <4 x i32> %arg3, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.sample.1d.v4f16.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.1d.v4f16.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_lz_1d_v4f16_f32(float %arg1, <8 x i32> %arg2, <4 x i32> %arg3) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.lz.1d.v4f16.f32(i32 15, float %arg1, <8 x i32> 
%arg2, <4 x i32> %arg3, i1 false, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.lz.1d.v4f16.f32(i32 noundef 15, float %arg1, <8 x i32> %arg2, <4 x i32> %arg3, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.sample.lz.1d.v4f16.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.lz.1d.v4f16.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_l_1d_v4f16_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.l.1d.v4f16.f32(i32 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 false, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.l.1d.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.sample.l.1d.v4f16.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.l.1d.v4f16.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_d_1d_v4f16_f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.1d.v4f16.f32.f32(i32 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 false, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.1d.v4f16.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.sample.d.1d.v4f16.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.d.1d.v4f16.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_2d_v4f16_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #0 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.2d.v4f16.f32(i32 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 false, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.2d.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.sample.2d.v4f16.f32(i32, float, 
float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.2d.v4f16.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_lz_2d_v4f16_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32(i32 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 false, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_l_2d_v4f16_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.l.2d.v4f16.f32(i32 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 false, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.l.2d.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.sample.l.2d.v4f16.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.l.2d.v4f16.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_d_2d_v4f16_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, <8 x i32> %arg7, <4 x i32> %arg8) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.2d.v4f16.f32.f32(i32 15, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, <8 x i32> %arg7, <4 x i32> %arg8, i1 false, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.2d.v4f16.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, <8 x i32> %arg7, <4 x i32> %arg8, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.sample.d.2d.v4f16.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> 
@llvm.amdgcn.image.sample.d.2d.v4f16.f32.f32(i32 immarg, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_3d_v4f16_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.3d.v4f16.f32(i32 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 false, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.3d.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.sample.3d.v4f16.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.3d.v4f16.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_lz_3d_v4f16_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.lz.3d.v4f16.f32(i32 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 false, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.lz.3d.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.sample.lz.3d.v4f16.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.lz.3d.v4f16.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_l_3d_v4f16_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.l.3d.v4f16.f32(i32 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 false, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.l.3d.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.sample.l.3d.v4f16.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.l.3d.v4f16.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; 
Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_d_3d_v4f16_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, float %arg8, float %arg9, <8 x i32> %arg10, <4 x i32> %arg11, i32 %arg13, i32 %arg14) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.3d.v4f16.f32.f32(i32 15, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, float %arg8, float %arg9, <8 x i32> %arg10, <4 x i32> %arg11, i1 false, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.3d.v4f16.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, float %arg8, float %arg9, <8 x i32> %arg10, <4 x i32> %arg11, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.sample.d.3d.v4f16.f32.f32(i32, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.d.3d.v4f16.f32.f32(i32 immarg, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_cube_v4f16_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.cube.v4f16.f32(i32 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 false, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.cube.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.sample.cube.v4f16.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.cube.v4f16.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_lz_cube_v4f16_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.lz.cube.v4f16.f32(i32 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 false, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.lz.cube.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.sample.lz.cube.v4f16.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.lz.cube.v4f16.f32(i32 immarg, float, 
float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_l_cube_v4f16_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.l.cube.v4f16.f32(i32 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 false, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.l.cube.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.sample.l.cube.v4f16.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.l.cube.v4f16.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.sample.d.cube.v4f16.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.d.cube.v4f16.f32.f32(i32 immarg, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_1darray_v4f16_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #0 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.1darray.v4f16.f32(i32 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 false, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.1darray.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.sample.1darray.v4f16.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.1darray.v4f16.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_lz_1darray_v4f16_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.lz.1darray.v4f16.f32(i32 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 false, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.lz.1darray.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.sample.lz.1darray.v4f16.f32(i32, 
float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.lz.1darray.v4f16.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_l_1darray_v4f16_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.l.1darray.v4f16.f32(i32 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 false, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.l.1darray.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.sample.l.1darray.v4f16.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.l.1darray.v4f16.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_d_1darray_v4f16_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32(i32 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 false, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_2darray_v4f16_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.2darray.v4f16.f32(i32 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 false, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.2darray.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.sample.2darray.v4f16.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn 
memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.2darray.v4f16.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_lz_2darray_v4f16_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.lz.2darray.v4f16.f32(i32 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 false, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.lz.2darray.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.sample.lz.2darray.v4f16.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.lz.2darray.v4f16.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_l_2darray_v4f16_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.l.2darray.v4f16.f32(i32 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 false, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.l.2darray.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.sample.l.2darray.v4f16.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.l.2darray.v4f16.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_d_2darray_v4f16_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, <8 x i32> %arg8, <4 x i32> %arg9) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.2darray.v4f16.f32.f32(i32 15, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, <8 x i32> %arg8, <4 x i32> %arg9, i1 false, i32 0, i32 0) + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.2darray.v4f16.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, <8 x i32> %arg8, <4 x i32> %arg9, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x half> %tmp } -; Function Attrs: nounwind readonly -declare <4 x half> @llvm.amdgcn.image.sample.d.2darray.v4f16.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, 
i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.d.2darray.v4f16.f32.f32(i32 immarg, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.sample.lz.1d.f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.lz.1d.f32.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.sample.l.1d.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.l.1d.f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.sample.d.1d.f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.d.1d.f32.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected float @__llvm_amdgcn_image_sample_2d_f32_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #0 { bb: - %tmp = tail call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 false, i32 0, i32 0) + %tmp = tail call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 noundef 1, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) ret float %tmp } -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.sample.2d.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.2d.f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected float @__llvm_amdgcn_image_sample_lz_2d_f32_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #4 { bb: - %tmp = tail call float @llvm.amdgcn.image.sample.lz.2d.f32.f32(i32 1, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 false, i32 0, i32 0) + %tmp = tail call float @llvm.amdgcn.image.sample.lz.2d.f32.f32(i32 noundef 1, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) ret float %tmp } -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.sample.lz.2d.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.lz.2d.f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected float 
@__llvm_amdgcn_image_sample_l_2d_f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { bb: - %tmp = tail call float @llvm.amdgcn.image.sample.l.2d.f32.f32(i32 1, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 false, i32 0, i32 0) + %tmp = tail call float @llvm.amdgcn.image.sample.l.2d.f32.f32(i32 noundef 1, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) ret float %tmp } -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.sample.l.2d.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.l.2d.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected float @__llvm_amdgcn_image_sample_d_2d_f32_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, <8 x i32> %arg7, <4 x i32> %arg8) local_unnamed_addr #4 { bb: - %tmp = tail call float @llvm.amdgcn.image.sample.d.2d.f32.f32.f32(i32 1, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, <8 x i32> %arg7, <4 x i32> %arg8, i1 false, i32 0, i32 0) + %tmp = tail call float @llvm.amdgcn.image.sample.d.2d.f32.f32.f32(i32 noundef 1, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, <8 x i32> %arg7, <4 x i32> %arg8, i1 noundef false, i32 noundef 0, i32 noundef 0) ret float %tmp } -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.sample.d.2d.f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.d.2d.f32.f32.f32(i32 immarg, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.sample.lz.3d.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.lz.3d.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.sample.l.3d.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.l.3d.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.sample.d.3d.f32.f32.f32(i32, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.d.3d.f32.f32.f32(i32 immarg, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.sample.lz.cube.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; 
Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.lz.cube.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.sample.l.cube.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.l.cube.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.sample.d.cube.f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.d.cube.f32.f32.f32(i32 immarg, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.sample.lz.1darray.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.lz.1darray.f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.sample.l.1darray.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.l.1darray.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.sample.d.1darray.f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.d.1darray.f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected float @__llvm_amdgcn_image_sample_2darray_f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { bb: - %tmp = tail call float @llvm.amdgcn.image.sample.2darray.f32.f32(i32 1, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 false, i32 0, i32 0) + %tmp = tail call float @llvm.amdgcn.image.sample.2darray.f32.f32(i32 noundef 1, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) ret float %tmp } -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.sample.2darray.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.2darray.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected float @__llvm_amdgcn_image_sample_lz_2darray_f32_f32(float %arg1, float %arg2, float 
%arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { bb: - %tmp = tail call float @llvm.amdgcn.image.sample.lz.2darray.f32.f32(i32 1, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 false, i32 0, i32 0) + %tmp = tail call float @llvm.amdgcn.image.sample.lz.2darray.f32.f32(i32 noundef 1, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) ret float %tmp } -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.sample.lz.2darray.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.lz.2darray.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected float @__llvm_amdgcn_image_sample_l_2darray_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #4 { bb: - %tmp = tail call float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 1, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 false, i32 0, i32 0) + %tmp = tail call float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 noundef 1, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) ret float %tmp } -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected float @__llvm_amdgcn_image_sample_d_2darray_f32_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, <8 x i32> %arg8, <4 x i32> %arg9, i32 %arg11, i32 %arg12) local_unnamed_addr #4 { bb: - %tmp = tail call float @llvm.amdgcn.image.sample.d.2darray.f32.f32.f32(i32 1, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, <8 x i32> %arg8, <4 x i32> %arg9, i1 false, i32 0, i32 0) + %tmp = tail call float @llvm.amdgcn.image.sample.d.2darray.f32.f32.f32(i32 noundef 1, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, <8 x i32> %arg8, <4 x i32> %arg9, i1 noundef false, i32 noundef 0, i32 noundef 0) ret float %tmp } -; Function Attrs: nounwind readonly -declare float @llvm.amdgcn.image.sample.d.2darray.f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.d.2darray.f32.f32.f32(i32 immarg, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: alwaysinline nounwind readonly +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_gather4_lz_2d_v4f32_f32_r(float %arg1, float 
%arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 1, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 noundef 1, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_gather4_lz_2d_v4f32_f32_g(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 2, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 noundef 2, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_gather4_lz_2d_v4f32_f32_b(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 4, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 noundef 4, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_gather4_lz_2d_v4f32_f32_a(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #4 { bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 8, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 false, i32 0, i32 0) + %tmp = tail call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 noundef 8, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) ret <4 x float> %tmp } -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.gather4.lz.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.gather4.lz.cube.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly 
-declare <4 x float> @llvm.amdgcn.image.gather4.l.cube.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.gather4.l.cube.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.gather4.lz.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.gather4.lz.2darray.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly -declare <4 x float> @llvm.amdgcn.image.gather4.l.2darray.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.gather4.l.2darray.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind readonly +; Function Attrs: nounwind memory(read) declare <4 x float> @llvm.amdgcn.image.gather.4h.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 -; Function Attrs: nounwind readonly +; Function Attrs: nounwind memory(read) declare <4 x float> @llvm.amdgcn.image.gather.4h.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 -; Function Attrs: nounwind readonly +; Function Attrs: nounwind memory(read) declare <4 x float> @llvm.amdgcn.image.gather.4h.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 -attributes #0 = { alwaysinline nounwind readonly } -attributes #1 = { nounwind readonly } -attributes #2 = { alwaysinline nounwind writeonly } -attributes #3 = { nounwind writeonly } -attributes #4 = { alwaysinline nounwind readonly "target-features"="+extended-image-insts" } -attributes #5 = { nounwind readonly "target-features"="+extended-image-insts" } -attributes #6 = { alwaysinline nounwind writeonly "target-features"="+extended-image-insts" } -attributes #7 = { nounwind writeonly "target-features"="+extended-image-insts" } +attributes #0 = { nofree norecurse nosync nounwind willreturn memory(read) } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(read) } +attributes #2 = { nofree norecurse nosync nounwind willreturn memory(write) } +attributes #3 = { nocallback nofree nosync nounwind willreturn memory(write) } +attributes #4 = { nofree norecurse nosync nounwind willreturn memory(read) "target-features"="+extended-image-insts" } +attributes #5 = { nounwind memory(read) "target-features"="+extended-image-insts" } From e62f831b1745d096dfc8b5a40cdefa33c753428c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 12 Jan 2023 12:18:21 -0500 Subject: [PATCH 13/22] Use __builtin_elementwise_copysign Change-Id: I2d9247925c73cf14dc0ff98ce7618e1a84a38fd4 --- irif/inc/irif.h | 1 - ocml/src/builtins.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/irif/inc/irif.h b/irif/inc/irif.h index a093c290..dbd45466 100644 --- a/irif/inc/irif.h +++ b/irif/inc/irif.h @@ -19,7 +19,6 @@ extern __attribute__((const)) float2 __llvm_fma_2f32(float2, float2, float2) __asm("llvm.fma.v2f32"); extern __attribute__((const)) half2 __llvm_fma_2f16(half2, half2, half2) 
__asm("llvm.fma.v2f16"); -extern __attribute__((const)) half2 __llvm_copysign_2f16(half2, half2) __asm("llvm.copysign.v2f16"); extern __attribute__((const)) half2 __llvm_round_2f16(half2) __asm("llvm.round.v2f16"); extern __attribute__((const)) half2 __llvm_rint_2f16(half2) __asm("llvm.rint.v2f16"); extern __attribute__((const)) half2 __llvm_canonicalize_2f16(half2) __asm("llvm.canonicalize.v2f16"); diff --git a/ocml/src/builtins.h b/ocml/src/builtins.h index 085b8365..b3b83f12 100644 --- a/ocml/src/builtins.h +++ b/ocml/src/builtins.h @@ -83,7 +83,7 @@ #define BUILTIN_COPYSIGN_F32 __builtin_copysignf #define BUILTIN_COPYSIGN_F64 __builtin_copysign #define BUILTIN_COPYSIGN_F16 __builtin_copysignf16 -#define BUILTIN_COPYSIGN_2F16 __llvm_copysign_2f16 +#define BUILTIN_COPYSIGN_2F16 __builtin_elementwise_copysign #define BUILTIN_FLOOR_F32 __builtin_floorf #define BUILTIN_FLOOR_F64 __builtin_floor From 9d46b967ba62faf8125fa5a4185b1effa1b0c1bb Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 4 Jan 2023 08:34:18 -0500 Subject: [PATCH 14/22] Use pointer type for enqueued block address Change-Id: I420402c90afe10d757f593031ddd5bfbd194a3ec --- ockl/inc/hsa.h | 8 +++++++- opencl/src/devenq/enqueue.cl | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/ockl/inc/hsa.h b/ockl/inc/hsa.h index 85365882..07adc7be 100644 --- a/ockl/inc/hsa.h +++ b/ockl/inc/hsa.h @@ -2125,7 +2125,13 @@ typedef struct hsa_kernel_dispatch_packet_s { * Opaque handle to a code object that includes an implementation-defined * executable code for the kernel. */ - uint64_t kernel_object; + union { +#ifdef DEVICE_COMPILER + __global +#endif + void *kernel_object; + uint64_t kernel_object_padding; + }; #ifdef HSA_LARGE_MODEL #ifdef DEVICE_COMPILER diff --git a/opencl/src/devenq/enqueue.cl b/opencl/src/devenq/enqueue.cl index a5098fb7..756f98d2 100644 --- a/opencl/src/devenq/enqueue.cl +++ b/opencl/src/devenq/enqueue.cl @@ -5,7 +5,7 @@ #define LOCAL_ALIGN 16 struct rtinfo { - ulong kernel_object; + __global char* kernel_object; uint private_segment_size; uint group_segment_size; }; From 9efba75f3edeca201c77bf4cdef95d810418e9b2 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 4 Jan 2023 08:38:23 -0500 Subject: [PATCH 15/22] Remove invalid image intrinsic declarations These weren't recognized as intrinsics and thus didn't get the correct attributes applied. 
Change-Id: I87c856cd99f63086e27ca2289d89ecce5f12bbb6 --- irif/src/imintrin.ll | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/irif/src/imintrin.ll b/irif/src/imintrin.ll index b57ff17e..fe4d78f6 100644 --- a/irif/src/imintrin.ll +++ b/irif/src/imintrin.ll @@ -1273,18 +1273,8 @@ declare <4 x float> @llvm.amdgcn.image.gather4.lz.2darray.v4f32.f32(i32 immarg, ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) declare <4 x float> @llvm.amdgcn.image.gather4.l.2darray.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nounwind memory(read) -declare <4 x float> @llvm.amdgcn.image.gather.4h.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 - -; Function Attrs: nounwind memory(read) -declare <4 x float> @llvm.amdgcn.image.gather.4h.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 - -; Function Attrs: nounwind memory(read) -declare <4 x float> @llvm.amdgcn.image.gather.4h.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #5 - attributes #0 = { nofree norecurse nosync nounwind willreturn memory(read) } attributes #1 = { nocallback nofree nosync nounwind willreturn memory(read) } attributes #2 = { nofree norecurse nosync nounwind willreturn memory(write) } attributes #3 = { nocallback nofree nosync nounwind willreturn memory(write) } attributes #4 = { nofree norecurse nosync nounwind willreturn memory(read) "target-features"="+extended-image-insts" } -attributes #5 = { nounwind memory(read) "target-features"="+extended-image-insts" } From 5b2a9c91efe66415fa845adb83656d7f78211779 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 4 Jan 2023 08:51:32 -0500 Subject: [PATCH 16/22] Split image intrinsics IR file Keep the extended image intrinsics separate from the others. 
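
(Editorial note, an inference from the attribute sets in these files rather than a statement in the upstream commit: the wrappers moved into extended-image-intrinsics.ll appear to be exactly those previously annotated with "target-features"="+extended-image-insts", i.e. the sample.lz/sample.l/sample.d and gather4 variants, so the split leaves base-image-intrinsics.ll free of that target-feature requirement.)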
Change-Id: Ib1390bbe7f7127ff794ec7f98ea31d4ab8808938 --- .../{imintrin.ll => base-image-intrinsics.ll} | 573 ------------------ irif/src/extended-image-intrinsics.ll | 436 +++++++++++++ 2 files changed, 436 insertions(+), 573 deletions(-) rename irif/src/{imintrin.ll => base-image-intrinsics.ll} (51%) create mode 100644 irif/src/extended-image-intrinsics.ll diff --git a/irif/src/imintrin.ll b/irif/src/base-image-intrinsics.ll similarity index 51% rename from irif/src/imintrin.ll rename to irif/src/base-image-intrinsics.ll index fe4d78f6..353f2432 100644 --- a/irif/src/imintrin.ll +++ b/irif/src/base-image-intrinsics.ll @@ -61,12 +61,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_load_mip_1d_v4f32_i32(i32 %arg1, i32 %arg2, <8 x i32> %arg3) local_unnamed_addr #0 { bb: @@ -187,12 +181,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) declare <4 x half> @llvm.amdgcn.image.load.2darray.v4f16.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x half> @llvm.amdgcn.image.load.2dmsaa.v4f16.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x half> @llvm.amdgcn.image.load.2darraymsaa.v4f16.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_load_mip_1d_v4f16_i32(i32 %arg1, i32 %arg2, <8 x i32> %arg3) local_unnamed_addr #0 { bb: @@ -253,9 +241,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) declare <4 x half> @llvm.amdgcn.image.load.mip.2darray.v4f16.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.load.1d.f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected float @__llvm_amdgcn_image_load_2d_f32_i32(i32 %arg1, i32 %arg2, <8 x i32> %arg3) local_unnamed_addr #0 { bb: @@ -266,15 +251,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) declare float @llvm.amdgcn.image.load.2d.f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.load.3d.f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.load.cube.f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, 
i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.load.1darray.f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected float @__llvm_amdgcn_image_load_2darray_f32_i32(i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4) local_unnamed_addr #0 { bb: @@ -285,15 +261,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) declare float @llvm.amdgcn.image.load.2darray.f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.load.2darraymsaa.f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.load.mip.1d.f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected float @__llvm_amdgcn_image_load_mip_2d_f32_i32(i32 %arg1, i32 %arg2, i32 %arg3, <8 x i32> %arg4) local_unnamed_addr #0 { bb: @@ -304,15 +271,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) declare float @llvm.amdgcn.image.load.mip.2d.f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.load.mip.3d.f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.load.mip.cube.f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.load.mip.1darray.f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected float @__llvm_amdgcn_image_load_mip_2darray_f32_i32(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5) local_unnamed_addr #0 { bb: @@ -383,12 +341,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) declare void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float>, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) -declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float>, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) -declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float>, i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_mip_1d_v4f32_i32(<4 x float> %arg, i32 %arg2, i32 %arg3, <8 x i32> %arg4) local_unnamed_addr #2 { bb: @@ -509,12 +461,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) declare void 
@llvm.amdgcn.image.store.2darray.v4f16.i32(<4 x half>, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) -declare void @llvm.amdgcn.image.store.2dmsaa.v4f16.i32(<4 x half>, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) -declare void @llvm.amdgcn.image.store.2darraymsaa.v4f16.i32(<4 x half>, i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_mip_1d_v4f16_i32(<4 x half> %arg, i32 %arg2, i32 %arg3, <8 x i32> %arg4) local_unnamed_addr #2 { bb: @@ -575,9 +521,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) declare void @llvm.amdgcn.image.store.mip.2darray.v4f16.i32(<4 x half>, i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) -declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #3 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_2d_f32_i32(float %arg, i32 %arg2, i32 %arg3, <8 x i32> %arg4) local_unnamed_addr #2 { bb: @@ -588,15 +531,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) declare void @llvm.amdgcn.image.store.2d.f32.i32(float, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) -declare void @llvm.amdgcn.image.store.3d.f32.i32(float, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) -declare void @llvm.amdgcn.image.store.cube.f32.i32(float, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) -declare void @llvm.amdgcn.image.store.1darray.f32.i32(float, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_2darray_f32_i32(float %arg, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5) local_unnamed_addr #2 { bb: @@ -607,15 +541,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) declare void @llvm.amdgcn.image.store.2darray.f32.i32(float, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) -declare void @llvm.amdgcn.image.store.2dmsaa.f32.i32(float, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) -declare void @llvm.amdgcn.image.store.2darraymsaa.f32.i32(float, i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) -declare void @llvm.amdgcn.image.store.mip.1d.f32.i32(float, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_mip_2d_f32_i32(float %arg, i32 %arg2, i32 %arg3, i32 %arg4, <8 x i32> %arg5) 
local_unnamed_addr #2 { bb: @@ -626,15 +551,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) declare void @llvm.amdgcn.image.store.mip.2d.f32.i32(float, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) -declare void @llvm.amdgcn.image.store.mip.3d.f32.i32(float, i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) -declare void @llvm.amdgcn.image.store.mip.cube.f32.i32(float, i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) -declare void @llvm.amdgcn.image.store.mip.1darray.f32.i32(float, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) define protected void @__llvm_amdgcn_image_store_mip_2darray_f32_i32(float %arg, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, <8 x i32> %arg6) local_unnamed_addr #2 { bb: @@ -655,36 +571,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x float> @__llvm_amdgcn_image_sample_lz_1d_v4f32_f32(float %arg1, <8 x i32> %arg2, <4 x i32> %arg3) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 noundef 15, float %arg1, <8 x i32> %arg2, <4 x i32> %arg3, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x float> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x float> @__llvm_amdgcn_image_sample_l_1d_v4f32_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x float> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x float> @__llvm_amdgcn_image_sample_d_1d_v4f32_f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x float> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x 
float> @__llvm_amdgcn_image_sample_2d_v4f32_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #0 { bb: @@ -695,36 +581,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x float> @__llvm_amdgcn_image_sample_lz_2d_v4f32_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x float> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x float> @__llvm_amdgcn_image_sample_l_2d_v4f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x float> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x float> @__llvm_amdgcn_image_sample_d_2d_v4f32_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, <8 x i32> %arg7, <4 x i32> %arg8) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, <8 x i32> %arg7, <4 x i32> %arg8, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x float> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_3d_v4f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { bb: @@ -735,36 +591,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x float> @__llvm_amdgcn_image_sample_lz_3d_v4f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.lz.3d.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef 
false, i32 noundef 0, i32 noundef 0) - ret <4 x float> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.sample.lz.3d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x float> @__llvm_amdgcn_image_sample_l_3d_v4f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.l.3d.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x float> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.sample.l.3d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x float> @__llvm_amdgcn_image_sample_d_3d_v4f32_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, float %arg8, float %arg9, <8 x i32> %arg10, <4 x i32> %arg11) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, float %arg8, float %arg9, <8 x i32> %arg10, <4 x i32> %arg11, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x float> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32 immarg, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_cube_v4f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { bb: @@ -775,29 +601,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x float> @__llvm_amdgcn_image_sample_lz_cube_v4f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.lz.cube.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x float> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.sample.lz.cube.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x float> @__llvm_amdgcn_image_sample_l_cube_v4f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x float> 
@llvm.amdgcn.image.sample.l.cube.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x float> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.sample.l.cube.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32(i32 immarg, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> @__llvm_amdgcn_image_sample_1darray_v4f32_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #0 { bb: @@ -808,36 +611,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x float> @__llvm_amdgcn_image_sample_lz_1darray_v4f32_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.lz.1darray.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x float> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.sample.lz.1darray.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x float> @__llvm_amdgcn_image_sample_l_1darray_v4f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.l.1darray.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x float> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.sample.l.1darray.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x float> @__llvm_amdgcn_image_sample_d_1darray_v4f32_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.d.1darray.v4f32.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x float> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.sample.d.1darray.v4f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x float> 
@__llvm_amdgcn_image_sample_2darray_v4f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { bb: @@ -848,36 +621,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x float> @__llvm_amdgcn_image_sample_lz_2darray_v4f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.lz.2darray.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x float> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.sample.lz.2darray.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x float> @__llvm_amdgcn_image_sample_l_2darray_v4f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.l.2darray.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x float> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.sample.l.2darray.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x float> @__llvm_amdgcn_image_sample_d_2darray_v4f32_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, <8 x i32> %arg8, <4 x i32> %arg9) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.d.2darray.v4f32.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, <8 x i32> %arg8, <4 x i32> %arg9, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x float> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.sample.d.2darray.v4f32.f32.f32(i32 immarg, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_1d_v4f16_f32(float %arg1, <8 x i32> %arg2, <4 x i32> %arg3) local_unnamed_addr #0 { bb: @@ -888,36 +631,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) declare <4 x half> @llvm.amdgcn.image.sample.1d.v4f16.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x half> @__llvm_amdgcn_image_sample_lz_1d_v4f16_f32(float %arg1, <8 x i32> %arg2, <4 x i32> %arg3) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.lz.1d.v4f16.f32(i32 
noundef 15, float %arg1, <8 x i32> %arg2, <4 x i32> %arg3, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x half> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x half> @llvm.amdgcn.image.sample.lz.1d.v4f16.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x half> @__llvm_amdgcn_image_sample_l_1d_v4f16_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.l.1d.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x half> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x half> @llvm.amdgcn.image.sample.l.1d.v4f16.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x half> @__llvm_amdgcn_image_sample_d_1d_v4f16_f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.1d.v4f16.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x half> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x half> @llvm.amdgcn.image.sample.d.1d.v4f16.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_2d_v4f16_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #0 { bb: @@ -928,36 +641,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) declare <4 x half> @llvm.amdgcn.image.sample.2d.v4f16.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x half> @__llvm_amdgcn_image_sample_lz_2d_v4f16_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x half> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x half> @__llvm_amdgcn_image_sample_l_2d_v4f16_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.l.2d.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x half> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x half> 
@llvm.amdgcn.image.sample.l.2d.v4f16.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x half> @__llvm_amdgcn_image_sample_d_2d_v4f16_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, <8 x i32> %arg7, <4 x i32> %arg8) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.2d.v4f16.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, <8 x i32> %arg7, <4 x i32> %arg8, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x half> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x half> @llvm.amdgcn.image.sample.d.2d.v4f16.f32.f32(i32 immarg, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_3d_v4f16_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { bb: @@ -968,36 +651,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) declare <4 x half> @llvm.amdgcn.image.sample.3d.v4f16.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x half> @__llvm_amdgcn_image_sample_lz_3d_v4f16_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.lz.3d.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x half> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x half> @llvm.amdgcn.image.sample.lz.3d.v4f16.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x half> @__llvm_amdgcn_image_sample_l_3d_v4f16_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.l.3d.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x half> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x half> @llvm.amdgcn.image.sample.l.3d.v4f16.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x half> @__llvm_amdgcn_image_sample_d_3d_v4f16_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, float %arg8, float %arg9, <8 x i32> %arg10, <4 x i32> %arg11, i32 %arg13, i32 %arg14) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.3d.v4f16.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, float %arg8, float %arg9, <8 x i32> %arg10, <4 x i32> %arg11, i1 noundef false, 
i32 noundef 0, i32 noundef 0) - ret <4 x half> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x half> @llvm.amdgcn.image.sample.d.3d.v4f16.f32.f32(i32 immarg, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_cube_v4f16_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { bb: @@ -1008,29 +661,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) declare <4 x half> @llvm.amdgcn.image.sample.cube.v4f16.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x half> @__llvm_amdgcn_image_sample_lz_cube_v4f16_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.lz.cube.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x half> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x half> @llvm.amdgcn.image.sample.lz.cube.v4f16.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x half> @__llvm_amdgcn_image_sample_l_cube_v4f16_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.l.cube.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x half> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x half> @llvm.amdgcn.image.sample.l.cube.v4f16.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x half> @llvm.amdgcn.image.sample.d.cube.v4f16.f32.f32(i32 immarg, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_1darray_v4f16_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #0 { bb: @@ -1041,36 +671,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) declare <4 x half> @llvm.amdgcn.image.sample.1darray.v4f16.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x half> @__llvm_amdgcn_image_sample_lz_1darray_v4f16_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.lz.1darray.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x half> %tmp -} - -; 
Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x half> @llvm.amdgcn.image.sample.lz.1darray.v4f16.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x half> @__llvm_amdgcn_image_sample_l_1darray_v4f16_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.l.1darray.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x half> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x half> @llvm.amdgcn.image.sample.l.1darray.v4f16.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x half> @__llvm_amdgcn_image_sample_d_1darray_v4f16_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x half> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected <4 x half> @__llvm_amdgcn_image_sample_2darray_v4f16_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { bb: @@ -1081,45 +681,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) declare <4 x half> @llvm.amdgcn.image.sample.2darray.v4f16.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x half> @__llvm_amdgcn_image_sample_lz_2darray_v4f16_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.lz.2darray.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x half> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x half> @llvm.amdgcn.image.sample.lz.2darray.v4f16.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x half> @__llvm_amdgcn_image_sample_l_2darray_v4f16_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.l.2darray.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x half> %tmp -} - -; Function Attrs: nocallback nofree nosync 
nounwind willreturn memory(read) -declare <4 x half> @llvm.amdgcn.image.sample.l.2darray.v4f16.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x half> @__llvm_amdgcn_image_sample_d_2darray_v4f16_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, <8 x i32> %arg8, <4 x i32> %arg9) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.2darray.v4f16.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, <8 x i32> %arg8, <4 x i32> %arg9, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x half> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x half> @llvm.amdgcn.image.sample.d.2darray.v4f16.f32.f32(i32 immarg, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.sample.lz.1d.f32.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.sample.l.1d.f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.sample.d.1d.f32.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected float @__llvm_amdgcn_image_sample_2d_f32_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #0 { bb: @@ -1130,63 +691,6 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) declare float @llvm.amdgcn.image.sample.2d.f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected float @__llvm_amdgcn_image_sample_lz_2d_f32_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #4 { -bb: - %tmp = tail call float @llvm.amdgcn.image.sample.lz.2d.f32.f32(i32 noundef 1, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret float %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.sample.lz.2d.f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected float @__llvm_amdgcn_image_sample_l_2d_f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { -bb: - %tmp = tail call float @llvm.amdgcn.image.sample.l.2d.f32.f32(i32 noundef 1, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret float %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.sample.l.2d.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 
- -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected float @__llvm_amdgcn_image_sample_d_2d_f32_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, <8 x i32> %arg7, <4 x i32> %arg8) local_unnamed_addr #4 { -bb: - %tmp = tail call float @llvm.amdgcn.image.sample.d.2d.f32.f32.f32(i32 noundef 1, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, <8 x i32> %arg7, <4 x i32> %arg8, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret float %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.sample.d.2d.f32.f32.f32(i32 immarg, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.sample.lz.3d.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.sample.l.3d.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.sample.d.3d.f32.f32.f32(i32 immarg, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.sample.lz.cube.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.sample.l.cube.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.sample.d.cube.f32.f32.f32(i32 immarg, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.sample.lz.1darray.f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.sample.l.1darray.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.sample.d.1darray.f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - ; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) define protected float @__llvm_amdgcn_image_sample_2darray_f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { bb: @@ -1197,84 +701,7 @@ bb: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) declare float @llvm.amdgcn.image.sample.2darray.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected float 
@__llvm_amdgcn_image_sample_lz_2darray_f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #4 { -bb: - %tmp = tail call float @llvm.amdgcn.image.sample.lz.2darray.f32.f32(i32 noundef 1, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret float %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.sample.lz.2darray.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected float @__llvm_amdgcn_image_sample_l_2darray_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #4 { -bb: - %tmp = tail call float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 noundef 1, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret float %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected float @__llvm_amdgcn_image_sample_d_2darray_f32_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, <8 x i32> %arg8, <4 x i32> %arg9, i32 %arg11, i32 %arg12) local_unnamed_addr #4 { -bb: - %tmp = tail call float @llvm.amdgcn.image.sample.d.2darray.f32.f32.f32(i32 noundef 1, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, <8 x i32> %arg8, <4 x i32> %arg9, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret float %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare float @llvm.amdgcn.image.sample.d.2darray.f32.f32.f32(i32 immarg, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x float> @__llvm_amdgcn_image_gather4_lz_2d_v4f32_f32_r(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 noundef 1, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x float> %tmp -} - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x float> @__llvm_amdgcn_image_gather4_lz_2d_v4f32_f32_g(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 noundef 2, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x float> %tmp -} - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x float> @__llvm_amdgcn_image_gather4_lz_2d_v4f32_f32_b(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 noundef 4, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 
noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x float> %tmp -} - -; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) -define protected <4 x float> @__llvm_amdgcn_image_gather4_lz_2d_v4f32_f32_a(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #4 { -bb: - %tmp = tail call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 noundef 8, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) - ret <4 x float> %tmp -} - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.gather4.lz.cube.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.gather4.l.cube.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.gather4.lz.2darray.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - -; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) -declare <4 x float> @llvm.amdgcn.image.gather4.l.2darray.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 - attributes #0 = { nofree norecurse nosync nounwind willreturn memory(read) } attributes #1 = { nocallback nofree nosync nounwind willreturn memory(read) } attributes #2 = { nofree norecurse nosync nounwind willreturn memory(write) } attributes #3 = { nocallback nofree nosync nounwind willreturn memory(write) } -attributes #4 = { nofree norecurse nosync nounwind willreturn memory(read) "target-features"="+extended-image-insts" } diff --git a/irif/src/extended-image-intrinsics.ll b/irif/src/extended-image-intrinsics.ll new file mode 100644 index 00000000..9eb2bbcc --- /dev/null +++ b/irif/src/extended-image-intrinsics.ll @@ -0,0 +1,436 @@ +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" +target triple = "amdgcn-amd-amdhsa" + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x float> @__llvm_amdgcn_image_sample_lz_1d_v4f32_f32(float %arg1, <8 x i32> %arg2, <4 x i32> %arg3) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 noundef 15, float %arg1, <8 x i32> %arg2, <4 x i32> %arg3, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x float> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse 
nosync nounwind willreturn memory(read) +define protected <4 x float> @__llvm_amdgcn_image_sample_l_1d_v4f32_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x float> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x float> @__llvm_amdgcn_image_sample_d_1d_v4f32_f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x float> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x float> @__llvm_amdgcn_image_sample_lz_2d_v4f32_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x float> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x float> @__llvm_amdgcn_image_sample_l_2d_v4f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x float> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x float> @__llvm_amdgcn_image_sample_d_2d_v4f32_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, <8 x i32> %arg7, <4 x i32> %arg8) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, <8 x i32> %arg7, <4 x i32> %arg8, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x float> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 immarg, float, float, float, 
float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x float> @__llvm_amdgcn_image_sample_lz_3d_v4f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.lz.3d.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x float> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.lz.3d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x float> @__llvm_amdgcn_image_sample_l_3d_v4f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.l.3d.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x float> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.l.3d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x float> @__llvm_amdgcn_image_sample_d_3d_v4f32_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, float %arg8, float %arg9, <8 x i32> %arg10, <4 x i32> %arg11) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, float %arg8, float %arg9, <8 x i32> %arg10, <4 x i32> %arg11, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x float> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32 immarg, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x float> @__llvm_amdgcn_image_sample_lz_cube_v4f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.lz.cube.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x float> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.lz.cube.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x float> @__llvm_amdgcn_image_sample_l_cube_v4f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x float> 
@llvm.amdgcn.image.sample.l.cube.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x float> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.l.cube.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x float> @__llvm_amdgcn_image_sample_lz_1darray_v4f32_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.lz.1darray.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x float> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.lz.1darray.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x float> @__llvm_amdgcn_image_sample_l_1darray_v4f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.l.1darray.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x float> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.l.1darray.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x float> @__llvm_amdgcn_image_sample_d_1darray_v4f32_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.d.1darray.v4f32.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x float> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.d.1darray.v4f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x float> @__llvm_amdgcn_image_sample_lz_2darray_v4f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.lz.2darray.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x float> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.lz.2darray.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x float> 
@__llvm_amdgcn_image_sample_l_2darray_v4f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.l.2darray.v4f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x float> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.l.2darray.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x float> @__llvm_amdgcn_image_sample_d_2darray_v4f32_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, <8 x i32> %arg8, <4 x i32> %arg9) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x float> @llvm.amdgcn.image.sample.d.2darray.v4f32.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, <8 x i32> %arg8, <4 x i32> %arg9, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x float> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.sample.d.2darray.v4f32.f32.f32(i32 immarg, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x half> @__llvm_amdgcn_image_sample_lz_1d_v4f16_f32(float %arg1, <8 x i32> %arg2, <4 x i32> %arg3) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.lz.1d.v4f16.f32(i32 noundef 15, float %arg1, <8 x i32> %arg2, <4 x i32> %arg3, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x half> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.lz.1d.v4f16.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x half> @__llvm_amdgcn_image_sample_l_1d_v4f16_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.l.1d.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x half> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.l.1d.v4f16.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x half> @__llvm_amdgcn_image_sample_d_1d_v4f16_f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.1d.v4f16.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x half> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.d.1d.v4f16.f32.f32(i32 immarg, float, float, float, 
<8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x half> @__llvm_amdgcn_image_sample_lz_2d_v4f16_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x half> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x half> @__llvm_amdgcn_image_sample_l_2d_v4f16_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.l.2d.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x half> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.l.2d.v4f16.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x half> @__llvm_amdgcn_image_sample_d_2d_v4f16_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, <8 x i32> %arg7, <4 x i32> %arg8) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.2d.v4f16.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, <8 x i32> %arg7, <4 x i32> %arg8, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x half> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.d.2d.v4f16.f32.f32(i32 immarg, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x half> @__llvm_amdgcn_image_sample_lz_3d_v4f16_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.lz.3d.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x half> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.lz.3d.v4f16.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x half> @__llvm_amdgcn_image_sample_l_3d_v4f16_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.l.3d.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x half> %tmp +} + +; Function Attrs: 
nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.l.3d.v4f16.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x half> @__llvm_amdgcn_image_sample_d_3d_v4f16_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, float %arg8, float %arg9, <8 x i32> %arg10, <4 x i32> %arg11, i32 %arg13, i32 %arg14) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.3d.v4f16.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, float %arg8, float %arg9, <8 x i32> %arg10, <4 x i32> %arg11, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x half> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.d.3d.v4f16.f32.f32(i32 immarg, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x half> @__llvm_amdgcn_image_sample_lz_cube_v4f16_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.lz.cube.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x half> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.lz.cube.v4f16.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x half> @__llvm_amdgcn_image_sample_l_cube_v4f16_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.l.cube.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x half> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.l.cube.v4f16.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x half> @__llvm_amdgcn_image_sample_lz_1darray_v4f16_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.lz.1darray.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x half> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.lz.1darray.v4f16.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x half> @__llvm_amdgcn_image_sample_l_1darray_v4f16_f32(float %arg1, float %arg2, float 
%arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.l.1darray.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x half> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.l.1darray.v4f16.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x half> @__llvm_amdgcn_image_sample_d_1darray_v4f16_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x half> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x half> @__llvm_amdgcn_image_sample_lz_2darray_v4f16_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.lz.2darray.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x half> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.lz.2darray.v4f16.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x half> @__llvm_amdgcn_image_sample_l_2darray_v4f16_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.l.2darray.v4f16.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x half> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> @llvm.amdgcn.image.sample.l.2darray.v4f16.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x half> @__llvm_amdgcn_image_sample_d_2darray_v4f16_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, <8 x i32> %arg8, <4 x i32> %arg9) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x half> @llvm.amdgcn.image.sample.d.2darray.v4f16.f32.f32(i32 noundef 15, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, <8 x i32> %arg8, <4 x i32> %arg9, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x half> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x half> 
@llvm.amdgcn.image.sample.d.2darray.v4f16.f32.f32(i32 immarg, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected float @__llvm_amdgcn_image_sample_lz_2d_f32_f32(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #0 { +bb: + %tmp = tail call float @llvm.amdgcn.image.sample.lz.2d.f32.f32(i32 noundef 1, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret float %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.lz.2d.f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected float @__llvm_amdgcn_image_sample_l_2d_f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { +bb: + %tmp = tail call float @llvm.amdgcn.image.sample.l.2d.f32.f32(i32 noundef 1, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret float %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.l.2d.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected float @__llvm_amdgcn_image_sample_d_2d_f32_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, <8 x i32> %arg7, <4 x i32> %arg8) local_unnamed_addr #0 { +bb: + %tmp = tail call float @llvm.amdgcn.image.sample.d.2d.f32.f32.f32(i32 noundef 1, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, <8 x i32> %arg7, <4 x i32> %arg8, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret float %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.d.2d.f32.f32.f32(i32 immarg, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected float @__llvm_amdgcn_image_sample_lz_2darray_f32_f32(float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5) local_unnamed_addr #0 { +bb: + %tmp = tail call float @llvm.amdgcn.image.sample.lz.2darray.f32.f32(i32 noundef 1, float %arg1, float %arg2, float %arg3, <8 x i32> %arg4, <4 x i32> %arg5, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret float %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.lz.2darray.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected float @__llvm_amdgcn_image_sample_l_2darray_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6) local_unnamed_addr #0 { +bb: + %tmp = tail call float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 noundef 1, float %arg1, float %arg2, float %arg3, float %arg4, <8 x i32> %arg5, <4 x i32> %arg6, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret float %tmp +} + +; Function 
Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected float @__llvm_amdgcn_image_sample_d_2darray_f32_f32_f32(float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, <8 x i32> %arg8, <4 x i32> %arg9, i32 %arg11, i32 %arg12) local_unnamed_addr #0 { +bb: + %tmp = tail call float @llvm.amdgcn.image.sample.d.2darray.f32.f32.f32(i32 noundef 1, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, <8 x i32> %arg8, <4 x i32> %arg9, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret float %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare float @llvm.amdgcn.image.sample.d.2darray.f32.f32.f32(i32 immarg, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x float> @__llvm_amdgcn_image_gather4_lz_2d_v4f32_f32_r(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 noundef 1, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x float> %tmp +} + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x float> @__llvm_amdgcn_image_gather4_lz_2d_v4f32_f32_g(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 noundef 2, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x float> %tmp +} + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x float> @__llvm_amdgcn_image_gather4_lz_2d_v4f32_f32_b(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 noundef 4, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x float> %tmp +} + +; Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) +define protected <4 x float> @__llvm_amdgcn_image_gather4_lz_2d_v4f32_f32_a(float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) local_unnamed_addr #0 { +bb: + %tmp = tail call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 noundef 8, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4, i1 noundef false, i32 noundef 0, i32 noundef 0) + ret <4 x float> %tmp +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) +declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1 + +attributes #0 = { nofree norecurse nosync nounwind willreturn memory(read) "target-features"="+extended-image-insts" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(read) } From 35844944509f29a9db3a070d95c67617dd38bfc1 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 12 Jan 2023 12:19:17 -0500 Subject: [PATCH 17/22] 
Delete unused BUILTIN_CANONICALIZE_2F16

I had even added __builtin_elementwise_copysign for this but then it
turns out this is unused.

Change-Id: I165e22ab45dccc1c6480c7c183db0743a16fb0e4
---
 irif/inc/irif.h     | 1 -
 ocml/src/builtins.h | 1 -
 2 files changed, 2 deletions(-)

diff --git a/irif/inc/irif.h b/irif/inc/irif.h
index dbd45466..ab7b0fb5 100644
--- a/irif/inc/irif.h
+++ b/irif/inc/irif.h
@@ -21,7 +21,6 @@ extern __attribute__((const)) float2 __llvm_fma_2f32(float2, float2, float2) __a
 extern __attribute__((const)) half2 __llvm_fma_2f16(half2, half2, half2) __asm("llvm.fma.v2f16");
 extern __attribute__((const)) half2 __llvm_round_2f16(half2) __asm("llvm.round.v2f16");
 extern __attribute__((const)) half2 __llvm_rint_2f16(half2) __asm("llvm.rint.v2f16");
-extern __attribute__((const)) half2 __llvm_canonicalize_2f16(half2) __asm("llvm.canonicalize.v2f16");
 
 #define BUILTIN_CLZ_U8(x) (uchar)(x == 0u ? 8 : __builtin_clz(x) - 24)
 #define BUILTIN_CLZ_U16(x) (ushort)(x == 0u ? 16 : __builtin_clzs(x))
diff --git a/ocml/src/builtins.h b/ocml/src/builtins.h
index b3b83f12..26f0299c 100644
--- a/ocml/src/builtins.h
+++ b/ocml/src/builtins.h
@@ -130,7 +130,6 @@
 #define BUILTIN_CANONICALIZE_F32(X) __builtin_canonicalizef(X)
 #define BUILTIN_CANONICALIZE_F64(X) __builtin_canonicalize(X)
 #define BUILTIN_CANONICALIZE_F16(X) __builtin_canonicalizef16(X)
-#define BUILTIN_CANONICALIZE_2F16(X) __llvm_canonicalize_2f16(X)
 
 #define BUILTIN_MULHI_U32(A,B) (((ulong)(A) * (ulong)(B)) >> 32)
 

From 227f14c483e5c542193e8dc4456e4dd048917533 Mon Sep 17 00:00:00 2001
From: Brian Sumner
Date: Fri, 13 Jan 2023 14:49:26 -0800
Subject: [PATCH 18/22] Add ASAN version of device malloc

Change-Id: Ie06e7efd5a129fd5030f3f109b0f0050c653c153
---
 asanrtl/src/dm.cl | 659 ++++++++++++++++++++++++++++++++++++++++++++++
 ockl/src/dm.cl    |   2 +-
 2 files changed, 660 insertions(+), 1 deletion(-)
 create mode 100644 asanrtl/src/dm.cl

diff --git a/asanrtl/src/dm.cl b/asanrtl/src/dm.cl
new file mode 100644
index 00000000..7b5d910c
--- /dev/null
+++ b/asanrtl/src/dm.cl
@@ -0,0 +1,659 @@
+/*===--------------------------------------------------------------------------
+ * ROCm Device Libraries
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *===------------------------------------------------------------------------*/ + +#include "oclc.h" +#include "irif.h" +#include "asan_util.h" +#include "shadow_mapping.h" + +static const __constant uint kAsanHeapLeftRedzoneMagicx4 = 0xfafafafaU; +static const __constant ulong kAsanHeapLeftRedzoneMagicx8 = 0xfafafafafafafafaUL; +static const __constant uchar kAsanHeapFreeMagic = (uchar)0xfd; + +extern ulong __ockl_devmem_request(ulong addr, ulong size); + +// Minimum Number of bytes we want to quarantine +#define QUARANTINE_BYTES (SLAB_BYTES * 16) + +// Whether we track non-slab allocations +#define NON_SLAB_TRACKING 1 + +// Magic at beginning of allocation +#define ALLOC_MAGIC 0xfedcba1ee1abcdefUL + +#define AS(P,V) __opencl_atomic_store(P, V, memory_order_relaxed, memory_scope_device) +#define AL(P) __opencl_atomic_load(P, memory_order_relaxed, memory_scope_device) +#define AA(P,V) __opencl_atomic_fetch_add(P, V, memory_order_relaxed, memory_scope_device) +#define AO(P,V) __opencl_atomic_fetch_or(P, V, memory_order_relaxed, memory_scope_device) +#define ACE(P,E,V) __opencl_atomic_compare_exchange_strong(P, E, V, memory_order_relaxed, memory_order_relaxed, memory_scope_device) + +// An allocation +#define ALLOC_HEADER_BYTES 32 +typedef struct alloc_struct { + ulong magic; // Assist with memory scan for header + ulong sp; // slab pointer, 0 if non-slab allocation + ulong pc; // We can only collect PC currently, callstack ID later + uint asz; // Total number of bytes including header, redzone, and round, multiple of 16 + uint usz; // user specificed size + ulong ret[]; // Address returned by malloc, always 16-byte aligned +} alloc_t; + +// Assumes 4096 byte minimum alignment of slab +#define SLAB_ALIGN 4096 +#define SLAB_CTR_MASK (ulong)(SLAB_ALIGN - 1) +#define SLAB_BUSY ((__global slab_t *)1UL) +#define SLAB_TICKS 20000 +#define SLAB_BYTES (1UL << 21) +#define SLAB_THRESHOLD (SLAB_BYTES / 64) +#define SLAB_HEADER_BYTES 32 +#define SLAB_RECYCLE_THRESHOLD ((QUARANTINE_BYTES+SLAB_BYTES-1) / SLAB_BYTES) + +// A slab of memory used to provide malloc returned blocks +typedef struct slab_s { + atomic_ulong next; // link to next slab on queue chain, must be first + atomic_ulong next2; // link to next slab on stack chain, must be second + atomic_ulong ap; // Pointer to next allocation (>= &space[0] ) + atomic_uint rb; // returned bytes + atomic_uint flags; // flags + ulong space[(SLAB_BYTES-SLAB_HEADER_BYTES)/8]; // Space for allocations. 
Must be aligned 16 +} slab_t; + +// The heap +typedef struct heap_s { + atomic_ulong fake_next; // Heap is a fake slab, must be first + atomic_ulong fake_next2; // Heap is a fake slab, must be second + atomic_ulong head; // points to dummy or most recently dequeued slab + atomic_ulong tail; // usually points to most recently enqueued slab + atomic_ulong top; // Top of slab stack + atomic_ulong cs; // current slab pointer + atomic_ulong atime; // Time most recent allocation started + atomic_ulong initial_slabs; // pointer to next preallocated slab + ulong initial_slabs_end; // pointer to end of preallocated slabs + atomic_uint nas; // Number of allocated slabs +#if defined NON_SLAB_TRACKING + atomic_ulong num_nonslab_allocations; // Count of number of non-slab allocations that have not been freed +#endif +} heap_t; + +// Inhibit control flow optimizations +#define O0(X) X = o0(X) +__attribute__((overloadable)) static int o0(int x) { int y; __asm__ volatile("" : "=v"(y) : "0"(x)); return y; } +__attribute__((overloadable)) static uint o0(uint x) { uint y; __asm__ volatile("" : "=v"(y) : "0"(x)); return y; } +__attribute__((overloadable)) static ulong o0(ulong x) { ulong y; __asm__ volatile("" : "=v"(y) : "0"(x)); return y; } + +// Overloads to broadcast the value held by the first active lane +// The result is known to be wave-uniform +static __attribute__((overloadable)) uint +first(uint v) +{ + return __builtin_amdgcn_readfirstlane(v); +} + +static __attribute__((overloadable)) ulong +first(ulong v) +{ + uint2 v2 = __builtin_astype(v, uint2); + uint2 w2; + w2.x = __builtin_amdgcn_readfirstlane(v2.x); + w2.y = __builtin_amdgcn_readfirstlane(v2.y); + return __builtin_astype(w2, ulong); +} + +static __attribute__((overloadable)) __global void * +first(__global void * v) +{ + uint2 v2 = __builtin_astype(v, uint2); + uint2 w2; + w2.x = __builtin_amdgcn_readfirstlane(v2.x); + w2.y = __builtin_amdgcn_readfirstlane(v2.y); + return __builtin_astype(w2, __global void *); +} + +// The number of active lanes at this point +REQUIRES_WAVE64 +static uint +active_lane_count_w64(void) +{ + return __builtin_popcountl(__builtin_amdgcn_read_exec()); +} + +REQUIRES_WAVE32 +static uint +active_lane_count_w32(void) +{ + return __builtin_popcount(__builtin_amdgcn_read_exec_lo()); +} + +static uint +active_lane_count(void) +{ + return __oclc_wavefrontsize64 ? active_lane_count_w64() : active_lane_count_w32(); +} + +static ulong +round_16(ulong n) +{ + return ((n + 15) >> 4) << 4; +} + +static __global slab_t * +slabptr(ulong p) +{ + return (__global slab_t *)(p & ~SLAB_CTR_MASK); +} + +static ulong +addcnt(ulong p, ulong c) +{ + return p | (((c & SLAB_CTR_MASK) + 1UL) & SLAB_CTR_MASK); +} + +NO_SANITIZE_ADDR +static __global heap_t * +get_heap_ptr(void) { + if (__oclc_ABI_version < 500) { + static __attribute__((aligned(4096))) __global heap_t heap; + return &heap; + } else { + return (__global heap_t *)((__constant size_t *)__builtin_amdgcn_implicitarg_ptr())[12]; + } +} + +// Size of additional left redzone, roughly assumes 32 byte header, multiple of 16 +static uint +added_redzone(uint sz) +{ + return sz < 128 ? 0 : + sz < 512 ? 96 : + sz < 2048 ? 224 : + sz < 8192 ? 
992 : 2016; +} + +// Called by a single workitem +static void +slab_pause(void) +{ + __builtin_amdgcn_s_sleep(2); +} + +// Intended to be called from only one lane of a wave +NO_SANITIZE_ADDR +static void +put_free_slab(__global heap_t *hp, __global slab_t *sp) +{ + ulong head = AL(&hp->head); + if (slabptr(head) == sp) { + ulong top = AL(&hp->top); + for (;;) { + AS(&sp->next2, (ulong)slabptr(top)); + if (ACE(&hp->top, &top, addcnt((ulong)sp, top))) + return; + slab_pause(); + } + } + AS(&sp->next, 0UL); + + ulong tail = AL(&hp->tail); + for (;;) { + __global slab_t *last = slabptr(tail); + ulong next = 0; + if (ACE(&last->next, &next, (ulong)sp)) + break; + + ACE(&hp->tail, &tail, addcnt(next, tail)); + slab_pause(); + } + + ACE(&hp->tail, &tail, addcnt((ulong)sp, tail)); + return; +} + +// Intended to be called from only one lane of a wave +NO_SANITIZE_ADDR +static __global slab_t * +get_free_slab(__global heap_t *hp) +{ + for (;;) { + ulong head = AL(&hp->head); + __global slab_t *first = slabptr(head); + ulong next = AL(&first->next); + if (head == AL(&hp->head)) { + ulong tail = AL(&hp->tail); + if (first == slabptr(tail)) { + if (!next) + break; + ACE(&hp->tail, &next, addcnt(next, tail)); + } else if (next) { + if (ACE(&hp->head, &head, addcnt(next, head))) + return slabptr(next); + } + } + slab_pause(); + } + + ulong top = AL(&hp->top); + for (;;) { + __global slab_t *sp = slabptr(top); + if (sp) { + ulong next2 = AL(&sp->next2); + if (ACE(&hp->top, &top, addcnt(next2, top))) + return sp; + } else + return 0; + slab_pause(); + } +} + +// reset slab, called by a single workitem +NO_SANITIZE_ADDR +static void +reset_slab(__global slab_t *sp) +{ + AS(&sp->ap, (ulong)sp + SLAB_HEADER_BYTES); + AS(&sp->rb, 0U); +} + +NO_SANITIZE_ADDR +static void +poison_allocation(__global alloc_t *ap, uint sz) +{ + __global uchar *asp = (__global uchar *)MEM_TO_SHADOW((ulong)ap) + ALLOC_HEADER_BYTES / SHADOW_GRANULARITY; + for (uint i = 0; i < (sz + SHADOW_GRANULARITY - 1) / SHADOW_GRANULARITY; ++i) + asp[i] = kAsanHeapFreeMagic; + + __builtin_amdgcn_fence(__ATOMIC_RELEASE, "agent"); +} + +NO_SANITIZE_ADDR +static void +unpublish_allocation(__global alloc_t *ap, ulong pc) +{ + ap->pc = pc; + poison_allocation(ap, ap->usz); +} + +// Free a slab based allocation +NO_SANITIZE_ADDR +static void +slab_free(__global alloc_t *ap, ulong pc) +{ + unpublish_allocation(ap, pc); + __global heap_t *hp = get_heap_ptr(); + __global slab_t *sp = (__global slab_t *)ap->sp; + int go = 1; + do { + O0(go); + if (go) { + if (sp == first(sp)) { + uint sz = __ockl_alisa_u32(ap->asz); + uint aid = __ockl_activelane_u32(); + if (aid == 0) { + uint rb = AA(&sp->rb, sz) + sz; + if (rb == SLAB_BYTES - SLAB_HEADER_BYTES) { + ulong cs = AL(&hp->cs); + if ((ulong)sp == cs) { + ACE(&hp->cs, &cs, 0UL); + } + put_free_slab(hp, sp); + } + } + go = 0; + } + } + } while (__ockl_wfany_i32(go)); +} + +// Free a non-slab allocation +NO_SANITIZE_ADDR +static void +non_slab_free(__global alloc_t *ap, ulong pc) +{ + ap->pc = pc; + __ockl_devmem_request((ulong)ap, 0); + +#if defined NON_SLAB_TRACKING + uint aid = __ockl_activelane_u32(); + uint nactive = active_lane_count(); + + if (aid == 0) { + __global heap_t *hp = get_heap_ptr(); + AA(&hp->num_nonslab_allocations, -nactive); + } +#endif +} + +// free +NO_SANITIZE_ADDR +void +__asan_free_impl(ulong aa, ulong pc) +{ + __global alloc_t *ap = (__global alloc_t *)(aa - ALLOC_HEADER_BYTES); + if (ap->sp) + slab_free(ap, pc); + else + non_slab_free(ap, pc); +} + +// Non-slab based allocation 
(when size is above threshold) +NO_SANITIZE_ADDR +static ulong +non_slab_malloc(ulong sz, ulong pc) +{ + ulong ret = __ockl_devmem_request(0UL, sz + ALLOC_HEADER_BYTES); + if (ret) { +#if defined NON_SLAB_TRACKING + uint aid = __ockl_activelane_u32(); + uint nactive = active_lane_count(); + + if (aid == 0) { + __global heap_t *hp = get_heap_ptr(); + AA(&hp->num_nonslab_allocations, nactive); + } +#endif + +#if SLAB_HEADER_BYTES == 32 + __global uint *asp = (__global uint *)MEM_TO_SHADOW(ret); + *asp = kAsanHeapLeftRedzoneMagicx4; +#else +#error unimplemented poisoning +#endif + + __global alloc_t *ap = (__global alloc_t *)ret; + ap->magic = ALLOC_MAGIC; + ap->sp = 0UL; + ap->pc = pc; + ap->asz = (uint)(sz + ALLOC_HEADER_BYTES); + ap->usz = (uint)sz; + ret += ALLOC_HEADER_BYTES; + } + return ret; +} + +// Called by a single workitem +NO_SANITIZE_ADDR +static __global slab_t * +obtain_new_slab(__global heap_t *hp) +{ + ulong ret = 0; + + ulong is = AL(&hp->initial_slabs); + ulong se = hp->initial_slabs_end; + if (is < se) { + is = AA(&hp->initial_slabs, SLAB_BYTES); + if (is < se) + ret = is; + } else { + ret = __ockl_devmem_request(0, SLAB_BYTES); + } + + if (ret) + AA(&hp->nas, 1); + + return (__global slab_t *)ret; +} + +// Called by a single workitem +NO_SANITIZE_ADDR +static __global slab_t * +try_new_slab(__global heap_t *hp) +{ + ulong atime = AL(&hp->atime); + ulong now = __ockl_steadyctr_u64(); + ulong dt = now - atime; + if (dt < SLAB_TICKS || !ACE(&hp->atime, &atime, now)) + return SLAB_BUSY; + + __global slab_t *sp = obtain_new_slab(hp); + if (sp) { + AS(&sp->next, 0UL); + AS(&sp->next2, 0UL); + AS(&sp->ap, (ulong)sp->space); + AS(&sp->rb, 0U); + AS(&sp->flags, 0U); + } + return sp; +} + +// Called by a single workitem +NO_SANITIZE_ADDR +static void +new_slab_wait(__global heap_t *hp) +{ + ulong atime = AL(&hp->atime); + ulong now = __ockl_steadyctr_u64(); + ulong dt = now - atime; + if (dt < SLAB_TICKS) + __ockl_rtcwait_u32(SLAB_TICKS - (uint)dt); +} + +// Called by a single workitem +NO_SANITIZE_ADDR +static __global slab_t * +get_current_slab(__global heap_t *hp) +{ + for (;;) { + ulong cs = AL(&hp->cs); + if (cs) + return (__global slab_t *)cs; + + slab_pause(); + + cs = AL(&hp->cs); + if (cs) + return (__global slab_t *)cs; + + slab_pause(); + + cs = AL(&hp->cs); + if (cs) + return (__global slab_t *)cs; + + if (AL(&hp->nas) >= SLAB_RECYCLE_THRESHOLD) { + __global slab_t *fs = get_free_slab(hp); + if (fs) { + reset_slab(fs); + if (ACE(&hp->cs, &cs, (ulong)fs)) + return fs; + put_free_slab(hp, fs); + return (__global slab_t *)cs; + } + } + + __global slab_t *ns = try_new_slab(hp); + if ((ulong)ns > (ulong)SLAB_BUSY) { + if (ACE(&hp->cs, &cs, (ulong)ns)) + return ns; + put_free_slab(hp, ns); + return (__global slab_t *)cs; + } + + if (!ns) + return 0; + + new_slab_wait(hp); + } +} + +NO_SANITIZE_ADDR +static void +poison_slab(__global slab_t *sp, int aid, int na) +{ + __global ulong *ssp = (__global ulong *)MEM_TO_SHADOW((ulong)sp); + + for (int i=aid; i < SLAB_BYTES / SHADOW_GRANULARITY / sizeof(ulong); i += na) + ssp[i] = kAsanHeapLeftRedzoneMagicx8; + + __builtin_amdgcn_fence(__ATOMIC_RELEASE, "agent"); + + if (!aid) + AO(&sp->flags, 2); +} + +NO_SANITIZE_ADDR +static void +poison_slab_wait(__global slab_t *sp) +{ + while (AL(&sp->flags) != 3) + slab_pause(); +} + +NO_SANITIZE_ADDR +static void +unpoison_allocation(__global alloc_t *ap, uint sz) +{ + __global uchar *asp = (__global uchar *)MEM_TO_SHADOW((ulong)ap) + ALLOC_HEADER_BYTES / SHADOW_GRANULARITY; + for (uint i = 
0; i < sz / SHADOW_GRANULARITY; ++i) + asp[i] = (uchar)0; + + if (sz % SHADOW_GRANULARITY) + asp[sz / SHADOW_GRANULARITY] = (uchar)(sz % SHADOW_GRANULARITY); + + __builtin_amdgcn_fence(__ATOMIC_RELEASE, "agent"); +} + +NO_SANITIZE_ADDR +static void +publish_allocation(__global alloc_t *ap, ulong sp, ulong pc, uint asz, uint usz) +{ + ap->magic = ALLOC_MAGIC; + ap->pc = pc; + ap->sp = sp; + ap->asz = asz; + ap->usz = usz; + + unpoison_allocation(ap, usz); +} + +// slab based malloc +NO_SANITIZE_ADDR +static ulong +slab_malloc(ulong lsz, ulong pc) +{ + __global heap_t *hp = get_heap_ptr(); + uint usz = (uint)lsz; + uint arz = added_redzone(usz); + uint asz = arz + ALLOC_HEADER_BYTES + round_16(usz); + ulong ret = 0; + + int go = 1; + do { + if (go) { + O0(go); + uint aid = __ockl_activelane_u32(); + + __global slab_t *cs = (__global slab_t *)0; + if (!aid) + cs = get_current_slab(hp); + cs = first(cs); + + if (!cs) { + go = 0; + continue; + } + + uint f = 0U; + if (!aid) { + f = AO(&cs->flags, 1U); + } + f = first(f); + if (!f) { + poison_slab(cs, aid, active_lane_count()); + } else if (f == 1) { + if (!aid) + poison_slab_wait(cs); + } + + uint o = __ockl_alisa_u32(asz); + + ulong p = 0UL; + if (!aid) + p = AA(&cs->ap, o); + p = first(p); + + if (p + o <= (ulong)cs + SLAB_BYTES) { + __global alloc_t *ap = (__global alloc_t *)(p + o - asz + arz); + publish_allocation(ap, (ulong)cs, pc, asz, usz); + ret = (ulong)ap + ALLOC_HEADER_BYTES; + go = 0; + } else { + if (!__ockl_activelane_u32()) { + ulong e = (ulong)cs; + ACE(&hp->cs, &e, 0UL); + } + if (p + o - asz < (ulong)cs + SLAB_BYTES) { + uint unused = (uint)((ulong)cs + SLAB_BYTES - (p + o - asz)); + uint rb = AA(&cs->rb, unused) + unused; + + if (rb == SLAB_BYTES - SLAB_HEADER_BYTES) + put_free_slab(hp, cs); + } + } + } + } while (__ockl_wfany_i32(go)); + + return ret; +} + +// malloc +OPT_NONE +NO_SANITIZE_ADDR +ulong +__asan_malloc_impl(ulong sz, ulong pc) +{ + if (sz > SLAB_THRESHOLD) + return non_slab_malloc(sz, pc); + else + return slab_malloc(sz, pc); +} + +// This initialization assumes a one-workgroup grid with 256 work items, +// exacty like the non-ASAN version +NO_SANITIZE_ADDR +void +__ockl_dm_init_v1(ulong ha, ulong sa, uint hb, uint nis) +{ + uint lid = __ockl_get_local_id(0); + + __global ulong *hs = (__global ulong *)MEM_TO_SHADOW(ha); + hs[lid+0*256] = kAsanHeapLeftRedzoneMagicx8; + hs[lid+1*256] = kAsanHeapLeftRedzoneMagicx8; + hs[lid+2*256] = kAsanHeapLeftRedzoneMagicx8; + hs[lid+3*256] = kAsanHeapLeftRedzoneMagicx8; + hs[lid+4*256] = kAsanHeapLeftRedzoneMagicx8; + hs[lid+5*256] = kAsanHeapLeftRedzoneMagicx8; + hs[lid+6*256] = kAsanHeapLeftRedzoneMagicx8; + hs[lid+7*256] = kAsanHeapLeftRedzoneMagicx8; + + if (lid == 0) { + __global heap_t *hp = (__global heap_t *)ha; + AS(&hp->fake_next, 0UL); + AS(&hp->fake_next2, 0UL); + AS(&hp->head, (ulong)&hp->fake_next); + AS(&hp->tail, (ulong)&hp->fake_next); + AS(&hp->top, 0UL); + AS(&hp->cs, 0UL); + AS(&hp->initial_slabs, sa); + hp->initial_slabs_end = sa + ((ulong)nis << 21); + AS(&hp->nas, 0U); +#if defined NON_SLAB_TRACKING + AS(&hp->num_nonslab_allocations, 0UL); +#endif + } +} + +#if defined NON_SLAB_TRACKING +// return a snapshot of the current number of nonslab allocations +// which haven't been deallocated +NO_SANITIZE_ADDR +ulong +__ockl_dm_nna(void) +{ + __global heap_t *hp = get_heap_ptr(); + return AL(&hp->num_nonslab_allocations); +} +#endif + diff --git a/ockl/src/dm.cl b/ockl/src/dm.cl index 2dbbdac0..acf42dd1 100644 --- a/ockl/src/dm.cl +++ b/ockl/src/dm.cl @@ 
-981,7 +981,7 @@ __ockl_dm_alloc(ulong sz) // Initialize the heap // This is intended to be called by a kernel launched by the language runtime // at device initialization time, having one workgroup consisting of 256 workitems. -void +__attribute__((weak)) void __ockl_dm_init_v1(ulong hp, ulong sp, uint hb, uint nis) { uint lid = __ockl_get_local_id(0); From 35fb6984d476142d297b76331a60fc88c2d92f25 Mon Sep 17 00:00:00 2001 From: Brian Sumner Date: Thu, 19 Jan 2023 15:03:35 -0800 Subject: [PATCH 19/22] Add missing attribute Change-Id: Iccba56bb9970a4fc31328969b644c2b49bbf8314 --- asanrtl/src/dm.cl | 1 + 1 file changed, 1 insertion(+) diff --git a/asanrtl/src/dm.cl b/asanrtl/src/dm.cl index 7b5d910c..843e5176 100644 --- a/asanrtl/src/dm.cl +++ b/asanrtl/src/dm.cl @@ -327,6 +327,7 @@ non_slab_free(__global alloc_t *ap, ulong pc) } // free +OPT_NONE NO_SANITIZE_ADDR void __asan_free_impl(ulong aa, ulong pc) From 8e595dd520973e03fbbf2bf8da34bc42d642a3bc Mon Sep 17 00:00:00 2001 From: Brian Sumner Date: Sun, 22 Jan 2023 09:10:55 -0800 Subject: [PATCH 20/22] Add missing check for null pointer in free Change-Id: Ia87df1b6c879afb0cf7021925f482577a3f025ff --- asanrtl/src/dm.cl | 3 +++ ockl/src/dm.cl | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/asanrtl/src/dm.cl b/asanrtl/src/dm.cl index 843e5176..c89d5874 100644 --- a/asanrtl/src/dm.cl +++ b/asanrtl/src/dm.cl @@ -332,6 +332,9 @@ NO_SANITIZE_ADDR void __asan_free_impl(ulong aa, ulong pc) { + if (!aa) + return; + __global alloc_t *ap = (__global alloc_t *)(aa - ALLOC_HEADER_BYTES); if (ap->sp) slab_free(ap, pc); diff --git a/ockl/src/dm.cl b/ockl/src/dm.cl index acf42dd1..9b8cf24a 100644 --- a/ockl/src/dm.cl +++ b/ockl/src/dm.cl @@ -421,7 +421,9 @@ __ockl_dm_dealloc(ulong addr) { // Check for non-block and handle elsewhere if ((addr & 0xfffUL) == 0UL) { - non_slab_free(addr); + if (addr) + non_slab_free(addr); + return; } From 3e052b9b767005794c40a490af54f4ee28ead85b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 16 Dec 2022 14:59:20 -0500 Subject: [PATCH 21/22] Replace single class test of infinities with compare equal. Regex and clang-format, plus fix up the few places that needed new parentheses. Class is fancier than necessary and has stricter requirements than a regular compare. The backend already optimizes these down to a regular compare. In the case of f16, it also brings with it a subtarget requirement as long as we're going through __builtin_amdgcn_classh. Change-Id: I3dfa805608d7c987f765b7d464063e3fda63cb63 --- ocml/src/acoshD.cl | 2 +- ocml/src/acoshF.cl | 2 +- ocml/src/asinhD.cl | 2 +- ocml/src/asinhF.cl | 2 +- ocml/src/cexpD.cl | 4 ++-- ocml/src/cexpF.cl | 4 ++-- ocml/src/csqrtD.cl | 4 ++-- ocml/src/csqrtF.cl | 4 ++-- ocml/src/hypotD.cl | 3 +-- ocml/src/hypotF.cl | 3 +-- ocml/src/hypotH.cl | 3 +-- ocml/src/j0D.cl | 2 +- ocml/src/j0F.cl | 2 +- ocml/src/j1D.cl | 2 +- ocml/src/j1F.cl | 2 +- ocml/src/log1pD.cl | 2 +- ocml/src/log1pF.cl | 2 +- ocml/src/rcbrtD.cl | 2 +- ocml/src/scalbD.cl | 4 ++-- ocml/src/scalbF.cl | 4 ++-- ocml/src/scalbH.cl | 4 ++-- ocml/src/y0D.cl | 2 +- ocml/src/y0F.cl | 2 +- ocml/src/y1D.cl | 2 +- ocml/src/y1F.cl | 2 +- 25 files changed, 32 insertions(+), 35 deletions(-) diff --git a/ocml/src/acoshD.cl b/ocml/src/acoshD.cl index a596ecae..54f812ad 100644 --- a/ocml/src/acoshD.cl +++ b/ocml/src/acoshD.cl @@ -22,7 +22,7 @@ MATH_MANGLE(acosh)(double x) double z = MATH_PRIVATE(lnep)(a, b ? 512 : 0); if (!FINITE_ONLY_OPT()) { - z = BUILTIN_CLASS_F64(x, CLASS_PINF) ? 
x : z; + z = x == PINF_F64 ? x : z; z = x < 1.0 ? QNAN_F64 : z; } diff --git a/ocml/src/acoshF.cl b/ocml/src/acoshF.cl index 5b60162b..89ecc796 100644 --- a/ocml/src/acoshF.cl +++ b/ocml/src/acoshF.cl @@ -22,7 +22,7 @@ MATH_MANGLE(acosh)(float x) float z = MATH_PRIVATE(lnep)(a, b ? 64 : 0); if (!FINITE_ONLY_OPT()) { - z = BUILTIN_CLASS_F32(x, CLASS_PINF) ? x : z; + z = x == PINF_F32 ? x : z; z = x < 1.0f ? QNAN_F32 : z; } diff --git a/ocml/src/asinhD.cl b/ocml/src/asinhD.cl index c7b2a623..c9552ae0 100644 --- a/ocml/src/asinhD.cl +++ b/ocml/src/asinhD.cl @@ -25,7 +25,7 @@ MATH_MANGLE(asinh)(double x) z = y < 0x1.0p-27 ? y : z; if (!FINITE_ONLY_OPT()) { - z = BUILTIN_CLASS_F64(y, CLASS_PINF) ? y : z; + z = y == PINF_F64 ? y : z; } return BUILTIN_COPYSIGN_F64(z, x); diff --git a/ocml/src/asinhF.cl b/ocml/src/asinhF.cl index 11305aad..0f9ae149 100644 --- a/ocml/src/asinhF.cl +++ b/ocml/src/asinhF.cl @@ -25,7 +25,7 @@ MATH_MANGLE(asinh)(float x) z = y < 0x1.0p-12f ? y : z; if (!FINITE_ONLY_OPT()) { - z = BUILTIN_CLASS_F32(y, CLASS_PINF) ? y : z; + z = y == PINF_F32 ? y : z; } return BUILTIN_COPYSIGN_F32(z, x); diff --git a/ocml/src/cexpD.cl b/ocml/src/cexpD.cl index 0c77a52e..e293f1ad 100644 --- a/ocml/src/cexpD.cl +++ b/ocml/src/cexpD.cl @@ -24,11 +24,11 @@ MATH_MANGLE(cexp)(double2 z) if (!FINITE_ONLY_OPT()) { bool isfinite = BUILTIN_ISFINITE_F64(y); - if (BUILTIN_CLASS_F64(x, CLASS_NINF)) { + if (x == NINF_F64) { rr = 0.0; ri = isfinite ? ri : 0.0; } - if (BUILTIN_CLASS_F64(x, CLASS_PINF)) { + if (x == PINF_F64) { rr = isfinite ? rr : PINF_F64; ri = isfinite ? ri : QNAN_F64; ri = y == 0.0 ? y : ri; diff --git a/ocml/src/cexpF.cl b/ocml/src/cexpF.cl index 85f64154..922061c4 100644 --- a/ocml/src/cexpF.cl +++ b/ocml/src/cexpF.cl @@ -24,11 +24,11 @@ MATH_MANGLE(cexp)(float2 z) if (!FINITE_ONLY_OPT()) { bool finite = BUILTIN_ISFINITE_F32(y); - if (BUILTIN_CLASS_F32(x, CLASS_NINF)) { + if (x == NINF_F32) { rr = 0.0f; ri = finite ? ri : 0.0f; } - if (BUILTIN_CLASS_F32(x, CLASS_PINF)) { + if (x == PINF_F32) { rr = finite ? rr : PINF_F32; ri = finite ? ri : QNAN_F32; ri = y == 0.0f ? y : ri; diff --git a/ocml/src/csqrtD.cl b/ocml/src/csqrtD.cl index fd0f684d..78498374 100644 --- a/ocml/src/csqrtD.cl +++ b/ocml/src/csqrtD.cl @@ -38,8 +38,8 @@ MATH_MANGLE(csqrt)(double2 z) bool i = BUILTIN_ISINF_F64(b); rr = i ? b : rr; ri = i ? b : ri; - ri = BUILTIN_CLASS_F64(z.x, CLASS_NINF) ? a : ri; - rr = BUILTIN_CLASS_F64(z.x, CLASS_PINF) ? a : rr; + ri = z.x == NINF_F64 ? a : ri; + rr = z.x == PINF_F64 ? a : rr; } return (double2)(rr, BUILTIN_COPYSIGN_F64(ri, z.y)); diff --git a/ocml/src/csqrtF.cl b/ocml/src/csqrtF.cl index 4fa4e6bc..a4479a2a 100644 --- a/ocml/src/csqrtF.cl +++ b/ocml/src/csqrtF.cl @@ -29,8 +29,8 @@ MATH_MANGLE(csqrt)(float2 z) bool i = BUILTIN_ISINF_F32(b); rr = i ? b : rr; ri = i ? b : ri; - ri = BUILTIN_CLASS_F32(z.x, CLASS_NINF) ? a : ri; - rr = BUILTIN_CLASS_F32(z.x, CLASS_PINF) ? a : rr; + ri = z.x == NINF_F32 ? a : ri; + rr = z.x == PINF_F32 ? a : rr; } return (float2)(rr, BUILTIN_COPYSIGN_F32(ri, z.y)); diff --git a/ocml/src/hypotD.cl b/ocml/src/hypotD.cl index ef9b5819..a0b5b56a 100644 --- a/ocml/src/hypotD.cl +++ b/ocml/src/hypotD.cl @@ -22,8 +22,7 @@ MATH_MANGLE(hypot)(double x, double y) ret = (BUILTIN_ISNAN_F64(x) | BUILTIN_ISNAN_F64(y)) ? QNAN_F64 : ret; - ret = (BUILTIN_ISINF_F64(x) | BUILTIN_ISINF_F64(y)) ? - PINF_F64 : ret; + ret = (BUILTIN_ISINF_F64(x) | BUILTIN_ISINF_F64(y)) ? 
PINF_F64 : ret; } return ret; diff --git a/ocml/src/hypotF.cl b/ocml/src/hypotF.cl index 258e2482..0be18ae9 100644 --- a/ocml/src/hypotF.cl +++ b/ocml/src/hypotF.cl @@ -19,8 +19,7 @@ MATH_MANGLE(hypot)(float x, float y) float ret = BUILTIN_FLDEXP_F32(MATH_FAST_SQRT(MATH_MAD(a, a, b*b)), e); if (!FINITE_ONLY_OPT()) { - ret = BUILTIN_ISINF_F32(t) ? - PINF_F32 : ret; + ret = BUILTIN_ISINF_F32(t) ? PINF_F32 : ret; } return ret; diff --git a/ocml/src/hypotH.cl b/ocml/src/hypotH.cl index 154930b3..8fce0cc0 100644 --- a/ocml/src/hypotH.cl +++ b/ocml/src/hypotH.cl @@ -19,8 +19,7 @@ MATH_MANGLE(hypot)(half x, half y) half ret = (half)BUILTIN_SQRT_F32(d2); if (!FINITE_ONLY_OPT()) { - ret = (BUILTIN_ISINF_F16(x) | BUILTIN_ISINF_F16(y)) ? - PINF_F16 : ret; + ret = (BUILTIN_ISINF_F16(x) | BUILTIN_ISINF_F16(y)) ? PINF_F16 : ret; } return ret; diff --git a/ocml/src/j0D.cl b/ocml/src/j0D.cl index 0ea2af36..df4ee9cf 100644 --- a/ocml/src/j0D.cl +++ b/ocml/src/j0D.cl @@ -85,7 +85,7 @@ MATH_MANGLE(j0)(double x) double r2 = r*r; double p = MATH_PRIVATE(bp0)(r2) * r; ret = 0x1.9884533d43651p-1 * MATH_FAST_SQRT(r) * MATH_PRIVATE(ba0)(r2) * MATH_PRIVATE(cosb)(x, 0, p); - ret = BUILTIN_CLASS_F64(x, CLASS_PINF) ? 0.0 : ret; + ret = x == PINF_F64 ? 0.0 : ret; } return ret; diff --git a/ocml/src/j0F.cl b/ocml/src/j0F.cl index 1c1e3fed..8cdc5205 100644 --- a/ocml/src/j0F.cl +++ b/ocml/src/j0F.cl @@ -80,7 +80,7 @@ MATH_MANGLE(j0)(float x) float r2 = r*r; float p = MATH_PRIVATE(bp0)(r2) * r; ret = 0x1.988454p-1f * BUILTIN_RSQRT_F32(x) * MATH_PRIVATE(ba0)(r2) * MATH_PRIVATE(cosb)(x, 0, p); - ret = BUILTIN_CLASS_F32(x, CLASS_PINF) ? 0.0f : ret; + ret = x == PINF_F32 ? 0.0f : ret; } return ret; diff --git a/ocml/src/j1D.cl b/ocml/src/j1D.cl index b9a4cc2d..1884b4dc 100644 --- a/ocml/src/j1D.cl +++ b/ocml/src/j1D.cl @@ -85,7 +85,7 @@ MATH_MANGLE(j1)(double x) double r2 = r*r; double p = MATH_PRIVATE(bp1)(r2) * r; ret = 0x1.9884533d43651p-1 * MATH_FAST_SQRT(r) * MATH_PRIVATE(ba1)(r2) * MATH_PRIVATE(cosb)(ax, 1, p); - ret = BUILTIN_CLASS_F64(ax, CLASS_PINF) ? 0.0 : ret; + ret = ax == PINF_F64 ? 0.0 : ret; } if (x < 0.0) diff --git a/ocml/src/j1F.cl b/ocml/src/j1F.cl index 4680b4fc..ce02abf5 100644 --- a/ocml/src/j1F.cl +++ b/ocml/src/j1F.cl @@ -79,7 +79,7 @@ MATH_MANGLE(j1)(float x) float r2 = r*r; float p = MATH_PRIVATE(bp1)(r2) * r; ret = 0x1.988454p-1f * BUILTIN_RSQRT_F32(ax) * MATH_PRIVATE(ba1)(r2) * MATH_PRIVATE(cosb)(ax, 1, p); - ret = BUILTIN_CLASS_F32(ax, CLASS_PINF) ? 0.0f : ret; + ret = ax == PINF_F32 ? 0.0f : ret; } if (x < 0.0f) diff --git a/ocml/src/log1pD.cl b/ocml/src/log1pD.cl index 86d135ee..12a9b45c 100644 --- a/ocml/src/log1pD.cl +++ b/ocml/src/log1pD.cl @@ -18,7 +18,7 @@ MATH_MANGLE(log1p)(double x) double z = MATH_PRIVATE(lnep)(add(1.0, x), 0); if (!FINITE_ONLY_OPT()) { - z = BUILTIN_CLASS_F64(x, CLASS_PINF) ? x : z; + z = x == PINF_F64 ? x : z; z = x < -1.0 ? QNAN_F64 : z; z = x == -1.0 ? NINF_F64 : z; } diff --git a/ocml/src/log1pF.cl b/ocml/src/log1pF.cl index 7e2b1a3d..53a33096 100644 --- a/ocml/src/log1pF.cl +++ b/ocml/src/log1pF.cl @@ -18,7 +18,7 @@ MATH_MANGLE(log1p)(float x) float z = MATH_PRIVATE(lnep)(add(1.0f, x), 0); if (!FINITE_ONLY_OPT()) { - z = BUILTIN_CLASS_F32(x, CLASS_PINF) ? x : z; + z = x == PINF_F32 ? x : z; z = x < -1.0f ? QNAN_F32 : z; z = x == -1.0f ? 
NINF_F32 : z; } diff --git a/ocml/src/rcbrtD.cl b/ocml/src/rcbrtD.cl index 902f2f7f..7aa4fd35 100644 --- a/ocml/src/rcbrtD.cl +++ b/ocml/src/rcbrtD.cl @@ -28,7 +28,7 @@ MATH_MANGLE(rcbrt)(double x) c = BUILTIN_FLDEXP_F64(c, -e); if (!FINITE_ONLY_OPT()) { - c = BUILTIN_CLASS_F64(a, CLASS_PINF) ? 0.0 : c; + c = a == PINF_F64 ? 0.0 : c; c = x == 0.0 ? PINF_F64 : c; } diff --git a/ocml/src/scalbD.cl b/ocml/src/scalbD.cl index 1269103f..2c229216 100644 --- a/ocml/src/scalbD.cl +++ b/ocml/src/scalbD.cl @@ -15,8 +15,8 @@ MATH_MANGLE(scalb)(double x, double y) if (!FINITE_ONLY_OPT()) { ret = (BUILTIN_ISNAN_F64(x) | BUILTIN_ISNAN_F64(y)) ? QNAN_F64 : ret; - ret = (BUILTIN_ISZERO_F64(x) & BUILTIN_CLASS_F64(y, CLASS_PINF)) ? QNAN_F64 : ret; - ret = (BUILTIN_ISINF_F64(x) & BUILTIN_CLASS_F64(y, CLASS_NINF)) ? QNAN_F64 : ret; + ret = (BUILTIN_ISZERO_F64(x) & (y == PINF_F64)) ? QNAN_F64 : ret; + ret = (BUILTIN_ISINF_F64(x) & (y == NINF_F64)) ? QNAN_F64 : ret; } return ret; diff --git a/ocml/src/scalbF.cl b/ocml/src/scalbF.cl index 89e2ddd3..3bb48cd7 100644 --- a/ocml/src/scalbF.cl +++ b/ocml/src/scalbF.cl @@ -15,8 +15,8 @@ MATH_MANGLE(scalb)(float x, float y) if (!FINITE_ONLY_OPT()) { ret = (BUILTIN_ISNAN_F32(x) | BUILTIN_ISNAN_F32(y)) ? QNAN_F32 : ret; - ret = (BUILTIN_ISINF_F32(x) & BUILTIN_CLASS_F32(y, CLASS_PINF)) ? QNAN_F32 : ret; - ret = (BUILTIN_ISINF_F32(x) & BUILTIN_CLASS_F32(y, CLASS_NINF)) ? QNAN_F32 : ret; + ret = (BUILTIN_ISINF_F32(x) & (y == PINF_F32)) ? QNAN_F32 : ret; + ret = (BUILTIN_ISINF_F32(x) & (y == NINF_F32)) ? QNAN_F32 : ret; } return ret; diff --git a/ocml/src/scalbH.cl b/ocml/src/scalbH.cl index ed90be6e..ddeb2d60 100644 --- a/ocml/src/scalbH.cl +++ b/ocml/src/scalbH.cl @@ -17,8 +17,8 @@ MATH_MANGLE(scalb)(half x, half y) if (!FINITE_ONLY_OPT()) { ret = (BUILTIN_ISNAN_F16(x) | BUILTIN_ISNAN_F16(y)) ? QNAN_F16 : ret; - ret = (BUILTIN_ISZERO_F16(x) & BUILTIN_CLASS_F16(y, CLASS_PINF)) ? QNAN_F16 : ret; - ret = (BUILTIN_ISINF_F16(x) & BUILTIN_CLASS_F16(y, CLASS_NINF)) ? QNAN_F16 : ret; + ret = (BUILTIN_ISZERO_F16(x) & (y == PINF_F16)) ? QNAN_F16 : ret; + ret = (BUILTIN_ISINF_F16(x) & (y == NINF_F16)) ? QNAN_F16 : ret; } return ret; diff --git a/ocml/src/y0D.cl b/ocml/src/y0D.cl index 036f738f..b296c054 100644 --- a/ocml/src/y0D.cl +++ b/ocml/src/y0D.cl @@ -146,7 +146,7 @@ MATH_MANGLE(y0)(double x) double r2 = r*r; double p = MATH_PRIVATE(bp0)(r2) * r; ret = 0x1.9884533d43651p-1 * MATH_FAST_SQRT(r) * MATH_PRIVATE(ba0)(r2) * MATH_PRIVATE(sinb)(x, 0, p); - ret = BUILTIN_CLASS_F64(x, CLASS_PINF) ? 0.0 : ret; + ret = x == PINF_F64 ? 0.0 : ret; } return ret; diff --git a/ocml/src/y0F.cl b/ocml/src/y0F.cl index ce86637f..751f2046 100644 --- a/ocml/src/y0F.cl +++ b/ocml/src/y0F.cl @@ -142,7 +142,7 @@ MATH_MANGLE(y0)(float x) float r2 = r*r; float p = MATH_PRIVATE(bp0)(r2) * r; ret = 0x1.988454p-1f * BUILTIN_RSQRT_F32(x) * MATH_PRIVATE(ba0)(r2) * MATH_PRIVATE(sinb)(x, 0, p); - ret = BUILTIN_CLASS_F32(x, CLASS_PINF) ? 0.0f : ret; + ret = x == PINF_F32 ? 0.0f : ret; } return ret; diff --git a/ocml/src/y1D.cl b/ocml/src/y1D.cl index b6a5b664..0665a3e3 100644 --- a/ocml/src/y1D.cl +++ b/ocml/src/y1D.cl @@ -152,7 +152,7 @@ MATH_MANGLE(y1)(double x) double r2 = r*r; double p = MATH_PRIVATE(bp1)(r2) * r; ret = 0x1.9884533d43651p-1 * MATH_FAST_SQRT(r) * MATH_PRIVATE(ba1)(r2) * MATH_PRIVATE(sinb)(x, 1, p); - ret = BUILTIN_CLASS_F64(x, CLASS_PINF) ? 0.0 : ret; + ret = x == PINF_F64 ? 
0.0 : ret; } return ret; diff --git a/ocml/src/y1F.cl b/ocml/src/y1F.cl index 8145e0ad..3f1d9b42 100644 --- a/ocml/src/y1F.cl +++ b/ocml/src/y1F.cl @@ -149,7 +149,7 @@ MATH_MANGLE(y1)(float x) float r2 = r*r; float p = MATH_PRIVATE(bp1)(r2) * r; ret = 0x1.988454p-1f * BUILTIN_RSQRT_F32(x) * MATH_PRIVATE(ba1)(r2) * MATH_PRIVATE(sinb)(x, 1, p); - ret = BUILTIN_CLASS_F32(x, CLASS_PINF) ? 0.0f : ret; + ret = x == PINF_F32 ? 0.0f : ret; } return ret; From 4d8e283e8ff17e89b7502649a78ac58d6523f4a3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 16 Dec 2022 15:12:03 -0500 Subject: [PATCH 22/22] Use BUILTIN_ISUNORDERED_* Change-Id: I5e45cb82854569929318b428a93f0d1ea5af55ac --- ocml/src/atan2D.cl | 3 +-- ocml/src/atan2F.cl | 3 +-- ocml/src/atan2H.cl | 3 +-- ocml/src/atan2piD.cl | 3 +-- ocml/src/atan2piF.cl | 4 +--- ocml/src/atan2piH.cl | 4 +--- ocml/src/csqrtD.cl | 2 +- ocml/src/hypotD.cl | 3 +-- ocml/src/len4D.cl | 4 ++-- ocml/src/rhypotD.cl | 3 +-- ocml/src/scalbD.cl | 2 +- ocml/src/scalbF.cl | 2 +- ocml/src/scalbH.cl | 2 +- 13 files changed, 14 insertions(+), 24 deletions(-) diff --git a/ocml/src/atan2D.cl b/ocml/src/atan2D.cl index 6c962374..5c5e76bb 100644 --- a/ocml/src/atan2D.cl +++ b/ocml/src/atan2D.cl @@ -40,8 +40,7 @@ MATH_MANGLE(atan2)(double y, double x) t = BUILTIN_COPYSIGN_F64(t, y); a = (BUILTIN_ISINF_F64(x) & BUILTIN_ISINF_F64(y)) ? t : a; - a = (BUILTIN_ISNAN_F64(x) | BUILTIN_ISNAN_F64(y)) ? - QNAN_F64 : a; + a = BUILTIN_ISUNORDERED_F64(x, y) ? QNAN_F64 : a; } return BUILTIN_COPYSIGN_F64(a, y); diff --git a/ocml/src/atan2F.cl b/ocml/src/atan2F.cl index 573b6fcd..78db7fae 100644 --- a/ocml/src/atan2F.cl +++ b/ocml/src/atan2F.cl @@ -46,8 +46,7 @@ MATH_MANGLE(atan2)(float y, float x) a = (BUILTIN_ISINF_F32(x) & BUILTIN_ISINF_F32(y)) ? t : a; // x or y is NaN - a = (BUILTIN_ISNAN_F32(x) | BUILTIN_ISNAN_F32(y)) ? - QNAN_F32 : a; + a = BUILTIN_ISUNORDERED_F32(x, y) ? QNAN_F32 : a; } return BUILTIN_COPYSIGN_F32(a, y); diff --git a/ocml/src/atan2H.cl b/ocml/src/atan2H.cl index 42dde2a0..c60e65eb 100644 --- a/ocml/src/atan2H.cl +++ b/ocml/src/atan2H.cl @@ -43,8 +43,7 @@ MATH_MANGLE(atan2)(half y, half x) a = (BUILTIN_ISINF_F16(x) & BUILTIN_ISINF_F16(y)) ? t : a; // x or y is NaN - a = (BUILTIN_ISNAN_F16(x) | BUILTIN_ISNAN_F16(y)) ? - QNAN_F16 : a; + a = BUILTIN_ISUNORDERED_F16(x, y) ? QNAN_F16 : a; } return BUILTIN_COPYSIGN_F16(a, y); diff --git a/ocml/src/atan2piD.cl b/ocml/src/atan2piD.cl index fd18a2cb..f04680cb 100644 --- a/ocml/src/atan2piD.cl +++ b/ocml/src/atan2piD.cl @@ -37,8 +37,7 @@ MATH_MANGLE(atan2pi)(double y, double x) t = BUILTIN_COPYSIGN_F64(t, y); a = (BUILTIN_ISINF_F64(x) & BUILTIN_ISINF_F64(y)) ? t : a; - a = (BUILTIN_ISNAN_F64(x) | BUILTIN_ISNAN_F64(y)) ? - QNAN_F64 : a; + a = BUILTIN_ISUNORDERED_F64(x, y) ? QNAN_F64 : a; } return BUILTIN_COPYSIGN_F64(a, y); diff --git a/ocml/src/atan2piF.cl b/ocml/src/atan2piF.cl index 9418eeea..d55c845c 100644 --- a/ocml/src/atan2piF.cl +++ b/ocml/src/atan2piF.cl @@ -43,10 +43,8 @@ MATH_MANGLE(atan2pi)(float y, float x) a = (BUILTIN_ISINF_F32(x) & BUILTIN_ISINF_F32(y)) ? at : a; // x or y is NaN - a = (BUILTIN_ISNAN_F32(x) | BUILTIN_ISNAN_F32(y)) ? - QNAN_F32 : a; + a = BUILTIN_ISUNORDERED_F32(x, y) ? QNAN_F32 : a; } return BUILTIN_COPYSIGN_F32(a, y); } - diff --git a/ocml/src/atan2piH.cl b/ocml/src/atan2piH.cl index ff5dbed6..fb2c0341 100644 --- a/ocml/src/atan2piH.cl +++ b/ocml/src/atan2piH.cl @@ -40,10 +40,8 @@ MATH_MANGLE(atan2pi)(half y, half x) at : a; // x or y is NaN - a = (BUILTIN_ISNAN_F16(x) | BUILTIN_ISNAN_F16(y)) ? 
- QNAN_F16 : a; + a = BUILTIN_ISUNORDERED_F16(x, y) ? QNAN_F16 : a; } return BUILTIN_COPYSIGN_F16(a, y); } - diff --git a/ocml/src/csqrtD.cl b/ocml/src/csqrtD.cl index 78498374..8614c825 100644 --- a/ocml/src/csqrtD.cl +++ b/ocml/src/csqrtD.cl @@ -15,7 +15,7 @@ MATH_MANGLE(csqrt)(double2 z) double t = BUILTIN_MAX_F64(a, b); if (!FINITE_ONLY_OPT()) { - t = (BUILTIN_ISNAN_F64(a) | BUILTIN_ISNAN_F64(b)) ? QNAN_F64 : t; + t = BUILTIN_ISUNORDERED_F64(a, b) ? QNAN_F64 : t; } int e = BUILTIN_FREXP_EXP_F64(t); diff --git a/ocml/src/hypotD.cl b/ocml/src/hypotD.cl index a0b5b56a..efcca4db 100644 --- a/ocml/src/hypotD.cl +++ b/ocml/src/hypotD.cl @@ -19,8 +19,7 @@ MATH_MANGLE(hypot)(double x, double y) double ret = BUILTIN_FLDEXP_F64(MATH_FAST_SQRT(MATH_MAD(a, a, b*b)), e); if (!FINITE_ONLY_OPT()) { - ret = (BUILTIN_ISNAN_F64(x) | BUILTIN_ISNAN_F64(y)) ? - QNAN_F64 : ret; + ret = BUILTIN_ISUNORDERED_F64(x, y) ? QNAN_F64 : ret; ret = (BUILTIN_ISINF_F64(x) | BUILTIN_ISINF_F64(y)) ? PINF_F64 : ret; } diff --git a/ocml/src/len4D.cl b/ocml/src/len4D.cl index 1b8f5c87..334a4ceb 100644 --- a/ocml/src/len4D.cl +++ b/ocml/src/len4D.cl @@ -39,8 +39,8 @@ MATH_MANGLE(len4)(double x, double y, double z, double w) double ret = BUILTIN_FLDEXP_F64(MATH_FAST_SQRT(MATH_MAD(a, a, MATH_MAD(b, b, MATH_MAD(c, c, d*d)))), e); if (!FINITE_ONLY_OPT()) { - ret = (BUILTIN_ISNAN_F64(x) | BUILTIN_ISNAN_F64(y) | - BUILTIN_ISNAN_F64(z) | BUILTIN_ISNAN_F64(w)) ? QNAN_F64 : ret; + ret = (BUILTIN_ISUNORDERED_F64(x, y) | + BUILTIN_ISUNORDERED_F64(z, w)) ? QNAN_F64 : ret; ret = (BUILTIN_ISINF_F64(x) | BUILTIN_ISINF_F64(y) | BUILTIN_ISINF_F64(z) | diff --git a/ocml/src/rhypotD.cl b/ocml/src/rhypotD.cl index 126b01d3..8d868e73 100644 --- a/ocml/src/rhypotD.cl +++ b/ocml/src/rhypotD.cl @@ -25,8 +25,7 @@ MATH_MANGLE(rhypot)(double x, double y) if (!FINITE_ONLY_OPT()) { ret = t == 0.0 ? PINF_F64 : ret; - ret = (BUILTIN_ISNAN_F64(x) | BUILTIN_ISNAN_F64(y)) ? - QNAN_F64 : ret; + ret = BUILTIN_ISUNORDERED_F64(x, y) ? QNAN_F64 : ret; ret = (BUILTIN_ISINF_F64(x) | BUILTIN_ISINF_F64(y)) ? 0.0 : ret; } diff --git a/ocml/src/scalbD.cl b/ocml/src/scalbD.cl index 2c229216..cb9e2688 100644 --- a/ocml/src/scalbD.cl +++ b/ocml/src/scalbD.cl @@ -14,7 +14,7 @@ MATH_MANGLE(scalb)(double x, double y) double ret = MATH_MANGLE(ldexp)(x, (int)BUILTIN_RINT_F64(t)); if (!FINITE_ONLY_OPT()) { - ret = (BUILTIN_ISNAN_F64(x) | BUILTIN_ISNAN_F64(y)) ? QNAN_F64 : ret; + ret = BUILTIN_ISUNORDERED_F64(x, y) ? QNAN_F64 : ret; ret = (BUILTIN_ISZERO_F64(x) & (y == PINF_F64)) ? QNAN_F64 : ret; ret = (BUILTIN_ISINF_F64(x) & (y == NINF_F64)) ? QNAN_F64 : ret; } diff --git a/ocml/src/scalbF.cl b/ocml/src/scalbF.cl index 3bb48cd7..dbdbebe0 100644 --- a/ocml/src/scalbF.cl +++ b/ocml/src/scalbF.cl @@ -14,7 +14,7 @@ MATH_MANGLE(scalb)(float x, float y) float ret = MATH_MANGLE(ldexp)(x, (int)BUILTIN_RINT_F32(t)); if (!FINITE_ONLY_OPT()) { - ret = (BUILTIN_ISNAN_F32(x) | BUILTIN_ISNAN_F32(y)) ? QNAN_F32 : ret; + ret = BUILTIN_ISUNORDERED_F32(x, y) ? QNAN_F32 : ret; ret = (BUILTIN_ISINF_F32(x) & (y == PINF_F32)) ? QNAN_F32 : ret; ret = (BUILTIN_ISINF_F32(x) & (y == NINF_F32)) ? QNAN_F32 : ret; } diff --git a/ocml/src/scalbH.cl b/ocml/src/scalbH.cl index ddeb2d60..0e243ff6 100644 --- a/ocml/src/scalbH.cl +++ b/ocml/src/scalbH.cl @@ -16,7 +16,7 @@ MATH_MANGLE(scalb)(half x, half y) half ret = MATH_MANGLE(ldexp)(x, (int)BUILTIN_RINT_F16(t)); if (!FINITE_ONLY_OPT()) { - ret = (BUILTIN_ISNAN_F16(x) | BUILTIN_ISNAN_F16(y)) ? QNAN_F16 : ret; + ret = BUILTIN_ISUNORDERED_F16(x, y) ? 
QNAN_F16 : ret; ret = (BUILTIN_ISZERO_F16(x) & (y == PINF_F16)) ? QNAN_F16 : ret; ret = (BUILTIN_ISINF_F16(x) & (y == NINF_F16)) ? QNAN_F16 : ret; }
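
Editor's illustration for the last two patches: both rewrites rest on two IEEE-754 facts -- +infinity compares equal only to +infinity, and an unordered compare is true exactly when at least one operand is NaN. The sketch below is a minimal stand-alone OpenCL example, not code taken from the series; it uses the standard OpenCL builtins isinf/isnan/isunordered and the INFINITY/NAN macros as stand-ins for OCML's BUILTIN_* and PINF_*/QNAN_* wrappers.

    // Patch 21: a class test for +inf becomes an ordinary equality compare;
    // x == INFINITY is true only for +inf and false for NaN and everything else.
    static float pinf_passthrough(float x, float z)
    {
        return x == INFINITY ? x : z;        // was: BUILTIN_CLASS_F32(x, CLASS_PINF) ? x : z
    }

    // Patch 22: two NaN tests OR'd together collapse into one unordered compare;
    // isunordered(x, y) is nonzero exactly when x or y is NaN.
    static float nan_propagate(float x, float y, float a)
    {
        return isunordered(x, y) ? NAN : a;  // was: (isnan(x) | isnan(y)) ? NAN : a
    }

Either form expresses the special-case handling with a plain floating-point compare at the source level, which matches the commit message's note that the backend already lowers the class intrinsic to a regular compare.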