diff --git a/src/f32.rs b/src/f32.rs index a132af3..90d944e 100644 --- a/src/f32.rs +++ b/src/f32.rs @@ -88,10 +88,7 @@ pub use u10::{ }; mod u15; -#[rustfmt::skip] -pub use u15::{ - erfcf as erfc_u15, -}; +pub use u15::erfcf as erfc_u15; mod u35; #[rustfmt::skip] @@ -434,7 +431,7 @@ fn ilogbkf(mut d: f32) -> i32 { } else { d }; - let q = ((d.to_bits() >> 23) & 0xff) as i32; + let q = ((d.to_bits() as i32) >> 23) & 0xff; if m { q - (64 + 0x7f) } else { @@ -446,7 +443,7 @@ fn ilogbkf(mut d: f32) -> i32 { // normalized FP value. #[inline] fn ilogb2kf(d: f32) -> i32 { - ((d.to_bits() >> 23) & 0xff) as i32 - 0x7f + (((d.to_bits() as i32) >> 23) & 0xff) - 0x7f } #[inline] @@ -939,7 +936,7 @@ pub fn fmodf(x: f32, y: f32) -> f32 { if d == 0. { 0. } else { - f32::from_bits(d.to_bits() - 1) + f32::from_bits(((d.to_bits() as i32) - 1) as u32) } } diff --git a/src/f32/u05.rs b/src/f32/u05.rs index d1bbc5f..6e4fd56 100644 --- a/src/f32/u05.rs +++ b/src/f32/u05.rs @@ -180,7 +180,7 @@ pub fn sqrtf(mut d: f32) -> f32 { } // http://en.wikipedia.org/wiki/Fast_inverse_square_root - let mut x = f32::from_bits(0x_5f37_5a86 - ((d + 1e-45).to_bits() >> 1)); + let mut x = f32::from_bits((0x_5f37_5a86 - (((d + 1e-45).to_bits() as i32) >> 1)) as u32); x *= 1.5 - 0.5 * d * x * x; x *= 1.5 - 0.5 * d * x * x; diff --git a/src/f32/u10.rs b/src/f32/u10.rs index eaeacd9..2d9321b 100644 --- a/src/f32/u10.rs +++ b/src/f32/u10.rs @@ -1011,12 +1011,14 @@ pub fn powf(x: f32, y: f32) -> f32 { }; result *= if x >= 0. { 1. - } else if !yisint { - f32::NAN - } else if yisodd { - -1. + } else if yisint { + if yisodd { + -1. + } else { + 1. + } } else { - 1. + f32::NAN }; let efx = (fabsfk(x) - 1.).mul_sign(y); @@ -1025,12 +1027,12 @@ pub fn powf(x: f32, y: f32) -> f32 { } else if x.is_nan() || y.is_nan() { f32::NAN } else if x.is_infinite() || (x == 0.) { - (if yisodd { x.sign() } else { 1. }) - * (if (if x == 0. { -y } else { y }) < 0. { - 0. - } else { - f32::INFINITY - }) + (if y.is_sign_negative() ^ (x == 0.) { + 0. + } else { + f32::INFINITY + }) + .mul_sign(if yisodd { x } else { 1. }) } else if y.is_infinite() { if efx < 0. { 0. diff --git a/src/f32/u35.rs b/src/f32/u35.rs index 32f7ba0..9425ad3 100644 --- a/src/f32/u35.rs +++ b/src/f32/u35.rs @@ -796,7 +796,7 @@ pub fn sqrtf(mut d: f32) -> f32 { } // http://en.wikipedia.org/wiki/Fast_inverse_square_root - let mut x = f32::from_bits(0x_5f37_5a86 - ((d + 1e-45).to_bits() >> 1)); + let mut x = f32::from_bits((0x_5f37_5a86 - (((d + 1e-45).to_bits() as i32) >> 1)) as u32); x *= 1.5 - 0.5 * d * x * x; x *= 1.5 - 0.5 * d * x * x; diff --git a/src/f32x.rs b/src/f32x.rs index 8203b66..f592c5e 100644 --- a/src/f32x.rs +++ b/src/f32x.rs @@ -538,7 +538,7 @@ where { let o = d.simd_lt(F32x::splat(5.421_010_862_427_522_e-20)); d = o.select(F32x::splat(1.844_674_407_370_955_2_e19) * d, d); - let q = (d.to_bits() >> U32x::splat(23)).cast() & I32x::splat(0xff); + let q = (d.to_bits().cast() >> I32x::splat(23)) & I32x::splat(0xff); q - o.select(I32x::splat(64 + 0x7f), I32x::splat(0x7f)) } @@ -551,8 +551,8 @@ pub(crate) fn ilogb2kf(d: F32x) -> I32x where LaneCount: SupportedLaneCount, { - let q = d.to_bits(); - let mut q = (q >> U32x::splat(23)).cast(); + let q = d.to_bits().cast(); + let mut q = q >> I32x::splat(23); q &= I32x::splat(0xff); q - I32x::splat(0x7f) } diff --git a/src/f32x/u05.rs b/src/f32x/u05.rs index 0fbdabc..6b20718 100644 --- a/src/f32x/u05.rs +++ b/src/f32x/u05.rs @@ -149,7 +149,7 @@ where let mut x = F32x::from_bits( (I32x::splat(0x_5f37_5a86) - - ((d + F32x::splat(1e-45)).to_bits() >> U32x::splat(1)).cast()) + - ((d + F32x::splat(1e-45)).to_bits().cast() >> I32x::splat(1))) .cast(), ); diff --git a/src/f32x/u10.rs b/src/f32x/u10.rs index 2c64947..dd41429 100644 --- a/src/f32x/u10.rs +++ b/src/f32x/u10.rs @@ -1395,15 +1395,9 @@ where ); result = (x.is_infinite() | x.simd_eq(F32x::ZERO)).select( - yisodd.select(x.sign(), F32x::ONE) - * F32x::from_bits( - !x.simd_eq(F32x::ZERO) - .select(-y, y) - .simd_lt(F32x::ZERO) - .to_int() - .cast::() - & F32x::INFINITY.to_bits(), - ), + (y.is_sign_negative() ^ x.simd_eq(F32x::ZERO)) + .select(F32x::ZERO, F32x::INFINITY) + .mul_sign(yisodd.select(x, F32x::ONE)), result, ); diff --git a/src/f32x/u35.rs b/src/f32x/u35.rs index 0ffdec2..ee5c4c9 100644 --- a/src/f32x/u35.rs +++ b/src/f32x/u35.rs @@ -1124,10 +1124,10 @@ where { let e = F32x::from_bits( U32x::splat(0x_2000_0000) - + (U32x::splat(0x_7f00_0000) & (d.to_bits() >> U32x::splat(1))), + + (U32x::splat(0x_7f00_0000) & (d.to_bits().cast() >> I32x::splat(1))), ); let m = F32x::from_bits( - I32x::splat(0x_3f00_0000) + (I32x::splat(0x_01ff_ffff) & I32x::from_bits(d)), + I32x::splat(0x_3f00_0000) + (I32x::splat(0x_01ff_ffff) & d.to_bits().cast()), ); let mut x = vrsqrteq_f32(m); x = vmulq_f32(x, vrsqrtsq_f32(m, vmulq_f32(x, x))); diff --git a/src/f64.rs b/src/f64.rs index 72c767e..20b6cd7 100644 --- a/src/f64.rs +++ b/src/f64.rs @@ -119,10 +119,7 @@ pub use u10::{ }; mod u15; -#[rustfmt::skip] -pub use u15::{ - erfc as erfc_u15, -}; +pub use u15::erfc as erfc_u15; mod u35; #[rustfmt::skip] pub use u35::{ @@ -876,7 +873,7 @@ pub fn nextafter(x: f64, y: f64) -> f64 { let mut cxi = x.to_bits() as i64; let c = (cxi < 0) == (y < x); if c { - cxi = -(cxi ^ (1 << 63)); + cxi = -(cxi ^ i64::MIN); } if x != y { @@ -884,7 +881,7 @@ pub fn nextafter(x: f64, y: f64) -> f64 { } if c { - cxi = -(((cxi as u64) ^ (1u64 << 63)) as i64); + cxi = -(cxi ^ i64::MIN); } let cxf = f64::from_bits(cxi as u64); diff --git a/src/f64/u10.rs b/src/f64/u10.rs index 39cdc1f..489c14f 100644 --- a/src/f64/u10.rs +++ b/src/f64/u10.rs @@ -1282,12 +1282,14 @@ pub fn pow(x: f64, y: f64) -> f64 { }; result *= if x > 0. { 1. - } else if !yisint { - f64::NAN - } else if yisodd { - -1. + } else if yisint { + if yisodd { + -1. + } else { + 1. + } } else { - 1. + f64::NAN }; let efx = (fabsk(x) - 1.).mul_sign(y); @@ -1302,12 +1304,12 @@ pub fn pow(x: f64, y: f64) -> f64 { f64::INFINITY } } else if x.is_infinite() || (x == 0.) { - (if yisodd { x.sign() } else { 1. }) - * (if (if x == 0. { -y } else { y }) < 0. { - 0. - } else { - f64::INFINITY - }) + (if y.is_sign_negative() ^ (x == 0.) { + 0. + } else { + f64::INFINITY + }) + .mul_sign(if yisodd { x } else { 1. }) } else if x.is_nan() || y.is_nan() { f64::NAN } else { diff --git a/src/f64x.rs b/src/f64x.rs index 1cabd8b..d5e147d 100644 --- a/src/f64x.rs +++ b/src/f64x.rs @@ -295,19 +295,6 @@ where F64x::from_array(ar) } -#[inline] -fn swap_upper_lower(i: I64x) -> I64x -where - LaneCount: SupportedLaneCount, -{ - // i.rotate_left(I64x::splat(32)) - let mut ar = i.to_array(); - for v in &mut ar { - *v = v.rotate_left(32); - } - I64x::from_array(ar) -} - impl Round for F64x where LaneCount: SupportedLaneCount, @@ -564,7 +551,7 @@ where let o = d.simd_lt(F64x::splat(4.909_093_465_297_726_6_e-91)); d = o.select(F64x::splat(2.037_035_976_334_486_e90) * d, d); let mut q = cast_from_upper(d.to_bits()); - q &= Ix::splat((((1u32 << 12) - 1) << 20) as _); + q &= Ix::splat(((1 << 12) - 1) << 20); q = (q.cast() >> Ux::splat(20)).cast(); q - o.cast().select(Ix::splat(300 + 0x3ff), Ix::splat(0x3ff)) } @@ -912,47 +899,14 @@ where LaneCount: SupportedLaneCount, { let x = x.simd_eq(F64x::ZERO).select(F64x::ZERO.mul_sign(y), x); - let mut xi2 = x.to_bits().cast::(); + let xi2 = x.to_bits().cast::(); let c = x.is_sign_negative() ^ y.simd_ge(x); - let mut t = (xi2 ^ I64x::splat(0x_7fff_ffff_ffff_ffff_u64 as _)) + I64x::splat(1); - t += swap_upper_lower( - I64x::splat(1) - & t.simd_eq(I64x::splat(0x_ffff_ffff_0000_0000_u64 as _)) - .to_int(), - ); - xi2 = c - .select(F64x::from_bits(t.cast()), F64x::from_bits(xi2.cast())) - .to_bits() - .cast(); - - xi2 -= (x.simd_ne(y).to_int().cast() & U64x::splat(1)).cast(); - - xi2 = x - .simd_ne(y) - .select( - F64x::from_bits( - (xi2 + swap_upper_lower( - I64x::splat(0x_ffff_ffff_u64 as _) - & xi2.simd_eq(I64x::splat(0x_ffff_ffff_u64 as _)).to_int(), - )) - .cast(), - ), - F64x::from_bits(xi2.cast()), - ) - .to_bits() - .cast(); - - let mut t = (xi2 ^ I64x::splat(0x_7fff_ffff_ffff_ffff_u64 as _)) + I64x::splat(1); - t += swap_upper_lower( - I64x::splat(1) - & t.simd_eq(I64x::splat(0x_ffff_ffff_0000_0000_u64 as _)) - .to_int(), - ); - xi2 = c - .select(F64x::from_bits(t.cast()), F64x::from_bits(xi2.cast())) - .to_bits() - .cast(); + let xi2 = c.select(-(xi2 ^ I64x::splat(i64::MIN)), xi2); + + let xi2 = x.simd_ne(y).select(xi2 - I64x::splat(1), xi2); + + let xi2 = c.select(-(xi2 ^ I64x::splat(i64::MIN)), xi2); let mut ret = F64x::from_bits(xi2.cast()); diff --git a/src/f64x/u10.rs b/src/f64x/u10.rs index 2a790dc..de61b95 100644 --- a/src/f64x/u10.rs +++ b/src/f64x/u10.rs @@ -1222,14 +1222,21 @@ where let x = F64x::splat(-1.).add_as_doubled(m) / F64x::ONE.add_as_doubled(m); let x2 = x.0 * x.0; + let x4 = x2 * x2; + let x8 = x4 * x4; - let t = F64x::splat(0.153_207_698_850_270_135_3) - .mla(x2, F64x::splat(0.152_562_905_100_342_871_6)) - .mla(x2, F64x::splat(0.181_860_593_293_778_599_6)) - .mla(x2, F64x::splat(0.222_221_451_983_938_000_9)) - .mla(x2, F64x::splat(0.285_714_293_279_429_931_7)) - .mla(x2, F64x::splat(0.399_999_999_963_525_199)) - .mla(x2, F64x::splat(0.666_666_666_666_733_354_1)); + let t = F64x::poly7( + x2, + x4, + x8, + 0.153_207_698_850_270_135_3, + 0.152_562_905_100_342_871_6, + 0.181_860_593_293_778_599_6, + 0.222_221_451_983_938_000_9, + 0.285_714_293_279_429_931_7, + 0.399_999_999_963_525_199, + 0.666_666_666_666_733_354_1, + ); s = s.add_checked(x.scale(F64x::splat(2.))); s = s.add_checked(x2 * x.0 * t); @@ -1799,15 +1806,9 @@ where ); result = (x.is_infinite() | x.simd_eq(F64x::ZERO)).select( - yisodd.select(x.sign(), F64x::ONE) - * F64x::from_bits( - !x.simd_eq(F64x::ZERO) - .select(-y, y) - .simd_lt(F64x::ZERO) - .to_int() - .cast::() - & F64x::INFINITY.to_bits(), - ), + (y.is_sign_negative() ^ x.simd_eq(F64x::ZERO)) + .select(F64x::ZERO, F64x::INFINITY) + .mul_sign(yisodd.select(x, F64x::ONE)), result, );