Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enh: remove simd_nightly feature flag for aarch64 #63

Merged
merged 18 commits into from
Mar 3, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ exclude = [".git*", "dev_utils/**/*", "tests/**/*"]


[dependencies]
num-traits = { version = "0.2.17", default-features = false }
num-traits = { version = "0.2", default-features = false }
half = { version = "2.3.1", default-features = false, features=["num-traits"], optional = true }
ndarray = { version = "0.15.6", default-features = false, optional = true}
arrow = { version = ">0", default-features = false, optional = true}
arrow2 = { version = ">0.0", default-features = false, optional = true}
ndarray = { version = "0.15.6", default-features = false, optional = true }
arrow = { version = ">0", default-features = false, optional = true }
arrow2 = { version = ">0.0", default-features = false, optional = true }
# once_cell = "1.16.0"

[features]
Expand Down
101 changes: 19 additions & 82 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,8 @@ pub(crate) use simd::AVX512;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub(crate) use simd::{SIMDArgMinMax, AVX2, SSE};
#[cfg(any(
all(target_arch = "aarch64", feature = "float"), // is stable for f64
all(any(target_arch = "arm", target_arch = "aarch64"), feature = "nightly_simd"),
all(target_arch = "arm", feature = "nightly_simd"),
target_arch = "aarch64"
))]
pub(crate) use simd::{SIMDArgMinMax, NEON};

Expand Down Expand Up @@ -304,9 +304,9 @@ macro_rules! impl_argminmax_int {
return unsafe { SSE::<Int>::argminmax(self) }
}
}
#[cfg(all(target_arch = "aarch64", feature = "nightly_simd"))]
#[cfg(target_arch = "aarch64")]
{
if std::arch::is_aarch64_feature_detected!("neon") & (<$int_type>::NB_BITS < 64) {
if std::arch::is_aarch64_feature_detected!("neon") & (<$int_type>::NB_BITS < 64) {
jvdd marked this conversation as resolved.
Show resolved Hide resolved
// Scalar is faster for 64-bit numbers
return unsafe { NEON::<Int>::argminmax(self) }
}
Expand Down Expand Up @@ -349,7 +349,7 @@ macro_rules! impl_argminmax_int {
return unsafe { SSE::<Int>::argmin(self) }
}
}
#[cfg(all(target_arch = "aarch64", feature = "nightly_simd"))]
#[cfg(target_arch = "aarch64")]
{
if std::arch::is_aarch64_feature_detected!("neon") {
return unsafe { NEON::<Int>::argmin(self) }
Expand Down Expand Up @@ -392,7 +392,7 @@ macro_rules! impl_argminmax_int {
return unsafe { SSE::<Int>::argmax(self) }
}
}
#[cfg(all(target_arch = "aarch64", feature = "nightly_simd"))]
#[cfg(target_arch = "aarch64")]
{
if std::arch::is_aarch64_feature_detected!("neon") {
return unsafe { NEON::<Int>::argmax(self) }
Expand Down Expand Up @@ -443,10 +443,9 @@ macro_rules! impl_argminmax_float {
return unsafe { SSE::<FloatIgnoreNaN>::argminmax(self) }
}
}
#[cfg(all(target_arch = "aarch64", feature = "nightly_simd"))]
#[cfg(target_arch = "aarch64")]
{
if std::arch::is_aarch64_feature_detected!("neon") & (<$float_type>::NB_BITS < 64) {
// NEON f64 is part of stable Rust (see code below this macro)
if std::arch::is_aarch64_feature_detected!("neon") {
return unsafe { NEON::<FloatIgnoreNaN>::argminmax(self) }
}
}
Expand Down Expand Up @@ -483,10 +482,9 @@ macro_rules! impl_argminmax_float {
return unsafe { SSE::<FloatIgnoreNaN>::argmin(self) }
}
}
#[cfg(all(target_arch = "aarch64", feature = "nightly_simd"))]
#[cfg(target_arch = "aarch64")]
{
if std::arch::is_aarch64_feature_detected!("neon") & (<$float_type>::NB_BITS < 64) {
// NEON f64 is part of stable Rust (see code below this macro)
if std::arch::is_aarch64_feature_detected!("neon") {
return unsafe { NEON::<FloatIgnoreNaN>::argmin(self) }
}
}
Expand Down Expand Up @@ -523,10 +521,9 @@ macro_rules! impl_argminmax_float {
return unsafe { SSE::<FloatIgnoreNaN>::argmax(self) }
}
}
#[cfg(all(target_arch = "aarch64", feature = "nightly_simd"))]
#[cfg(target_arch = "aarch64")]
{
if std::arch::is_aarch64_feature_detected!("neon") & (<$float_type>::NB_BITS < 64) {
// NEON f64 is part of stable Rust (see code below this macro)
if std::arch::is_aarch64_feature_detected!("neon") {
return unsafe { NEON::<FloatIgnoreNaN>::argmax(self) }
}
}
Expand Down Expand Up @@ -563,10 +560,9 @@ macro_rules! impl_argminmax_float {
return unsafe { SSE::<FloatReturnNaN>::argminmax(self) }
}
}
#[cfg(all(target_arch = "aarch64", feature = "nightly_simd"))]
#[cfg(target_arch = "aarch64")]
{
if std::arch::is_aarch64_feature_detected!("neon") & (<$float_type>::NB_BITS < 64) {
// We miss some NEON instructions for 64-bit numbers
if std::arch::is_aarch64_feature_detected!("neon") {
return unsafe { NEON::<FloatReturnNaN>::argminmax(self) }
}
}
Expand Down Expand Up @@ -601,10 +597,9 @@ macro_rules! impl_argminmax_float {
return unsafe { SSE::<FloatReturnNaN>::argmin(self) }
}
}
#[cfg(all(target_arch = "aarch64", feature = "nightly_simd"))]
#[cfg(target_arch = "aarch64")]
{
if std::arch::is_aarch64_feature_detected!("neon") & (<$float_type>::NB_BITS < 64) {
// We miss some NEON instructions for 64-bit numbers
if std::arch::is_aarch64_feature_detected!("neon") {
return unsafe { NEON::<FloatReturnNaN>::argmin(self) }
}
}
Expand Down Expand Up @@ -639,10 +634,9 @@ macro_rules! impl_argminmax_float {
return unsafe { SSE::<FloatReturnNaN>::argmax(self) }
}
}
#[cfg(all(target_arch = "aarch64", feature = "nightly_simd"))]
#[cfg(target_arch = "aarch64")]
{
if std::arch::is_aarch64_feature_detected!("neon") & (<$float_type>::NB_BITS < 64) {
// We miss some NEON instructions for 64-bit numbers
if std::arch::is_aarch64_feature_detected!("neon") {
return unsafe { NEON::<FloatReturnNaN>::argmax(self) }
}
}
Expand All @@ -660,68 +654,11 @@ macro_rules! impl_argminmax_float {
};
}

/// Implements `ArgMinMax` for `&[f64]` on aarch64, as the NEON intrinsics
/// for f64 are part of stable Rust on aarch64.
///
/// Every method forwards directly to the `NEON::<FloatIgnoreNaN>` kernel.
/// This impl performs no runtime feature detection and has no scalar fallback.
// Note: implementing this in a distinct impl block seemed cleaner than
// hacking with unimpl_ macros in the simd_.rs files to avoid complaints
// from the compiler.
// Only compiled when the `float` feature is enabled and the target is aarch64.
#[cfg(all(feature = "float", target_arch = "aarch64"))]
impl ArgMinMax for &[f64] {
// Returns the index pair computed by the NEON `FloatIgnoreNaN` kernel
// (presumably (argmin, argmax) - confirm against the trait definition).
fn argminmax(&self) -> (usize, usize) {
// NOTE(review): there is no `is_aarch64_feature_detected!("neon")` guard
// here, unlike the macro-generated aarch64 paths in this file; presumably
// this relies on NEON being available on every supported aarch64 target -
// confirm.
unsafe { NEON::<FloatIgnoreNaN>::argminmax(self) }
}
// Returns the index computed by the NEON `FloatIgnoreNaN` argmin kernel.
fn argmin(&self) -> usize {
// NOTE(review): unguarded NEON call, same assumption as in `argminmax`.
unsafe { NEON::<FloatIgnoreNaN>::argmin(self) }
}
// Returns the index computed by the NEON `FloatIgnoreNaN` argmax kernel.
fn argmax(&self) -> usize {
// NOTE(review): unguarded NEON call, same assumption as in `argminmax`.
unsafe { NEON::<FloatIgnoreNaN>::argmax(self) }
}
}

/// Implements `NaNArgMinMax` for `&[f64]` on aarch64 - the intrinsics
/// required for the return-NaN variant are not part of stable Rust.
///
/// The NEON path is therefore only compiled when the `nightly_simd` feature is
/// enabled, and is only taken when NEON is detected at runtime. In every other
/// case the `SCALAR::<FloatReturnNaN>` implementation is used.
// Note: implementing this in a distinct impl block seemed cleaner than
// hacking with unimpl_ macros in the simd_.rs files to avoid complaints
// from the compiler.
// Only compiled when the `float` feature is enabled and the target is aarch64.
#[cfg(all(feature = "float", target_arch = "aarch64"))]
impl NaNArgMinMax for &[f64] {
// Returns the index pair from the `FloatReturnNaN` kernel
// (presumably (argmin, argmax) - confirm against the trait definition).
fn nanargminmax(&self) -> (usize, usize) {
// This block exists only in builds with the `nightly_simd` feature.
#[cfg(feature = "nightly_simd")]
{
// Runtime check: take the NEON path only if the CPU reports NEON support.
if std::arch::is_aarch64_feature_detected!("neon") {
return unsafe { NEON::<FloatReturnNaN>::argminmax(self) };
}
}
// Fallback: reached without `nightly_simd`, or when NEON was not detected.
SCALAR::<FloatReturnNaN>::argminmax(self)
}
// Returns the index from the `FloatReturnNaN` argmin kernel.
fn nanargmin(&self) -> usize {
// This block exists only in builds with the `nightly_simd` feature.
#[cfg(feature = "nightly_simd")]
{
// Runtime check: take the NEON path only if the CPU reports NEON support.
if std::arch::is_aarch64_feature_detected!("neon") {
return unsafe { NEON::<FloatReturnNaN>::argmin(self) };
}
}
// Fallback: reached without `nightly_simd`, or when NEON was not detected.
SCALAR::<FloatReturnNaN>::argmin(self)
}
// Returns the index from the `FloatReturnNaN` argmax kernel.
fn nanargmax(&self) -> usize {
// This block exists only in builds with the `nightly_simd` feature.
#[cfg(feature = "nightly_simd")]
{
// Runtime check: take the NEON path only if the CPU reports NEON support.
if std::arch::is_aarch64_feature_detected!("neon") {
return unsafe { NEON::<FloatReturnNaN>::argmax(self) };
}
}
// Fallback: reached without `nightly_simd`, or when NEON was not detected.
SCALAR::<FloatReturnNaN>::argmax(self)
}
}

// Implement ArgMinMax for (non-optional) integer rust primitive types
impl_argminmax_int!(i8, i16, i32, i64, u8, u16, u32, u64);
// Implement for (optional) float rust primitive types
#[cfg(all(feature = "float", not(target_arch = "aarch64")))]
#[cfg(feature = "float")]
impl_argminmax_float!(f32, f64);
// For aarch64 f64 is implemented in the two impl blocks above
#[cfg(all(feature = "float", target_arch = "aarch64"))]
impl_argminmax_float!(f32);

// Implement ArgMinMax for other data types
#[cfg(feature = "half")]
Expand Down
40 changes: 30 additions & 10 deletions src/simd/generic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,9 @@ where
/// - `impl_SIMDInit_Int!`
/// - called in the `simd_i*.rs` files
/// - called in the `simd_u*.rs` files
/// - `impl_SIMDInit_FloatIgnoreNaN!`
/// - see the `simd_f*_return_nan.rs` files
/// - `impl_SIMDInit_FloatReturnNaN!`
/// - see the `simd_f*_return_nan.rs` files
/// - `impl_SIMDInit_FloatIgnoreNaN!`
/// - see the `simd_f*_ignore_nan.rs` files
///
/// The current (default) implementation is for the Int case - see `impl_SIMDInit_Int!`
Expand Down Expand Up @@ -181,7 +181,12 @@ where

// --------------- Int (signed and unsigned)

#[cfg(any(target_arch = "x86", target_arch = "x86_64", feature = "nightly_simd"))]
#[cfg(any(
target_arch = "x86",
target_arch = "x86_64",
target_arch = "aarch64",
feature = "nightly_simd"
))]
macro_rules! impl_SIMDInit_Int {
($scalar_dtype:ty, $simd_vec_dtype:ty, $simd_mask_dtype:ty, $lane_size:expr, $simd_struct:ty) => {
impl SIMDInit<$scalar_dtype, $simd_vec_dtype, $simd_mask_dtype, $lane_size>
Expand All @@ -192,13 +197,23 @@ macro_rules! impl_SIMDInit_Int {
};
}

#[cfg(any(target_arch = "x86", target_arch = "x86_64", feature = "nightly_simd"))]
#[cfg(any(
target_arch = "x86",
target_arch = "x86_64",
target_arch = "aarch64",
feature = "nightly_simd"
))]
pub(crate) use impl_SIMDInit_Int; // Now classic paths Just Work™

// --------------- Float Return NaNs

#[cfg(any(feature = "float", feature = "half"))]
#[cfg(any(target_arch = "x86", target_arch = "x86_64", feature = "nightly_simd"))]
#[cfg(any(
target_arch = "x86",
target_arch = "x86_64",
target_arch = "aarch64",
feature = "nightly_simd"
))]
macro_rules! impl_SIMDInit_FloatReturnNaN {
($scalar_dtype:ty, $simd_vec_dtype:ty, $simd_mask_dtype:ty, $lane_size:expr, $simd_struct:ty) => {
impl SIMDInit<$scalar_dtype, $simd_vec_dtype, $simd_mask_dtype, $lane_size>
Expand All @@ -221,7 +236,12 @@ macro_rules! impl_SIMDInit_FloatReturnNaN {
}

#[cfg(any(feature = "float", feature = "half"))]
#[cfg(any(target_arch = "x86", target_arch = "x86_64", feature = "nightly_simd"))]
#[cfg(any(
target_arch = "x86",
target_arch = "x86_64",
target_arch = "aarch64",
feature = "nightly_simd"
))]
pub(crate) use impl_SIMDInit_FloatReturnNaN; // Now classic paths Just Work™

// --------------- Float Ignore NaNs
Expand All @@ -230,7 +250,7 @@ pub(crate) use impl_SIMDInit_FloatReturnNaN; // Now classic paths Just Work™
#[cfg(any(
target_arch = "x86",
target_arch = "x86_64",
all(target_arch = "aarch64", feature = "float"), // is stable for f64
target_arch = "aarch64", // is stable for f64
feature = "nightly_simd"
))]
macro_rules! impl_SIMDInit_FloatIgnoreNaN {
Expand Down Expand Up @@ -306,7 +326,7 @@ macro_rules! impl_SIMDInit_FloatIgnoreNaN {
#[cfg(any(
target_arch = "x86",
target_arch = "x86_64",
all(target_arch = "aarch64", feature = "float"), // is stable for f64
target_arch = "aarch64", // is stable for f64
feature = "nightly_simd"
))]
pub(crate) use impl_SIMDInit_FloatIgnoreNaN; // Now classic paths Just Work™
Expand Down Expand Up @@ -768,7 +788,7 @@ where
#[cfg(any(
target_arch = "x86",
target_arch = "x86_64",
all(target_arch = "aarch64", feature = "float"), // is stable for f64
target_arch = "aarch64",
feature = "nightly_simd"
))]
macro_rules! impl_SIMDArgMinMax {
Expand Down Expand Up @@ -806,7 +826,7 @@ macro_rules! impl_SIMDArgMinMax {
#[cfg(any(
target_arch = "x86",
target_arch = "x86_64",
all(target_arch = "aarch64", feature = "float"), // is stable for f64
target_arch = "aarch64",
feature = "nightly_simd"
))]
pub(crate) use impl_SIMDArgMinMax; // Now classic paths Just Work™
Expand Down
Loading
Loading