Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert building shuffle.c with SSE2/AVX2 compilation flags #373

Merged
merged 4 commits into from
Aug 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions blosc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -162,15 +162,9 @@ if(COMPILER_SUPPORT_SSE2)
# MSVC targets SSE2 by default on 64-bit configurations, but not 32-bit configurations.
if (${CMAKE_SIZEOF_VOID_P} EQUAL 4)
set_source_files_properties(shuffle-sse2.c bitshuffle-sse2.c PROPERTIES COMPILE_FLAGS "/arch:SSE2")
set_property(
SOURCE shuffle.c
APPEND PROPERTY COMPILE_OPTIONS "/arch:SSE2")
endif (${CMAKE_SIZEOF_VOID_P} EQUAL 4)
else (MSVC)
set_source_files_properties(shuffle-sse2.c bitshuffle-sse2.c PROPERTIES COMPILE_FLAGS -msse2)
set_property(
SOURCE shuffle.c
APPEND PROPERTY COMPILE_OPTIONS -msse2)
endif (MSVC)

# Define a symbol for the shuffle-dispatch implementation
Expand All @@ -184,15 +178,9 @@ if(COMPILER_SUPPORT_AVX2)
if (MSVC)
set_source_files_properties(shuffle-avx2.c bitshuffle-avx2.c
PROPERTIES COMPILE_FLAGS "/arch:AVX2")
set_property(
SOURCE shuffle.c
APPEND PROPERTY COMPILE_OPTIONS "/arch:AVX2")
else (MSVC)
set_source_files_properties(shuffle-avx2.c bitshuffle-avx2.c
PROPERTIES COMPILE_FLAGS -mavx2)
set_property(
SOURCE shuffle.c
APPEND PROPERTY COMPILE_OPTIONS -mavx2)
endif (MSVC)

# Define a symbol for the shuffle-dispatch implementation
Expand Down
17 changes: 15 additions & 2 deletions blosc/bitshuffle-avx2.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,21 @@
#include "bitshuffle-avx2.h"


/* Make sure AVX2 is available for the compilation target and compiler. */
#if defined(__AVX2__)
/* Define dummy functions if AVX2 is not available for the compilation target and compiler. */
#if !defined(__AVX2__)
#include <stdlib.h>

/* Dummy definition used when AVX2 is not available for the compilation
   target and compiler (see the enclosing #if). It performs no work: every
   argument is ignored and the process is terminated through abort(), so the
   function never returns a value to its caller. */
int64_t blosc_internal_bshuf_trans_bit_elem_avx2(void* in, void* out, const size_t size,
                                                 const size_t elem_size, void* tmp_buf) {
  /* Mark the parameters as intentionally unused. */
  (void)in;
  (void)out;
  (void)size;
  (void)elem_size;
  (void)tmp_buf;

  abort();
}

/* Dummy definition used when AVX2 is not available for the compilation
   target and compiler (see the enclosing #if). All arguments are ignored;
   the only effect of a call is abort(), so control never comes back to the
   caller. */
int64_t blosc_internal_bshuf_untrans_bit_elem_avx2(void* in, void* out, const size_t size,
                                                   const size_t elem_size, void* tmp_buf) {
  /* Mark the parameters as intentionally unused. */
  (void)in;
  (void)out;
  (void)size;
  (void)elem_size;
  (void)tmp_buf;

  abort();
}

#else /* defined(__AVX2__) */

#include <immintrin.h>

Expand Down
17 changes: 15 additions & 2 deletions blosc/bitshuffle-sse2.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,21 @@
#include "bitshuffle-generic.h"
#include "bitshuffle-sse2.h"

/* Make sure SSE2 is available for the compilation target and compiler. */
#if defined(__SSE2__)
/* Define dummy functions if SSE2 is not available for the compilation target and compiler. */
#if !defined(__SSE2__)
#include <stdlib.h>

/* Dummy definition used when SSE2 is not available for the compilation
   target and compiler (see the enclosing #if). All arguments are ignored;
   the only effect of a call is abort(), so control never comes back to the
   caller.

   NOTE(review): the AVX2 dummy with the same role is a "trans_bit_elem"
   function, whereas this one is "trans_byte_elem". Confirm against the
   symbols the dispatcher actually references that a "trans_bit_elem" SSE2
   dummy is not needed as well. */
int64_t blosc_internal_bshuf_trans_byte_elem_sse2(void* in, void* out, const size_t size,
                                                  const size_t elem_size, void* tmp_buf) {
  /* Mark the parameters as intentionally unused. */
  (void)in;
  (void)out;
  (void)size;
  (void)elem_size;
  (void)tmp_buf;

  abort();
}

/* Dummy definition used when SSE2 is not available for the compilation
   target and compiler (see the enclosing #if). It performs no work: every
   argument is ignored and the process is terminated through abort(), so the
   function never returns a value to its caller. */
int64_t blosc_internal_bshuf_untrans_bit_elem_sse2(void* in, void* out, const size_t size,
                                                   const size_t elem_size, void* tmp_buf) {
  /* Mark the parameters as intentionally unused. */
  (void)in;
  (void)out;
  (void)size;
  (void)elem_size;
  (void)tmp_buf;

  abort();
}

#else /* defined(__SSE2__) */

#include <emmintrin.h>

Expand Down
19 changes: 17 additions & 2 deletions blosc/shuffle-avx2.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,23 @@
#include "shuffle-generic.h"
#include "shuffle-avx2.h"

/* Make sure AVX2 is available for the compilation target and compiler. */
#if defined(__AVX2__)
/* Define dummy functions if AVX2 is not available for the compilation target and compiler. */
#if !defined(__AVX2__)
#include <stdlib.h>

/* Dummy definition used when AVX2 is not available for the compilation
   target and compiler (see the enclosing #if). No data is read or written;
   a call simply terminates the process through abort(). */
void
blosc_internal_shuffle_avx2(const size_t bytesoftype, const size_t blocksize,
                            const uint8_t* const _src, uint8_t* const _dest) {
  /* Mark the parameters as intentionally unused. */
  (void)bytesoftype;
  (void)blocksize;
  (void)_src;
  (void)_dest;

  abort();
}

/* Dummy definition used when AVX2 is not available for the compilation
   target and compiler (see the enclosing #if). The buffers are never
   touched; a call simply terminates the process through abort(). */
void
blosc_internal_unshuffle_avx2(const size_t bytesoftype, const size_t blocksize,
                              const uint8_t* const _src, uint8_t* const _dest) {
  /* Mark the parameters as intentionally unused. */
  (void)bytesoftype;
  (void)blocksize;
  (void)_src;
  (void)_dest;

  abort();
}

#else /* defined(__AVX2__) */

#include <immintrin.h>

Expand Down
18 changes: 16 additions & 2 deletions blosc/shuffle-sse2.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,22 @@
#include "shuffle-generic.h"
#include "shuffle-sse2.h"

/* Make sure SSE2 is available for the compilation target and compiler. */
#if defined(__SSE2__)
/* Define dummy functions if SSE2 is not available for the compilation target and compiler. */
#if !defined(__SSE2__)

/* abort() is declared in <stdlib.h>. Without this include the dummy
   functions below rely on an implicit function declaration, which is not
   valid since C99. */
#include <stdlib.h>

/* Dummy definition used when SSE2 is not available for the compilation
   target and compiler (see the enclosing #if). No data is read or written;
   a call simply terminates the process through abort(). */
void
blosc_internal_shuffle_sse2(const size_t bytesoftype, const size_t blocksize,
                            const uint8_t* const _src, uint8_t* const _dest) {
  /* Mark the parameters as intentionally unused. */
  (void)bytesoftype;
  (void)blocksize;
  (void)_src;
  (void)_dest;

  abort();
}

/* Dummy definition used when SSE2 is not available for the compilation
   target and compiler (see the enclosing #if). The buffers are never
   touched; a call simply terminates the process through abort(). */
void
blosc_internal_unshuffle_sse2(const size_t bytesoftype, const size_t blocksize,
                              const uint8_t* const _src, uint8_t* const _dest) {
  /* Mark the parameters as intentionally unused. */
  (void)bytesoftype;
  (void)blocksize;
  (void)_src;
  (void)_dest;

  abort();
}

# else /* defined(__SSE2__) */

#include <emmintrin.h>

Expand Down
29 changes: 12 additions & 17 deletions blosc/shuffle.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,26 +35,19 @@ typedef unsigned char bool;
#define HAVE_CPU_FEAT_INTRIN
#endif

#if defined(SHUFFLE_AVX2_ENABLED) && defined(__AVX2__)
#define SHUFFLE_USE_AVX2
#endif

#if defined(SHUFFLE_SSE2_ENABLED) && defined(__SSE2__)
#define SHUFFLE_USE_SSE2
#endif

/* Include hardware-accelerated shuffle/unshuffle routines based on
the target architecture. Note that a target architecture may support
more than one type of acceleration!*/
#if defined(SHUFFLE_USE_AVX2)
#if defined(SHUFFLE_AVX2_ENABLED)
#include "shuffle-avx2.h"
#include "bitshuffle-avx2.h"
#endif /* defined(SHUFFLE_USE_AVX2) */
#endif /* defined(SHUFFLE_AVX2_ENABLED) */

#if defined(SHUFFLE_USE_SSE2)
#if defined(SHUFFLE_SSE2_ENABLED)
#include "shuffle-sse2.h"
#include "bitshuffle-sse2.h"
#endif /* defined(SHUFFLE_USE_SSE2) */
#endif /* defined(SHUFFLE_SSE2_ENABLED) */


/* Define function pointer types for shuffle/unshuffle routines. */
Expand Down Expand Up @@ -84,8 +77,10 @@ typedef enum {
} blosc_cpu_features;

/* Detect hardware and set function pointers to the best shuffle/unshuffle
implementations supported by the host processor. */
#if defined(SHUFFLE_USE_AVX2) || defined(SHUFFLE_USE_SSE2) /* Intel/i686 */
implementations supported by the host processor for Intel/i686
*/
#if (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)) \
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From looking around a bit, this should detect the x86 architecture with the main compilers (GCC/Clang via __x86_64__ / __i386__, MSVC via _M_X64 / _M_IX86).
I'd welcome any better way to do this.

&& (defined(SHUFFLE_AVX2_ENABLED) || defined(SHUFFLE_SSE2_ENABLED))

/* Disabled the __builtin_cpu_supports() call, as it has issues with
new versions of gcc (like 5.3.1 in forthcoming ubuntu/xenial:
Expand Down Expand Up @@ -317,7 +312,7 @@ static shuffle_implementation_t get_shuffle_implementation(void) {
blosc_cpu_features cpu_features = blosc_get_cpu_features();
shuffle_implementation_t impl_generic;

#if defined(SHUFFLE_USE_AVX2)
#if defined(SHUFFLE_AVX2_ENABLED)
if (cpu_features & BLOSC_HAVE_AVX2) {
shuffle_implementation_t impl_avx2;
impl_avx2.name = "avx2";
Expand All @@ -327,9 +322,9 @@ static shuffle_implementation_t get_shuffle_implementation(void) {
impl_avx2.bitunshuffle = (bitunshuffle_func)blosc_internal_bshuf_untrans_bit_elem_avx2;
return impl_avx2;
}
#endif /* defined(SHUFFLE_USE_AVX2) */
#endif /* defined(SHUFFLE_AVX2_ENABLED) */

#if defined(SHUFFLE_USE_SSE2)
#if defined(SHUFFLE_SSE2_ENABLED)
if (cpu_features & BLOSC_HAVE_SSE2) {
shuffle_implementation_t impl_sse2;
impl_sse2.name = "sse2";
Expand All @@ -339,7 +334,7 @@ static shuffle_implementation_t get_shuffle_implementation(void) {
impl_sse2.bitunshuffle = (bitunshuffle_func)blosc_internal_bshuf_untrans_bit_elem_sse2;
return impl_sse2;
}
#endif /* defined(SHUFFLE_USE_SSE2) */
#endif /* defined(SHUFFLE_SSE2_ENABLED) */

/* Processor doesn't support any of the hardware-accelerated implementations,
so use the generic implementation. */
Expand Down