-
Notifications
You must be signed in to change notification settings - Fork 58
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
cb2ceeb
commit ee6574e
Showing
17 changed files
with
593 additions
and
399 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,7 +10,7 @@ block-sorting data compression algorithms. | |
libbsc is a library based on bsc, it uses the same algorithms | ||
as bsc and enables you to compress memory blocks. | ||
|
||
Copyright (c) 2009-2024 Ilya Grebnov <[email protected]> | ||
Copyright (c) 2009-2025 Ilya Grebnov <[email protected]> | ||
|
||
See file AUTHORS for a full list of contributors. | ||
|
||
|
@@ -21,7 +21,7 @@ See the bsc and libbsc web site: | |
Software License: | ||
----------------- | ||
|
||
Copyright (c) 2009-2024 Ilya Grebnov <[email protected]> | ||
Copyright (c) 2009-2025 Ilya Grebnov <[email protected]> | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
3.3.4 | ||
3.3.5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,7 +8,7 @@ | |
This file is a part of bsc and/or libbsc, a program and a library for | ||
lossless, block-sorting data compression. | ||
Copyright (c) 2009-2024 Ilya Grebnov <[email protected]> | ||
Copyright (c) 2009-2025 Ilya Grebnov <[email protected]> | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
|
@@ -869,8 +869,8 @@ void ProcessCommandline(int argc, char * argv[]) | |
|
||
int main(int argc, char * argv[]) | ||
{ | ||
fprintf(stdout, "This is bsc, Block Sorting Compressor. Version 3.3.4. 24 January 2024.\n"); | ||
fprintf(stdout, "Copyright (c) 2009-2024 Ilya Grebnov <[email protected]>.\n\n"); | ||
fprintf(stdout, "This is bsc, Block Sorting Compressor. Version 3.3.5. 6 February 2025.\n"); | ||
fprintf(stdout, "Copyright (c) 2009-2025 Ilya Grebnov <[email protected]>.\n\n"); | ||
|
||
#if defined(_OPENMP) && defined(__INTEL_COMPILER) | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,7 +8,7 @@ | |
This file is a part of bsc and/or libbsc, a program and a library for | ||
lossless, block-sorting data compression. | ||
Copyright (c) 2009-2024 Ilya Grebnov <[email protected]> | ||
Copyright (c) 2009-2025 Ilya Grebnov <[email protected]> | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
|
@@ -41,6 +41,10 @@ See also the bsc and libbsc web site: | |
#include "libcubwt/libcubwt.cuh" | ||
#include "libsais/libsais.h" | ||
|
||
#if defined(LIBBSC_OPENMP) && !defined(LIBSAIS_OPENMP) | ||
#error "LIBBSC_OPENMP requires LIBSAIS_OPENMP to be defined. Please define LIBSAIS_OPENMP and enable OpenMP support for libsais." | ||
#endif | ||
|
||
#if defined(LIBBSC_CUDA_SUPPORT) && defined(LIBBSC_OPENMP) | ||
|
||
omp_lock_t bwt_cuda_lock; | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
1.6.0 | ||
1.6.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,7 @@ | |
This file is a part of libcubwt, a library for CUDA accelerated | ||
burrows wheeler transform construction and inversion. | ||
Copyright (c) 2022-2024 Ilya Grebnov <[email protected]> | ||
Copyright (c) 2022-2025 Ilya Grebnov <[email protected]> | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
|
@@ -53,13 +53,13 @@ Please see the file LICENSE for full copyright and license details. | |
|
||
#if CUDA_DEVICE_ARCH == 750 | ||
#define CUDA_SM_THREADS (1024) | ||
#elif CUDA_DEVICE_ARCH == 860 || CUDA_DEVICE_ARCH == 870 || CUDA_DEVICE_ARCH == 890 | ||
#elif CUDA_DEVICE_ARCH == 860 || CUDA_DEVICE_ARCH == 870 || CUDA_DEVICE_ARCH == 890 || CUDA_DEVICE_ARCH == 1010 || CUDA_DEVICE_ARCH == 1200 | ||
#define CUDA_SM_THREADS (1536) | ||
#else | ||
#define CUDA_SM_THREADS (2048) | ||
#endif | ||
|
||
#if CUDA_DEVICE_ARCH == 860 || CUDA_DEVICE_ARCH == 870 || CUDA_DEVICE_ARCH == 890 | ||
#if CUDA_DEVICE_ARCH == 860 || CUDA_DEVICE_ARCH == 870 || CUDA_DEVICE_ARCH == 890 || CUDA_DEVICE_ARCH == 1010 || CUDA_DEVICE_ARCH == 1200 | ||
#define CUDA_BLOCK_THREADS (768) | ||
#else | ||
#define CUDA_BLOCK_THREADS (512) | ||
|
@@ -222,6 +222,15 @@ static __device__ __forceinline__ uint32_t libcubwt_match_any_sync(const uint32_ | |
return peers_mask; | ||
} | ||
|
||
static __device__ __forceinline__ uint32_t libcubwt_get_lanemask_lt() | ||
{ | ||
uint32_t mask; | ||
|
||
asm("mov.u32 %0, %%lanemask_lt;" : "=r"(mask)); | ||
|
||
return mask; | ||
} | ||
|
||
static __device__ __forceinline__ uint32_t libcubwt_xxhash32_b32(uint32_t data, uint32_t seed) | ||
{ | ||
uint32_t x = (data * 0xc2b2ae3du) + seed + (uint32_t)sizeof(data) + 0x165667b1u; | ||
|
@@ -2262,7 +2271,7 @@ int64_t libcubwt_allocate_device_storage(void ** device_storage, int64_t max_len | |
{ | ||
storage->device_L2_cache_bits = 0; while (cuda_device_L2_cache_size >>= 1) { storage->device_L2_cache_bits += 1; }; | ||
|
||
storage->cuda_block_threads = (cuda_device_capability == 860 || cuda_device_capability == 870 || cuda_device_capability == 890) ? 768u : 512u; | ||
storage->cuda_block_threads = (cuda_device_capability == 860 || cuda_device_capability == 870 || cuda_device_capability == 890 || cuda_device_capability == 1010 || cuda_device_capability == 1200) ? 768u : 512u; | ||
} | ||
} | ||
|
||
|
@@ -2271,7 +2280,6 @@ int64_t libcubwt_allocate_device_storage(void ** device_storage, int64_t max_len | |
int64_t num_descriptors = ((max_reduced_length / (storage->cuda_block_threads * 4)) + 1024) & (-1024); | ||
|
||
{ | ||
cub::DoubleBuffer<uint8_t> uint8_db; | ||
cub::DoubleBuffer<uint32_t> uint32_db; | ||
cub::DoubleBuffer<uint64_t> uint64_db; | ||
|
||
|
@@ -2669,7 +2677,7 @@ static void libcubwt_compute_LF_mapping(const uint8_t * RESTRICT device_L, const | |
uint32_t byte = thread_bytes[0]; if (primary_index == thread_index) { byte = 256; } | ||
|
||
uint32_t peers_mask = libcubwt_match_any_sync<9>((uint32_t)-1, byte); | ||
uint32_t peers_offset = __popc(peers_mask & cub::LaneMaskLt()); | ||
uint32_t peers_offset = __popc(peers_mask & libcubwt_get_lanemask_lt()); | ||
uint32_t warp_offset = (byte < 256 ? warp_histogram[byte] : 0); | ||
|
||
device_LF[thread_index] = warp_offset + peers_offset; | ||
|
@@ -2699,7 +2707,7 @@ static void libcubwt_compute_LF_mapping(const uint8_t * RESTRICT device_L, const | |
uint32_t byte = thread_bytes[0]; | ||
|
||
uint32_t peers_mask = libcubwt_match_any_sync<8>((uint32_t)-1, byte); | ||
uint32_t peers_offset = __popc(peers_mask & cub::LaneMaskLt()); | ||
uint32_t peers_offset = __popc(peers_mask & libcubwt_get_lanemask_lt()); | ||
uint32_t warp_offset = warp_histogram[byte]; | ||
|
||
device_LF[thread_index] = warp_offset + peers_offset; | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,7 @@ | |
This file is a part of libcubwt, a library for CUDA accelerated | ||
burrows wheeler transform construction and inversion. | ||
Copyright (c) 2022-2024 Ilya Grebnov <[email protected]> | ||
Copyright (c) 2022-2025 Ilya Grebnov <[email protected]> | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
|
@@ -26,8 +26,8 @@ Please see the file LICENSE for full copyright and license details. | |
|
||
#define LIBCUBWT_VERSION_MAJOR 1 | ||
#define LIBCUBWT_VERSION_MINOR 6 | ||
#define LIBCUBWT_VERSION_PATCH 0 | ||
#define LIBCUBWT_VERSION_STRING "1.6.0" | ||
#define LIBCUBWT_VERSION_PATCH 1 | ||
#define LIBCUBWT_VERSION_STRING "1.6.1" | ||
|
||
#define LIBCUBWT_NO_ERROR 0 | ||
#define LIBCUBWT_BAD_PARAMETER -1 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
2.7.2 | ||
2.8.7 |
Oops, something went wrong.