Skip to content

Commit

Permalink
merged with master and updated version #
Browse files Browse the repository at this point in the history
  • Loading branch information
gblelloch committed Nov 17, 2024
2 parents e084b9e + 36459f4 commit 59291a5
Show file tree
Hide file tree
Showing 26 changed files with 922 additions and 660 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# -------------------------------------------------------------------

cmake_minimum_required(VERSION 3.14)
project(PARLAY VERSION 2.2.4
project(PARLAY VERSION 2.3.2
DESCRIPTION "A collection of parallel algorithms and other support for parallelism in C++"
LANGUAGES CXX)

Expand Down
58 changes: 58 additions & 0 deletions benchmark/bench_sequence.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,61 @@ static void bench_short_subscript(benchmark::State& state) {
}
}

// Benchmarks one capacity-growing reserve() on a sequence of int64_t.
// Every iteration rebuilds a 10,000,000-element sequence while the timer is
// paused, so the timed region contains only the reserve() call.
static void bench_grow_int64(benchmark::State& state) {
  constexpr int kNumElements = 10000000;
  parlay::sequence<int64_t> seq;
  for (auto _ : state) {
    // Untimed setup: start each iteration from a freshly built sequence.
    state.PauseTiming();
    seq = parlay::sequence<int64_t>(kNumElements);
    state.ResumeTiming();

    // Timed: requesting one slot beyond the current capacity triggers a grow.
    seq.reserve(seq.capacity() + 1);
  }
}

// Benchmark element type whose only data member is a std::unique_ptr<int>.
// It declares no destructor and no copy/move operations of its own.
// No annotation needed since this one should be detectable
struct Relocatable {
std::unique_ptr<int> x;
Relocatable() = default;
// Non-explicit on purpose or by omission: heap-allocates an int holding x_.
Relocatable(int x_) : x(std::make_unique<int>(x_)) { }
};

// When PARLAY_MUST_SPECIALIZE_IS_TRIVIALLY_RELOCATABLE is defined, explicitly
// mark Relocatable through the PARLAY_ASSUME_TRIVIALLY_RELOCATABLE macro.
// NOTE(review): presumably that macro is set on toolchains where parlay cannot
// detect trivial relocatability automatically -- confirm in the parlay headers.
#if defined(PARLAY_MUST_SPECIALIZE_IS_TRIVIALLY_RELOCATABLE)
namespace parlay {
template<>
PARLAY_ASSUME_TRIVIALLY_RELOCATABLE(Relocatable);
}
#endif

// Compile-time check: with or without the explicit specialization above,
// parlay must report Relocatable as trivially relocatable.
static_assert(parlay::is_trivially_relocatable_v<Relocatable>);

// Counterpart to Relocatable: same std::unique_ptr<int> member, but with a
// user-provided move constructor and a user-provided (empty) destructor.
// NOTE(review): presumably those two user-provided members are what stop
// parlay from treating the type as trivially relocatable -- confirm against
// the is_trivially_relocatable implementation.
struct NotRelocatable {
std::unique_ptr<int> x;
NotRelocatable() = default;
// Heap-allocates an int holding x_.
NotRelocatable(int x_) : x(std::make_unique<int>(x_)) { }
// User-provided move: transfers ownership of the pointee from other.
NotRelocatable(NotRelocatable&& other) noexcept : x(std::move(other.x)) { }
// User-provided destructor with an empty body; x is still released by its own destructor.
~NotRelocatable() { }
};
// Compile-time check that parlay does NOT report this type as trivially relocatable.
static_assert(!parlay::is_trivially_relocatable_v<NotRelocatable>);

// Benchmarks one capacity-growing reserve() on a sequence of Relocatable.
// Every iteration rebuilds a 10,000,000-element sequence while the timer is
// paused, so the timed region contains only the reserve() call.
static void bench_grow_relocatable(benchmark::State& state) {
  constexpr int kNumElements = 10000000;
  parlay::sequence<Relocatable> seq;
  for (auto _ : state) {
    // Untimed setup: start each iteration from a freshly built sequence.
    state.PauseTiming();
    seq = parlay::sequence<Relocatable>(kNumElements);
    state.ResumeTiming();

    // Timed: requesting one slot beyond the current capacity triggers a grow.
    seq.reserve(seq.capacity() + 1);
  }
}

// Benchmarks one capacity-growing reserve() on a sequence of NotRelocatable.
// Every iteration rebuilds a 10,000,000-element sequence while the timer is
// paused, so the timed region contains only the reserve() call.
static void bench_grow_nonrelocatable(benchmark::State& state) {
  constexpr int kNumElements = 10000000;
  parlay::sequence<NotRelocatable> seq;
  for (auto _ : state) {
    // Untimed setup: start each iteration from a freshly built sequence.
    state.PauseTiming();
    seq = parlay::sequence<NotRelocatable>(kNumElements);
    state.ResumeTiming();

    // Timed: requesting one slot beyond the current capacity triggers a grow.
    seq.reserve(seq.capacity() + 1);
  }
}

// ------------------------- Registration -------------------------------

#define BENCH(NAME) BENCHMARK(bench_ ## NAME) \
Expand All @@ -35,3 +90,6 @@ static void bench_short_subscript(benchmark::State& state) {

BENCH(subscript);
BENCH(short_subscript);
BENCH(grow_int64);
BENCH(grow_relocatable);
BENCH(grow_nonrelocatable);
7 changes: 5 additions & 2 deletions include/parlay/alloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,11 @@ struct allocator {
template <class U> /* implicit */ constexpr allocator(const allocator<U>&) noexcept { }
};

template<typename T>
struct is_trivially_relocatable<allocator<T>> : std::true_type {};
// Allocator should be trivially copyable since it is stateless and has no user-provided copy
// constructor. This should guarantee that it is also trivially relocatable.
static_assert(std::is_trivially_copyable_v<allocator<int>>);
static_assert(is_trivially_relocatable_v<allocator<int>>);


template <class T, class U>
bool operator==(const allocator<T>&, const allocator<U>&) { return true; }
Expand Down
4 changes: 2 additions & 2 deletions include/parlay/internal/bucket_sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ void radix_step_(slice<InIterator, InIterator> A,

for (size_t j = n; j > 0; j--) {
auto x = --counts[keys[j-1]];
uninitialized_relocate(&B[x], &A[j-1]);
relocate_at(&A[j - 1], &B[x]);
}
}

Expand Down Expand Up @@ -128,7 +128,7 @@ void base_sort(slice<InIterator, InIterator> in,
else {
quicksort(in.begin(), in.size(), f);
if (!inplace) {
uninitialized_relocate_n(out.begin(), in.begin(), in.size());
parlay::uninitialized_relocate(in.begin(), in.end(), out.begin());
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion include/parlay/internal/collect_reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ auto seq_collect_reduce_sparse(Slice A, Helper const &helper) {
auto r = r_s.begin();
size_t j = 0;
for (size_t i = 0; i < table_size; i++)
if (flags[i]) uninitialized_relocate(&r[j++], &table[i]);
if (flags[i]) relocate_at(&table[i], &r[j++]);
assert(j == count);
return r_s;
}
Expand Down
2 changes: 1 addition & 1 deletion include/parlay/internal/counting_sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ auto count_sort_inplace(slice<InIterator, InIterator> In, KeyS const& Keys, size
using value_type = typename slice<InIterator, InIterator>::value_type;
auto Tmp = uninitialized_sequence<value_type>(In.size());
auto a = count_sort<uninitialized_relocate_tag>(In, make_slice(Tmp), make_slice(Keys), num_buckets);
uninitialized_relocate_n(In.begin(), Tmp.begin(), In.size());
parlay::uninitialized_relocate(Tmp.begin(), Tmp.end(), In.begin());
return a.first;
}

Expand Down
2 changes: 1 addition & 1 deletion include/parlay/internal/delayed/filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ struct block_delayed_filter_t :
}
}
auto res = sequence<It>::uninitialized(n);
uninitialized_relocate_n(res.begin(), temp.begin(), n);
parlay::uninitialized_relocate_n(temp.begin(), n, res.begin());
return res;
}

Expand Down
2 changes: 1 addition & 1 deletion include/parlay/internal/delayed/filter_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ struct block_delayed_filter_op_t :
}
}
auto res = sequence<result_type>::uninitialized(n);
uninitialized_relocate_n(res.begin(), temp.begin(), n);
parlay::uninitialized_relocate_n(temp.begin(), n, res.begin());
return res;
}

Expand Down
14 changes: 7 additions & 7 deletions include/parlay/internal/integer_sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,10 @@ void seq_radix_sort_(slice<InIterator, InIterator> In,
}

if (swapped && inplace) {
uninitialized_relocate_n(In.begin(), Out.begin(), In.size());
parlay::uninitialized_relocate(Out.begin(), Out.end(), In.begin());
}
else if (!swapped && !inplace) {
uninitialized_relocate_n(Out.begin(), In.begin(), Out.size());
parlay::uninitialized_relocate(In.begin(), In.end(), Out.begin());
}
}

Expand All @@ -105,10 +105,10 @@ void seq_radix_sort(slice<InIterator, InIterator> In,
size_t n = In.size();
if (odd) {
// We could just use assign_dispatch(Tmp[i], In[i]) for each i, but we
// can optimize better by calling destructive_move_slice, since this
// can optimize better by calling uninitialized_relocate, since this
// has the ability to memcpy multiple elements at once
if constexpr (std::is_same_v<assignment_tag, uninitialized_relocate_tag>) {
uninitialized_relocate_n(Tmp.begin(), In.begin(), Tmp.size());
parlay::uninitialized_relocate(In.begin(), In.end(), Tmp.begin());
}
else {
for (size_t i = 0; i < n; i++)
Expand All @@ -117,7 +117,7 @@ void seq_radix_sort(slice<InIterator, InIterator> In,
seq_radix_sort_(Tmp, Out, g, key_bits, false);
} else {
if constexpr (std::is_same_v<assignment_tag, uninitialized_relocate_tag>) {
uninitialized_relocate_n(Out.begin(), In.begin(), Out.size());
parlay::uninitialized_relocate(In.begin(), In.end(), Out.begin());
}
else {
for (size_t i = 0; i < n; i++)
Expand Down Expand Up @@ -219,7 +219,7 @@ sequence<size_t> integer_sort_r(slice<InIterator, InIterator> In,
// uninitialized_relocate, which can memcpy multiple elements at a time
// to save on performing every copy individually.
if constexpr (std::is_same_v<assignment_tag, uninitialized_relocate_tag>) {
uninitialized_relocate_n(Out.begin(), In.begin(), Out.size());
parlay::uninitialized_relocate(In.begin(), In.end(), Out.begin());
}
else {
parallel_for(0, In.size(), [&](size_t i) {
Expand Down Expand Up @@ -248,7 +248,7 @@ sequence<size_t> integer_sort_r(slice<InIterator, InIterator> In,

if constexpr (inplace_tag::value == true) {
if (!one_bucket) {
uninitialized_relocate_n(In.begin(), Out.begin(), In.size());
parlay::uninitialized_relocate(Out.begin(), Out.end(), In.begin());
}
}

Expand Down
2 changes: 1 addition & 1 deletion include/parlay/internal/merge_sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ void merge_sort_(slice<InIterator, InIterator> In,
insertion_sort(In.begin(), In.size(), f);
if (!inplace) {
for (size_t i = 0; i < In.size(); i++) {
uninitialized_relocate(&Out[i], &In[i]);
relocate_at(&In[i], &Out[i]);
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion include/parlay/internal/sample_sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ void sample_sort_inplace_(slice<InIterator, InIterator> In,

// Sample block is already sorted, so we don't need to sort it again.
// We can just move it straight over into the other sorted blocks
uninitialized_relocate_n(Tmp.begin(), sample_set.begin(), sample_set_size);
parlay::uninitialized_relocate(sample_set.begin(), sample_set.end(), Tmp.begin());

// move data from blocks to buckets
auto bucket_offsets =
Expand Down
11 changes: 10 additions & 1 deletion include/parlay/internal/sequence_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,16 @@ struct alignas(uint64_t) sequence_base {
auto n = size();
auto dest_buffer = new_buffer.data();
auto current_buffer = data();
uninitialized_relocate_n_a(dest_buffer, current_buffer, n, *this);

if constexpr (is_trivial_allocator_v<T_allocator_type, T>) {
parlay::uninitialized_relocate_n(current_buffer, n, dest_buffer);
}
else {
parallel_for(0, n, [&](size_t i){
std::allocator_traits<T_allocator_type>::construct(alloc, std::addressof(dest_buffer[i]), std::move(current_buffer[i]));
std::allocator_traits<T_allocator_type>::destroy(alloc, std::addressof(current_buffer[i]));
});
}

// Destroy the old stuff
if (!is_small()) {
Expand Down
2 changes: 1 addition & 1 deletion include/parlay/internal/thread_id_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class ThreadIdPool : public std::enable_shared_from_this<ThreadIdPool> {


~ThreadIdPool() noexcept {
size_t num_destroyed = 0;
[[maybe_unused]] size_t num_destroyed = 0;
for (auto current = available_ids.load(std::memory_order_relaxed); current; num_destroyed++) {
auto old = std::exchange(current, current->next);
delete old;
Expand Down
Loading

0 comments on commit 59291a5

Please sign in to comment.