From e74079d1c6d2bc96e53f038b85b77894dbb0c46b Mon Sep 17 00:00:00 2001 From: Tristan Brindle Date: Wed, 6 Dec 2023 18:53:39 +0000 Subject: [PATCH 1/9] Make splitting more generic Rather than a pattern, `split_adaptor` now takes a "Splitter", a function object which, when passed a sequence, returns the bounds of the next slice. This opens the door to potentially doing more sophisticated splitting, for example using a (CTRE?) regex or one of the standard library searchers. At the moment, we don't publicly expose the generic Splitter interface, but this could be done in future. Rather, our existing `split()` overloads have been changed to use new (internal) `pattern_splitter` and `delim_splitter` implementations. --- include/flux/core/inline_sequence_base.hpp | 6 +- include/flux/op/split.hpp | 197 ++++++++++++--------- 2 files changed, 114 insertions(+), 89 deletions(-) diff --git a/include/flux/core/inline_sequence_base.hpp b/include/flux/core/inline_sequence_base.hpp index be1bb6c5..2129cc07 100644 --- a/include/flux/core/inline_sequence_base.hpp +++ b/include/flux/core/inline_sequence_base.hpp @@ -322,10 +322,10 @@ struct inline_sequence_base { [[nodiscard]] constexpr auto split(Pattern&& pattern) &&; - template - requires decays_to> + template + requires std::equality_comparable_with, Delim const&> [[nodiscard]] - constexpr auto split(ValueType&& delim) &&; + constexpr auto split(Delim&& delim) &&; template [[nodiscard]] diff --git a/include/flux/op/split.hpp b/include/flux/op/split.hpp index 100e3667..e371c5db 100644 --- a/include/flux/op/split.hpp +++ b/include/flux/op/split.hpp @@ -17,117 +17,142 @@ namespace flux { namespace detail { -template -struct split_adaptor : inline_sequence_base> { -private: - Base base_; - Pattern pattern_; +template +concept splitter_for = requires(Splitter& splitter, Seq& seq, cursor_t const& cur) { + { splitter(flux::slice(seq, cur, flux::last)) } -> std::same_as>; +}; - friend struct sequence_traits; +template Splitter> +struct 
split_adaptor : inline_sequence_base> { +private: + FLUX_NO_UNIQUE_ADDRESS Base base_; + FLUX_NO_UNIQUE_ADDRESS Splitter splitter_; public: - constexpr split_adaptor(decays_to auto&& base, decays_to auto&& pattern) + constexpr split_adaptor(decays_to auto&& base, decays_to auto&& splitter) : base_(FLUX_FWD(base)), - pattern_(FLUX_FWD(pattern)) + splitter_(FLUX_FWD(splitter)) {} -}; - -struct split_fn { - template - requires multipass_sequence && - multipass_sequence && - std::equality_comparable_with, element_t> - [[nodiscard]] - constexpr auto operator()(Seq&& seq, Pattern&& pattern) const - { - return split_adaptor, std::decay_t>( - FLUX_FWD(seq), FLUX_FWD(pattern)); - } - template - requires multipass_sequence - [[nodiscard]] - constexpr auto operator()(Seq&& seq, value_t delim) const - { - return (*this)(FLUX_FWD(seq), flux::single(std::move(delim))); - } -}; - -template -inline constexpr bool is_single_seq = false; - -template -inline constexpr bool is_single_seq> = true; - -} // namespace detail + struct flux_sequence_traits { + private: + struct cursor_type { + cursor_t cur; + bounds_t next; + bool trailing_empty = false; + + friend bool operator==(cursor_type const&, cursor_type const&) = default; + }; + + public: + static constexpr bool is_infinite = infinite_sequence; + + static constexpr auto first(auto& self) -> cursor_type + requires sequence && + splitter_for + { + auto fst = flux::first(self.base_); + auto bounds = self.splitter_(flux::slice(self.base_, fst, flux::last)); + return cursor_type{.cur = std::move(fst), + .next = std::move(bounds)}; + } -template -struct sequence_traits> -{ -private: - struct cursor_type { - cursor_t cur; - bounds_t next; - bool trailing_empty = false; + static constexpr auto is_last(auto& self, cursor_type const& cur) + -> bool + { + return flux::is_last(self.base_, cur.cur) && !cur.trailing_empty; + } - friend bool operator==(cursor_type const&, cursor_type const&) = default; - }; + static constexpr auto read_at(auto& 
self, cursor_type const& cur) + { + return flux::slice(self.base_, cur.cur, cur.next.from); + } - static constexpr auto find_next(auto& self, auto const& from) - { - if constexpr (detail::is_single_seq) { - // auto cur = self.base_[{cur, last}].find(self.pattern_.value()); - auto cur = flux::find(flux::slice(self.base_, from, flux::last), - self.pattern_.value()); - if (flux::is_last(self.base_, cur)) { - return bounds{cur, cur}; + static constexpr auto inc(auto& self, cursor_type& cur) -> void + { + cur.cur = cur.next.from; + if (!flux::is_last(self.base_, cur.cur)) { + cur.cur = cur.next.to; + if (flux::is_last(self.base_, cur.cur)) { + cur.trailing_empty = true; + cur.next = {cur.cur, cur.cur}; + } else { + cur.next = self.splitter_(flux::slice(self.base_, cur.cur, flux::last)); + } } else { - return bounds{cur, flux::next(self.base_, cur)}; + cur.trailing_empty = false; } - } else { - return flux::search(flux::slice(self.base_, from, flux::last), - self.pattern_); } - } + }; +}; -public: +template +struct pattern_splitter { +private: + FLUX_NO_UNIQUE_ADDRESS Pattern pattern_; - static constexpr bool is_infinite = infinite_sequence; +public: + constexpr explicit pattern_splitter(decays_to auto&& pattern) + : pattern_(FLUX_FWD(pattern)) + {} - static constexpr auto first(auto& self) -> cursor_type + template + constexpr auto operator()(Seq&& seq) const -> bounds_t { - auto bounds = flux::search(self.base_, self.pattern_); - return cursor_type(flux::first(self.base_), std::move(bounds)); + return flux::search(seq, pattern_); } +}; - static constexpr auto is_last(auto& self, cursor_type const& cur) -> bool +template +struct delim_splitter { +private: + FLUX_NO_UNIQUE_ADDRESS Delim delim_; + +public: + constexpr explicit delim_splitter(decays_to auto&& delim) + : delim_(FLUX_FWD(delim)) + {} + + template + requires std::equality_comparable_with, Delim const&> + constexpr auto operator()(Seq&& seq) const -> bounds_t { - return flux::is_last(self.base_, cur.cur) && 
!cur.trailing_empty; + auto nxt = flux::find(seq, delim_); + if (!flux::is_last(seq, nxt)) { + return bounds{nxt, flux::next(seq, nxt)}; + } else { + return bounds{nxt, nxt}; + } } +}; - static constexpr auto read_at(auto& self, cursor_type const& cur) - requires sequence +struct split_fn { + template + requires multipass_sequence && + multipass_sequence && + std::equality_comparable_with, element_t> + [[nodiscard]] + constexpr auto operator()(Seq&& seq, Pattern&& pattern) const { - return flux::slice(self.base_, cur.cur, cur.next.from); + using splitter_t = pattern_splitter>; + return split_adaptor, splitter_t>( + FLUX_FWD(seq), splitter_t(FLUX_FWD(pattern))); } - static constexpr auto inc(auto& self, cursor_type& cur) + template + requires multipass_sequence && + std::equality_comparable_with, Delim const&> + [[nodiscard]] + constexpr auto operator()(Seq&& seq, Delim&& delim) const { - cur.cur = cur.next.from; - if (!flux::is_last(self.base_, cur.cur)) { - cur.cur = cur.next.to; - if (flux::is_last(self.base_, cur.cur)) { - cur.trailing_empty = true; - cur.next = {cur.cur, cur.cur}; - } else { - cur.next = find_next(self, cur.cur); - } - } else { - cur.trailing_empty = false; - } + using splitter_t = delim_splitter>; + return split_adaptor, splitter_t>( + FLUX_FWD(seq), splitter_t(FLUX_FWD(delim))); } }; +} // namespace detail + FLUX_EXPORT inline constexpr auto split = detail::split_fn{}; template @@ -139,9 +164,9 @@ constexpr auto inline_sequence_base::split(Pattern&& pattern) && } template -template - requires decays_to> -constexpr auto inline_sequence_base::split(ValueType&& delim) && +template + requires std::equality_comparable_with, Delim const&> +constexpr auto inline_sequence_base::split(Delim&& delim) && { return flux::split(std::move(derived()), FLUX_FWD(delim)); } From 47a9b3d8db98b2631ec5b5740ebb43f18d8b7187 Mon Sep 17 00:00:00 2001 From: Tristan Brindle Date: Thu, 7 Dec 2023 17:25:46 +0000 Subject: [PATCH 2/9] Improve concept checks with 
pattern_splitter --- include/flux/op/split.hpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/flux/op/split.hpp b/include/flux/op/split.hpp index e371c5db..f7289350 100644 --- a/include/flux/op/split.hpp +++ b/include/flux/op/split.hpp @@ -97,6 +97,15 @@ struct pattern_splitter { {} template + requires std::equality_comparable_with, element_t> + constexpr auto operator()(Seq&& seq) -> bounds_t + { + return flux::search(seq, pattern_); + } + + template + requires multipass_sequence && + std::equality_comparable_with, element_t> constexpr auto operator()(Seq&& seq) const -> bounds_t { return flux::search(seq, pattern_); From 62fa5240f23cb601072b625c6e14953a69ac485f Mon Sep 17 00:00:00 2001 From: Tristan Brindle Date: Mon, 11 Dec 2023 17:05:20 +0000 Subject: [PATCH 3/9] Fix constraints on split() member functions --- include/flux/core/inline_sequence_base.hpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/include/flux/core/inline_sequence_base.hpp b/include/flux/core/inline_sequence_base.hpp index 2129cc07..a90c1d48 100644 --- a/include/flux/core/inline_sequence_base.hpp +++ b/include/flux/core/inline_sequence_base.hpp @@ -317,13 +317,16 @@ struct inline_sequence_base { [[nodiscard]] constexpr auto slide(std::integral auto win_sz) && requires multipass_sequence; - template - requires std::equality_comparable_with, element_t> + template + requires multipass_sequence && + multipass_sequence && + std::equality_comparable_with, element_t> [[nodiscard]] constexpr auto split(Pattern&& pattern) &&; template - requires std::equality_comparable_with, Delim const&> + requires multipass_sequence && + std::equality_comparable_with, Delim const&> [[nodiscard]] constexpr auto split(Delim&& delim) &&; From 898a40986e2773bcf652eab1a23bdcdfe2507346 Mon Sep 17 00:00:00 2001 From: Tristan Brindle Date: Mon, 11 Dec 2023 17:17:13 +0000 Subject: [PATCH 4/9] Add split() overload taking a predicate We split every time the predicate returns 
true, i.e. split("two spaces -> <-", ::isspace) would yield ["two", "spaces", "->", "", "<-"], with an empty sequence between the arrows. --- include/flux/core/inline_sequence_base.hpp | 6 ++ include/flux/op/split.hpp | 52 +++++++++++++- test/test_split.cpp | 80 ++++++++++++++++++++-- 3 files changed, 131 insertions(+), 7 deletions(-) diff --git a/include/flux/core/inline_sequence_base.hpp b/include/flux/core/inline_sequence_base.hpp index a90c1d48..b767126c 100644 --- a/include/flux/core/inline_sequence_base.hpp +++ b/include/flux/core/inline_sequence_base.hpp @@ -330,6 +330,12 @@ struct inline_sequence_base { [[nodiscard]] constexpr auto split(Delim&& delim) &&; + template + requires multipass_sequence && + std::predicate> + [[nodiscard]] + constexpr auto split(Pred pred) &&; + template [[nodiscard]] constexpr auto split_string(Pattern&& pattern) &&; diff --git a/include/flux/op/split.hpp b/include/flux/op/split.hpp index f7289350..3c6cb8a2 100644 --- a/include/flux/op/split.hpp +++ b/include/flux/op/split.hpp @@ -135,6 +135,29 @@ struct delim_splitter { } }; +template +struct predicate_splitter { +private: + FLUX_NO_UNIQUE_ADDRESS Pred pred_; + +public: + constexpr explicit predicate_splitter(decays_to auto&& pred) + : pred_(FLUX_FWD(pred)) + {} + + template + requires std::predicate> + constexpr auto operator()(Seq&& seq) const -> bounds_t + { + auto nxt = flux::find_if(seq, pred_); + if (!flux::is_last(seq, nxt)) { + return bounds{nxt, flux::next(seq, nxt)}; + } else { + return bounds{nxt, nxt}; + } + } +}; + struct split_fn { template requires multipass_sequence && @@ -158,6 +181,17 @@ struct split_fn { return split_adaptor, splitter_t>( FLUX_FWD(seq), splitter_t(FLUX_FWD(delim))); } + + template + requires multipass_sequence && + std::predicate> + [[nodiscard]] + constexpr auto operator()(Seq&& seq, Pred pred) const + { + using splitter_t = predicate_splitter; + return split_adaptor, splitter_t>( + FLUX_FWD(seq), splitter_t(std::move(pred))); + } }; } // 
namespace detail @@ -165,8 +199,10 @@ struct split_fn { FLUX_EXPORT inline constexpr auto split = detail::split_fn{}; template -template - requires std::equality_comparable_with, element_t> +template + requires multipass_sequence && + multipass_sequence && + std::equality_comparable_with, element_t> constexpr auto inline_sequence_base::split(Pattern&& pattern) && { return flux::split(std::move(derived()), FLUX_FWD(pattern)); @@ -174,12 +210,22 @@ constexpr auto inline_sequence_base::split(Pattern&& pattern) && template template - requires std::equality_comparable_with, Delim const&> + requires multipass_sequence && + std::equality_comparable_with, Delim const&> constexpr auto inline_sequence_base::split(Delim&& delim) && { return flux::split(std::move(derived()), FLUX_FWD(delim)); } +template +template + requires multipass_sequence && + std::predicate> +constexpr auto inline_sequence_base::split(Pred pred) && +{ + return flux::split(std::move(derived()), std::move(pred)); +} + } // namespace flux diff --git a/test/test_split.cpp b/test/test_split.cpp index c84b8cfa..cd2a166a 100644 --- a/test/test_split.cpp +++ b/test/test_split.cpp @@ -22,7 +22,7 @@ constexpr auto to_string_view = [](Seq&& seq) // danger Will Robi return std::basic_string_view>(flux::data(seq), flux::usize(seq)); }; -constexpr bool test_split() +constexpr bool test_split_with_delim() { using namespace std::string_view_literals; @@ -63,6 +63,13 @@ constexpr bool test_split() std::array{"a"sv, "b"sv})); } + return true; +} + +constexpr bool test_split_with_pattern() +{ + using namespace std::string_view_literals; + // Split with pattern { int nums[] = {0, 1, 2, 3, 99}; @@ -90,12 +97,77 @@ constexpr bool test_split() return true; } -static_assert(test_split()); +constexpr bool test_split_with_predicate() +{ + using namespace std::string_view_literals; + + { + std::array arr{1, 2, 0, 3, 4, 0, 5}; + + auto split = flux::ref(arr).split(flux::pred::eq(0)); + + using S = decltype(split); + + 
static_assert(flux::sequence); + static_assert(flux::multipass_sequence); + static_assert(not flux::bidirectional_sequence); + static_assert(not flux::sized_sequence); + + static_assert(flux::sequence); + static_assert(flux::multipass_sequence); + static_assert(not flux::bidirectional_sequence); + static_assert(not flux::sized_sequence); + + using E = flux::element_t; + static_assert(flux::contiguous_sequence); + static_assert(flux::sized_sequence); + + using EC = flux::element_t; + static_assert(flux::contiguous_sequence); + static_assert(flux::sized_sequence); + + auto cur = split.first(); + STATIC_CHECK(check_equal(split[cur], {1, 2})); + split.inc(cur); + STATIC_CHECK(check_equal(split[cur], {3, 4})); + split.inc(cur); + STATIC_CHECK(check_equal(split[cur], {5})); + split.inc(cur); + STATIC_CHECK(split.is_last(cur)); + } + + { + auto const seq = flux::split("two spaces -> <-"sv, flux::pred::eq(' ')) + .map(to_string_view); + + STATIC_CHECK(check_equal(seq, + std::array{"two"sv, "spaces"sv, "->"sv, ""sv, "<-"sv})); + } + + return true; +} + +static_assert(test_split_with_delim()); +static_assert(test_split_with_pattern()); +static_assert(test_split_with_predicate()); + +} + +TEST_CASE("split with delimiter") +{ + bool result = test_split_with_delim(); + REQUIRE(result); +} + +TEST_CASE("split with pattern") +{ + bool result = test_split_with_pattern(); + REQUIRE(result); } -TEST_CASE("split") +TEST_CASE("split with predicate") { - bool result = test_split(); + bool result = test_split_with_predicate(); REQUIRE(result); } From 3d554f0f197156e471c5e8542dd84feb0748da32 Mon Sep 17 00:00:00 2001 From: Tristan Brindle Date: Mon, 11 Dec 2023 19:50:33 +0000 Subject: [PATCH 5/9] Re-enable testing of docs examples Not sure how long this has been broken for, ooops... 
--- example/CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 03ae7d8e..0e5b48a0 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -2,7 +2,9 @@ function(ADD_EXAMPLE NAME SOURCE) add_executable(${NAME} ${SOURCE}) target_link_libraries(${NAME} flux) - if(NOT ${${NAME}_SKIP_TEST}) + if(${${NAME}_SKIP_TEST}) + #pass + else() add_test(NAME ${NAME} COMMAND ${NAME} ${${NAME}_ARGS}) endif() endfunction() @@ -11,8 +13,8 @@ add_example(example-config-parser config_parser.cpp) add_example(example-calendar calendar.cpp) add_example(example-merge-intervals merge_intervals.cpp) add_example(example-histogram histogram.cpp) +set(example-word-count_SKIP_TEST ON) add_example(example-word-count word_count.cpp) -set(example-word-count_SKIP_TEST TRUE) add_example(example-prime-numbers prime_numbers.cpp) add_example(example-shortest-path shortest_path.cpp) add_example(example-moving-average moving_average.cpp) From a421818b13be4718eb9d45626a48b72a2001e6e3 Mon Sep 17 00:00:00 2001 From: Tristan Brindle Date: Mon, 11 Dec 2023 20:14:23 +0000 Subject: [PATCH 6/9] Add a cool recursive overload of equal() If you pass equal() two sequences-of-sequences which are not directly equality comparable, but whose inner sequence types *are* comparable, then it will recursively call itself to compare the inner sequences. This is actually pretty neat. 
--- include/flux/op/equal.hpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/include/flux/op/equal.hpp b/include/flux/op/equal.hpp index 93450138..b307c55b 100644 --- a/include/flux/op/equal.hpp +++ b/include/flux/op/equal.hpp @@ -74,6 +74,22 @@ struct equal_fn { return impl(seq1, seq2, cmp); } } + + template + requires (sequence> && + sequence> && + !std::equality_comparable_with, element_t> && + std::is_invocable_v) + constexpr auto operator()(Seq1&& seq1, Seq2&& seq2) const -> bool + { + if constexpr (sized_sequence && sized_sequence) { + if (flux::size(seq1) != flux::size(seq2)) { + return false; + } + } + + return (*this)(seq1, seq2, *this); + } }; } // namespace detail From 5f18ec38d07f7e3779f1c600e24c856b71f3d9ab Mon Sep 17 00:00:00 2001 From: Tristan Brindle Date: Mon, 11 Dec 2023 23:10:25 +0000 Subject: [PATCH 7/9] Make flux::is_empty callable with rvalues ...and likewise, size() and usize(). These can be useful with sequences that produce rvalue subsequences, like split(). 
--- include/flux/core/sequence_access.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/flux/core/sequence_access.hpp b/include/flux/core/sequence_access.hpp index d9b5e622..27edc7b5 100644 --- a/include/flux/core/sequence_access.hpp +++ b/include/flux/core/sequence_access.hpp @@ -115,7 +115,7 @@ struct last_fn { struct size_fn { template [[nodiscard]] - constexpr auto operator()(Seq& seq) const -> distance_t + constexpr auto operator()(Seq&& seq) const -> distance_t { if constexpr (requires { traits_t::size(seq); }) { return traits_t::size(seq); @@ -129,7 +129,7 @@ struct size_fn { struct usize_fn { template [[nodiscard]] - constexpr auto operator()(Seq& seq) const -> std::size_t + constexpr auto operator()(Seq&& seq) const -> std::size_t { return checked_cast(size_fn{}(seq)); } @@ -277,7 +277,7 @@ struct is_empty_fn { template requires (multipass_sequence || sized_sequence) [[nodiscard]] - constexpr auto operator()(Seq& seq) const -> bool + constexpr auto operator()(Seq&& seq) const -> bool { if constexpr (sized_sequence) { return flux::size(seq) == 0; From 5a48261fbcc62b1c31146136fb2347906cafdd0e Mon Sep 17 00:00:00 2001 From: Tristan Brindle Date: Mon, 11 Dec 2023 23:13:45 +0000 Subject: [PATCH 8/9] Add split() documentation And a long old example, too --- docs/reference/adaptors.rst | 50 ++++++++++++++++++----- example/CMakeLists.txt | 1 + example/docs/split.cpp | 80 +++++++++++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 9 deletions(-) create mode 100644 example/docs/split.cpp diff --git a/docs/reference/adaptors.rst b/docs/reference/adaptors.rst index 3f5d1f8e..152e7719 100644 --- a/docs/reference/adaptors.rst +++ b/docs/reference/adaptors.rst @@ -631,23 +631,55 @@ Adaptors .. function:: auto slide(multipass_sequence auto seq, std::integral auto win_sz) -> multipass_sequence auto; -``stride`` -^^^^^^^^^^ - -.. 
function:: - auto stride(sequence auto seq, std::integral auto stride_len) -> sequence auto; - ``split`` ^^^^^^^^^ +.. function:: + template \ + requires std::equality_comparable_with, Delim const&> \ + auto split(Seq seq, Delim delim) -> multipass_sequence auto; + .. function:: template \ requires std::equality_comparable_with, element_t> \ - auto split(Seq seq, Pattern pattern) -> sequence auto; + auto split(Seq seq, Pattern pattern) -> multipass_sequence auto; .. function:: - template \ - auto split(Seq seq, value_t delim) -> sequence auto; + template \ + requires std::predicate> \ + auto split(Seq seq, Pred pred) -> multipass_sequence auto; + + Splits a :concept:`multipass_sequence` into a sequence-of-subsequences using the given argument. + + The first overload takes a delimiter, which must be equality comparable with the source sequence's value type. The source sequence will be split on each occurrence of the delimiter, with the delimiter itself removed. Consecutive delimiters will result in empty subsequences in the output. If the source sequence begins with a delimiter then the first subsequence will be empty, and likewise if it ends with a delimiter then the final subsequence will be empty. + + The second overload takes another sequence, the :var:`pattern`, whose elements must be equality comparable with the elements of the source sequence. The source is split whenever the pattern occurs as a subsequence. Consecutive (non-overlapping) occurrences of the pattern will result in empty sequences in the output. If :expr:`ends_with(seq, pattern)` is :expr:`true`, the final subsequence will be empty. + + The third overload takes a unary predicate which will be called with successive elements of the source sequence and returns :expr:`true` when a split should occur. The "``true``" element will be removed from the output. If the predicate returns ``true`` for two consecutive of the source, then the output will contain an empty subsequence. 
If the predicate returns ``true``` for the final element of the source, then the final subsequence will be empty. + + :param seq: A multipass sequence to split. + :param delim: For the first overload, a delimiter to split on. Must be equality comparable with the element type of :var:`seq` + :param pattern: For the second overload, a multipass sequence to split on. Its element type must be equality comparable with the element type of :var:`seq`. + :param pred: For the third overload, a unary predicate accepting elements of :var:`seq`, returning ``true`` when a split should occur. + + :returns: A multipass sequence whose elements are subsequences of :var:`seq`. + + :example: + + .. literalinclude:: ../../example/docs/split.cpp + :language: cpp + :dedent: + :lines: 18-79 + + :see also: + * `std::views::split() `_ + * :func:`flux::chunk_by` + +``stride`` +^^^^^^^^^^ + +.. function:: + auto stride(sequence auto seq, std::integral auto stride_len) -> sequence auto; ``take`` ^^^^^^^^ diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 0e5b48a0..44f5668b 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -43,6 +43,7 @@ add_example(example-docs-set-intersection docs/set_intersection.cpp) add_example(example-docs-set-symmetric-difference docs/set_symmetric_difference.cpp) add_example(example-docs-set-union docs/set_union.cpp) add_example(example-docs-scan-first docs/scan_first.cpp) +add_example(example-docs-split docs/split.cpp) add_example(example-docs-starts-with docs/starts_with.cpp) add_example(example-docs-unfold docs/unfold.cpp) diff --git a/example/docs/split.cpp b/example/docs/split.cpp new file mode 100644 index 00000000..b8c71cf0 --- /dev/null +++ b/example/docs/split.cpp @@ -0,0 +1,80 @@ + +// Copyright (c) 2023 Tristan Brindle (tcbrindle at gmail dot com) +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include + +#include +#include +#include +#include +#include + +using namespace std::string_view_literals; + +int main() +{ + using flux::equal; + + // We can split a sequence using a single delimiter + auto seq1 = flux::split("here are some words"sv, ' '); + assert(equal(seq1, std::array{"here"sv, "are"sv, "some"sv, "words"sv})); + + + // Consecutive delimiters will result in empty subsequences in the output + auto seq2 = flux::split("some,,,commas"sv, ','); + assert(equal(seq2, std::array{"some"sv, ""sv, ""sv, "commas"sv})); + + + // If the sequence ends with a delimiter, the final subsequence will be empty + auto seq3 = flux::split("Two. Sentences."sv, '.'); + assert(equal(seq3, std::array{"Two"sv, " Sentences"sv, ""sv})); + + + // We can also split a sequence with a pattern + auto seq4 = flux::split(std::vector{1, 2, 3, 4, 5}, std::array{2, 3}); + assert(equal(seq4, std::vector{std::vector{1}, std::vector{4, 5}})); + + + // Repeated, non-overlapping patterns result in empty subsequences + auto seq5 = flux::split("Hello!!!!World"sv, "!!"sv); + assert(equal(seq5, std::array{"Hello"sv, ""sv, "World"sv})); + + + // Overlapping patterns are only matched once + auto seq6 = flux::split("Hello!!!World"sv, "!!"sv); + assert(equal(seq6, std::array{"Hello"sv, "!World"sv})); + + + // If the sequence begins with the pattern, the first subsequence will + // be empty... + auto seq7 = flux::split("!!Hello"sv, "!!"sv); + assert(equal(seq7, std::array{""sv, "Hello"sv})); + + + // ... 
and likewise if it ends with the pattern + auto seq8 = flux::split("Hello!!"sv, "!!"sv); + assert(equal(seq8, std::array{"Hello"sv, ""sv})); + + + // Lastly, we can split using a predicate function + auto is_digit = [](char c) { return c >= '0' && c <= '9'; }; + + auto seq9 = flux::split("These1are2some3words"sv, is_digit); + assert(equal(seq9, std::array{"These"sv, "are"sv, "some"sv, "words"sv})); + + + // As usual, consecutive "true" elements in the input will produce + // empty subsequences in the output + auto seq10 = flux::split("A123B"sv, is_digit); + assert(equal(seq10, std::array{"A"sv, ""sv, ""sv, "B"sv})); + + + // It can be useful to combine splitting with a "not empty" filter + auto is_space = [](char c) { return std::isspace(static_cast(c)); }; + + auto seq11 = flux::split("Alpha Bravo\t\rCharlie \n"sv, is_space) + .filter(std::not_fn(flux::is_empty)); + assert(equal(seq11, std::array{"Alpha"sv, "Bravo"sv, "Charlie"sv})); +} \ No newline at end of file From 3daf89057f9c164f73b89215342599dbd2fcece7 Mon Sep 17 00:00:00 2001 From: Tristan Brindle Date: Tue, 12 Dec 2023 00:48:12 +0000 Subject: [PATCH 9/9] Make split_adaptor a bounded_sequence When its parent sequence is bounded, anyway --- docs/reference/adaptors.rst | 2 ++ include/flux/op/split.hpp | 15 ++++++++++++--- test/test_split.cpp | 18 ++++++++++++++++++ 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/docs/reference/adaptors.rst b/docs/reference/adaptors.rst index 152e7719..64f4a179 100644 --- a/docs/reference/adaptors.rst +++ b/docs/reference/adaptors.rst @@ -657,6 +657,8 @@ Adaptors The third overload takes a unary predicate which will be called with successive elements of the source sequence and returns :expr:`true` when a split should occur. The "``true``" element will be removed from the output. If the predicate returns ``true`` for two consecutive of the source, then the output will contain an empty subsequence.
If the predicate returns ``true``` for the final element of the source, then the final subsequence will be empty. + The returned sequence is always a :concept:`multipass_sequence`. It is additionally a :concept:`bounded_sequence` when :var:`Seq` is bounded. + :param seq: A multipass sequence to split. :param delim: For the first overload, a delimiter to split on. Must be equality comparable with the element type of :var:`seq` :param pattern: For the second overload, a multipass sequence to split on. Its element type must be equality comparable with the element type of :var:`seq`. diff --git a/include/flux/op/split.hpp b/include/flux/op/split.hpp index 3c6cb8a2..1d12b513 100644 --- a/include/flux/op/split.hpp +++ b/include/flux/op/split.hpp @@ -37,11 +37,14 @@ struct split_adaptor : inline_sequence_base> { struct flux_sequence_traits { private: struct cursor_type { - cursor_t cur; - bounds_t next; + cursor_t cur{}; + bounds_t next{}; bool trailing_empty = false; - friend bool operator==(cursor_type const&, cursor_type const&) = default; + friend constexpr bool operator==(cursor_type const& lhs, cursor_type const& rhs) + { + return lhs.cur == rhs.cur && lhs.trailing_empty == rhs.trailing_empty; + } }; public: @@ -83,6 +86,12 @@ struct split_adaptor : inline_sequence_base> { cur.trailing_empty = false; } } + + static constexpr auto last(auto& self) -> cursor_type + requires bounded_sequence + { + return cursor_type{.cur = flux::last(self.base_)}; + } }; }; diff --git a/test/test_split.cpp b/test/test_split.cpp index cd2a166a..4ab0598e 100644 --- a/test/test_split.cpp +++ b/test/test_split.cpp @@ -34,9 +34,11 @@ constexpr bool test_split_with_delim() using S = decltype(split); static_assert(flux::multipass_sequence); + static_assert(flux::bounded_sequence); static_assert(flux::contiguous_sequence>); static_assert(flux::multipass_sequence); + static_assert(flux::bounded_sequence); static_assert(flux::contiguous_sequence>); 
STATIC_CHECK(check_equal(std::move(split).map(to_string_view), @@ -48,6 +50,16 @@ constexpr bool test_split_with_delim() auto split = flux::split(" trailing space "sv, ' ').map(to_string_view); STATIC_CHECK(check_equal(split, std::array{""sv, "trailing"sv, "space"sv, ""sv})); + + auto cur = split.first(); + split.inc(cur); + split.inc(cur); + split.inc(cur); + STATIC_CHECK(cur.trailing_empty == true); + STATIC_CHECK(cur != split.last()); + split.inc(cur); + STATIC_CHECK(cur.trailing_empty == false); + STATIC_CHECK(cur == split.last()); } // Non-bounded sequences can be split correctly @@ -57,6 +69,7 @@ constexpr bool test_split_with_delim() using S = decltype(split); static_assert(flux::multipass_sequence); + static_assert(not flux::bounded_sequence); static_assert(flux::contiguous_sequence>); STATIC_CHECK(check_equal(std::move(split).map(to_string_view), @@ -111,11 +124,13 @@ constexpr bool test_split_with_predicate() static_assert(flux::sequence); static_assert(flux::multipass_sequence); + static_assert(flux::bounded_sequence); static_assert(not flux::bidirectional_sequence); static_assert(not flux::sized_sequence); static_assert(flux::sequence); static_assert(flux::multipass_sequence); + static_assert(flux::bounded_sequence); static_assert(not flux::bidirectional_sequence); static_assert(not flux::sized_sequence); @@ -132,9 +147,12 @@ constexpr bool test_split_with_predicate() split.inc(cur); STATIC_CHECK(check_equal(split[cur], {3, 4})); split.inc(cur); + STATIC_CHECK(cur != split.last()); STATIC_CHECK(check_equal(split[cur], {5})); split.inc(cur); STATIC_CHECK(split.is_last(cur)); + + STATIC_CHECK(cur == split.last()); } {