diff --git a/docs/reference/adaptors.rst b/docs/reference/adaptors.rst index 3f5d1f8e..64f4a179 100644 --- a/docs/reference/adaptors.rst +++ b/docs/reference/adaptors.rst @@ -631,23 +631,57 @@ Adaptors .. function:: auto slide(multipass_sequence auto seq, std::integral auto win_sz) -> multipass_sequence auto; -``stride`` -^^^^^^^^^^ - -.. function:: - auto stride(sequence auto seq, std::integral auto stride_len) -> sequence auto; - ``split`` ^^^^^^^^^ +.. function:: + template \ + requires std::equality_comparable_with, Delim const&> \ + auto split(Seq seq, Delim delim) -> multipass_sequence auto; + .. function:: template \ requires std::equality_comparable_with, element_t> \ - auto split(Seq seq, Pattern pattern) -> sequence auto; + auto split(Seq seq, Pattern pattern) -> multipass_sequence auto; .. function:: - template \ - auto split(Seq seq, value_t delim) -> sequence auto; + template \ + requires std::predicate> \ + auto split(Seq seq, Pred pred) -> multipass_sequence auto; + + Splits a :concept:`multipass_sequence` into a sequence-of-subsequences using the given argument. + + The first overload takes a delimiter, which must be equality comparable with the source sequence's value type. The source sequence will be split on each occurrence of the delimiter, with the delimiter itself removed. Consecutive delimiters will result in empty subsequences in the output. If the source sequence begins with a delimiter then the first subsequence will be empty, and likewise if it ends with a delimiter then the final subsequence will be empty. + + The second overload takes another sequence, the :var:`pattern`, whose elements must be equality comparable with the elements of the source sequence. The source is split whenever the pattern occurs as a subsequence. Consecutive (non-overlapping) occurrences of the pattern will result in empty sequences in the output. If :expr:`ends_with(seq, pattern)` is :expr:`true`, the final subsequence will be empty. 
+ + The third overload takes a unary predicate which will be called with successive elements of the source sequence and returns :expr:`true` when a split should occur. The "``true``" element will be removed from the output. If the predicate returns ``true`` for two consecutive elements of the source, then the output will contain an empty subsequence. If the predicate returns ``true`` for the final element of the source, then the final subsequence will be empty. + + The returned sequence is always a :concept:`multipass_sequence`. It is additionally a :concept:`bounded_sequence` when :var:`Seq` is bounded. + + :param seq: A multipass sequence to split. + :param delim: For the first overload, a delimiter to split on. Must be equality comparable with the element type of :var:`seq`. + :param pattern: For the second overload, a multipass sequence to split on. Its element type must be equality comparable with the element type of :var:`seq`. + :param pred: For the third overload, a unary predicate accepting elements of :var:`seq`, returning ``true`` when a split should occur. + + :returns: A multipass sequence whose elements are subsequences of :var:`seq`. + + :example: + + .. literalinclude:: ../../example/docs/split.cpp + :language: cpp + :dedent: + :lines: 18-79 + + :see also: + * `std::views::split() `_ + * :func:`flux::chunk_by` + +``stride`` +^^^^^^^^^^ + +..
function:: + auto stride(sequence auto seq, std::integral auto stride_len) -> sequence auto; ``take`` ^^^^^^^^ diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 03ae7d8e..44f5668b 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -2,7 +2,9 @@ function(ADD_EXAMPLE NAME SOURCE) add_executable(${NAME} ${SOURCE}) target_link_libraries(${NAME} flux) - if(NOT ${${NAME}_SKIP_TEST}) + if(${${NAME}_SKIP_TEST}) + #pass + else() add_test(NAME ${NAME} COMMAND ${NAME} ${${NAME}_ARGS}) endif() endfunction() @@ -11,8 +13,8 @@ add_example(example-config-parser config_parser.cpp) add_example(example-calendar calendar.cpp) add_example(example-merge-intervals merge_intervals.cpp) add_example(example-histogram histogram.cpp) +set(example-word-count_SKIP_TEST ON) add_example(example-word-count word_count.cpp) -set(example-word-count_SKIP_TEST TRUE) add_example(example-prime-numbers prime_numbers.cpp) add_example(example-shortest-path shortest_path.cpp) add_example(example-moving-average moving_average.cpp) @@ -41,6 +43,7 @@ add_example(example-docs-set-intersection docs/set_intersection.cpp) add_example(example-docs-set-symmetric-difference docs/set_symmetric_difference.cpp) add_example(example-docs-set-union docs/set_union.cpp) add_example(example-docs-scan-first docs/scan_first.cpp) +add_example(example-docs-split docs/split.cpp) add_example(example-docs-starts-with docs/starts_with.cpp) add_example(example-docs-unfold docs/unfold.cpp) diff --git a/example/docs/split.cpp b/example/docs/split.cpp new file mode 100644 index 00000000..b8c71cf0 --- /dev/null +++ b/example/docs/split.cpp @@ -0,0 +1,80 @@ + +// Copyright (c) 2023 Tristan Brindle (tcbrindle at gmail dot com) +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include + +#include +#include +#include +#include +#include + +using namespace std::string_view_literals; + +int main() +{ + using flux::equal; + + // We can split a sequence using a single delimiter + auto seq1 = flux::split("here are some words"sv, ' '); + assert(equal(seq1, std::array{"here"sv, "are"sv, "some"sv, "words"sv})); + + + // Consecutive delimiters will result in empty subsequences in the output + auto seq2 = flux::split("some,,,commas"sv, ','); + assert(equal(seq2, std::array{"some"sv, ""sv, ""sv, "commas"sv})); + + + // If the sequence ends with a delimiter, the final subsequence will be empty + auto seq3 = flux::split("Two. Sentences."sv, '.'); + assert(equal(seq3, std::array{"Two"sv, " Sentences"sv, ""sv})); + + + // We can also split a sequence with a pattern + auto seq4 = flux::split(std::vector{1, 2, 3, 4, 5}, std::array{2, 3}); + assert(equal(seq4, std::vector{std::vector{1}, std::vector{4, 5}})); + + + // Repeated, non-overlapping patterns result in empty subsequences + auto seq5 = flux::split("Hello!!!!World"sv, "!!"sv); + assert(equal(seq5, std::array{"Hello"sv, ""sv, "World"sv})); + + + // Overlapping patterns are only matched once + auto seq6 = flux::split("Hello!!!World"sv, "!!"sv); + assert(equal(seq6, std::array{"Hello"sv, "!World"sv})); + + + // If the sequence begins with the pattern, the first subsequence will + // be empty... + auto seq7 = flux::split("!!Hello"sv, "!!"sv); + assert(equal(seq7, std::array{""sv, "Hello"sv})); + + + // ... 
and likewise if it ends with the pattern + auto seq8 = flux::split("Hello!!"sv, "!!"sv); + assert(equal(seq8, std::array{"Hello"sv, ""sv})); + + + // Lastly, we can split using a predicate function + auto is_digit = [](char c) { return c >= '0' && c <= '9'; }; + + auto seq9 = flux::split("These1are2some3words"sv, is_digit); + assert(equal(seq9, std::array{"These"sv, "are"sv, "some"sv, "words"sv})); + + + // As usual, consecutive "true" elements in the input will produce + // empty subsequences in the output + auto seq10 = flux::split("A123B"sv, is_digit); + assert(equal(seq10, std::array{"A"sv, ""sv, ""sv, "B"sv})); + + + // It can be useful to combine splitting with a "not empty" filter + auto is_space = [](char c) { return std::isspace(static_cast(c)); }; + + auto seq11 = flux::split("Alpha Bravo\t\rCharlie \n"sv, is_space) + .filter(std::not_fn(flux::is_empty)); + assert(equal(seq11, std::array{"Alpha"sv, "Bravo"sv, "Charlie"sv})); +} \ No newline at end of file diff --git a/include/flux/core/inline_sequence_base.hpp b/include/flux/core/inline_sequence_base.hpp index be1bb6c5..b767126c 100644 --- a/include/flux/core/inline_sequence_base.hpp +++ b/include/flux/core/inline_sequence_base.hpp @@ -317,15 +317,24 @@ struct inline_sequence_base { [[nodiscard]] constexpr auto slide(std::integral auto win_sz) && requires multipass_sequence; - template - requires std::equality_comparable_with, element_t> + template + requires multipass_sequence && + multipass_sequence && + std::equality_comparable_with, element_t> [[nodiscard]] constexpr auto split(Pattern&& pattern) &&; - template - requires decays_to> + template + requires multipass_sequence && + std::equality_comparable_with, Delim const&> + [[nodiscard]] + constexpr auto split(Delim&& delim) &&; + + template + requires multipass_sequence && + std::predicate> [[nodiscard]] - constexpr auto split(ValueType&& delim) &&; + constexpr auto split(Pred pred) &&; template [[nodiscard]] diff --git
a/include/flux/core/sequence_access.hpp b/include/flux/core/sequence_access.hpp index d9b5e622..27edc7b5 100644 --- a/include/flux/core/sequence_access.hpp +++ b/include/flux/core/sequence_access.hpp @@ -115,7 +115,7 @@ struct last_fn { struct size_fn { template [[nodiscard]] - constexpr auto operator()(Seq& seq) const -> distance_t + constexpr auto operator()(Seq&& seq) const -> distance_t { if constexpr (requires { traits_t::size(seq); }) { return traits_t::size(seq); @@ -129,7 +129,7 @@ struct size_fn { struct usize_fn { template [[nodiscard]] - constexpr auto operator()(Seq& seq) const -> std::size_t + constexpr auto operator()(Seq&& seq) const -> std::size_t { return checked_cast(size_fn{}(seq)); } @@ -277,7 +277,7 @@ struct is_empty_fn { template requires (multipass_sequence || sized_sequence) [[nodiscard]] - constexpr auto operator()(Seq& seq) const -> bool + constexpr auto operator()(Seq&& seq) const -> bool { if constexpr (sized_sequence) { return flux::size(seq) == 0; diff --git a/include/flux/op/equal.hpp b/include/flux/op/equal.hpp index 93450138..b307c55b 100644 --- a/include/flux/op/equal.hpp +++ b/include/flux/op/equal.hpp @@ -74,6 +74,22 @@ struct equal_fn { return impl(seq1, seq2, cmp); } } + + template + requires (sequence> && + sequence> && + !std::equality_comparable_with, element_t> && + std::is_invocable_v) + constexpr auto operator()(Seq1&& seq1, Seq2&& seq2) const -> bool + { + if constexpr (sized_sequence && sized_sequence) { + if (flux::size(seq1) != flux::size(seq2)) { + return false; + } + } + + return (*this)(seq1, seq2, *this); + } }; } // namespace detail diff --git a/include/flux/op/split.hpp b/include/flux/op/split.hpp index 100e3667..1d12b513 100644 --- a/include/flux/op/split.hpp +++ b/include/flux/op/split.hpp @@ -17,135 +17,224 @@ namespace flux { namespace detail { -template -struct split_adaptor : inline_sequence_base> { -private: - Base base_; - Pattern pattern_; +template +concept splitter_for = requires(Splitter& splitter, 
Seq& seq, cursor_t const& cur) { + { splitter(flux::slice(seq, cur, flux::last)) } -> std::same_as>; +}; - friend struct sequence_traits; +template Splitter> +struct split_adaptor : inline_sequence_base> { +private: + FLUX_NO_UNIQUE_ADDRESS Base base_; + FLUX_NO_UNIQUE_ADDRESS Splitter splitter_; public: - constexpr split_adaptor(decays_to auto&& base, decays_to auto&& pattern) + constexpr split_adaptor(decays_to auto&& base, decays_to auto&& splitter) : base_(FLUX_FWD(base)), - pattern_(FLUX_FWD(pattern)) + splitter_(FLUX_FWD(splitter)) {} + + struct flux_sequence_traits { + private: + struct cursor_type { + cursor_t cur{}; + bounds_t next{}; + bool trailing_empty = false; + + friend constexpr bool operator==(cursor_type const& lhs, cursor_type const& rhs) + { + return lhs.cur == rhs.cur && lhs.trailing_empty == rhs.trailing_empty; + } + }; + + public: + static constexpr bool is_infinite = infinite_sequence; + + static constexpr auto first(auto& self) -> cursor_type + requires sequence && + splitter_for + { + auto fst = flux::first(self.base_); + auto bounds = self.splitter_(flux::slice(self.base_, fst, flux::last)); + return cursor_type{.cur = std::move(fst), + .next = std::move(bounds)}; + } + + static constexpr auto is_last(auto& self, cursor_type const& cur) + -> bool + { + return flux::is_last(self.base_, cur.cur) && !cur.trailing_empty; + } + + static constexpr auto read_at(auto& self, cursor_type const& cur) + { + return flux::slice(self.base_, cur.cur, cur.next.from); + } + + static constexpr auto inc(auto& self, cursor_type& cur) -> void + { + cur.cur = cur.next.from; + if (!flux::is_last(self.base_, cur.cur)) { + cur.cur = cur.next.to; + if (flux::is_last(self.base_, cur.cur)) { + cur.trailing_empty = true; + cur.next = {cur.cur, cur.cur}; + } else { + cur.next = self.splitter_(flux::slice(self.base_, cur.cur, flux::last)); + } + } else { + cur.trailing_empty = false; + } + } + + static constexpr auto last(auto& self) -> cursor_type + requires 
bounded_sequence + { + return cursor_type{.cur = flux::last(self.base_)}; + } + }; }; -struct split_fn { - template - requires multipass_sequence && - multipass_sequence && - std::equality_comparable_with, element_t> - [[nodiscard]] - constexpr auto operator()(Seq&& seq, Pattern&& pattern) const +template +struct pattern_splitter { +private: + FLUX_NO_UNIQUE_ADDRESS Pattern pattern_; + +public: + constexpr explicit pattern_splitter(decays_to auto&& pattern) + : pattern_(FLUX_FWD(pattern)) + {} + + template + requires std::equality_comparable_with, element_t> + constexpr auto operator()(Seq&& seq) -> bounds_t { - return split_adaptor, std::decay_t>( - FLUX_FWD(seq), FLUX_FWD(pattern)); + return flux::search(seq, pattern_); } - template - requires multipass_sequence - [[nodiscard]] - constexpr auto operator()(Seq&& seq, value_t delim) const + template + requires multipass_sequence && + std::equality_comparable_with, element_t> + constexpr auto operator()(Seq&& seq) const -> bounds_t { - return (*this)(FLUX_FWD(seq), flux::single(std::move(delim))); + return flux::search(seq, pattern_); } }; -template -inline constexpr bool is_single_seq = false; - -template -inline constexpr bool is_single_seq> = true; - -} // namespace detail - -template -struct sequence_traits> -{ +template +struct delim_splitter { private: - struct cursor_type { - cursor_t cur; - bounds_t next; - bool trailing_empty = false; + FLUX_NO_UNIQUE_ADDRESS Delim delim_; - friend bool operator==(cursor_type const&, cursor_type const&) = default; - }; +public: + constexpr explicit delim_splitter(decays_to auto&& delim) + : delim_(FLUX_FWD(delim)) + {} - static constexpr auto find_next(auto& self, auto const& from) + template + requires std::equality_comparable_with, Delim const&> + constexpr auto operator()(Seq&& seq) const -> bounds_t { - if constexpr (detail::is_single_seq) { - // auto cur = self.base_[{cur, last}].find(self.pattern_.value()); - auto cur = flux::find(flux::slice(self.base_, from, 
flux::last), - self.pattern_.value()); - if (flux::is_last(self.base_, cur)) { - return bounds{cur, cur}; - } else { - return bounds{cur, flux::next(self.base_, cur)}; - } + auto nxt = flux::find(seq, delim_); + if (!flux::is_last(seq, nxt)) { + return bounds{nxt, flux::next(seq, nxt)}; } else { - return flux::search(flux::slice(self.base_, from, flux::last), - self.pattern_); + return bounds{nxt, nxt}; } } +}; -public: +template +struct predicate_splitter { +private: + FLUX_NO_UNIQUE_ADDRESS Pred pred_; - static constexpr bool is_infinite = infinite_sequence; +public: + constexpr explicit predicate_splitter(decays_to auto&& pred) + : pred_(FLUX_FWD(pred)) + {} - static constexpr auto first(auto& self) -> cursor_type + template + requires std::predicate> + constexpr auto operator()(Seq&& seq) const -> bounds_t { - auto bounds = flux::search(self.base_, self.pattern_); - return cursor_type(flux::first(self.base_), std::move(bounds)); + auto nxt = flux::find_if(seq, pred_); + if (!flux::is_last(seq, nxt)) { + return bounds{nxt, flux::next(seq, nxt)}; + } else { + return bounds{nxt, nxt}; + } } +}; - static constexpr auto is_last(auto& self, cursor_type const& cur) -> bool +struct split_fn { + template + requires multipass_sequence && + multipass_sequence && + std::equality_comparable_with, element_t> + [[nodiscard]] + constexpr auto operator()(Seq&& seq, Pattern&& pattern) const { - return flux::is_last(self.base_, cur.cur) && !cur.trailing_empty; + using splitter_t = pattern_splitter>; + return split_adaptor, splitter_t>( + FLUX_FWD(seq), splitter_t(FLUX_FWD(pattern))); } - static constexpr auto read_at(auto& self, cursor_type const& cur) - requires sequence + template + requires multipass_sequence && + std::equality_comparable_with, Delim const&> + [[nodiscard]] + constexpr auto operator()(Seq&& seq, Delim&& delim) const { - return flux::slice(self.base_, cur.cur, cur.next.from); + using splitter_t = delim_splitter>; + return split_adaptor, splitter_t>( + 
FLUX_FWD(seq), splitter_t(FLUX_FWD(delim))); } - static constexpr auto inc(auto& self, cursor_type& cur) + template + requires multipass_sequence && + std::predicate> + [[nodiscard]] + constexpr auto operator()(Seq&& seq, Pred pred) const { - cur.cur = cur.next.from; - if (!flux::is_last(self.base_, cur.cur)) { - cur.cur = cur.next.to; - if (flux::is_last(self.base_, cur.cur)) { - cur.trailing_empty = true; - cur.next = {cur.cur, cur.cur}; - } else { - cur.next = find_next(self, cur.cur); - } - } else { - cur.trailing_empty = false; - } + using splitter_t = predicate_splitter; + return split_adaptor, splitter_t>( + FLUX_FWD(seq), splitter_t(std::move(pred))); } }; +} // namespace detail + FLUX_EXPORT inline constexpr auto split = detail::split_fn{}; template -template - requires std::equality_comparable_with, element_t> +template + requires multipass_sequence && + multipass_sequence && + std::equality_comparable_with, element_t> constexpr auto inline_sequence_base::split(Pattern&& pattern) && { return flux::split(std::move(derived()), FLUX_FWD(pattern)); } template -template - requires decays_to> -constexpr auto inline_sequence_base::split(ValueType&& delim) && +template + requires multipass_sequence && + std::equality_comparable_with, Delim const&> +constexpr auto inline_sequence_base::split(Delim&& delim) && { return flux::split(std::move(derived()), FLUX_FWD(delim)); } +template +template + requires multipass_sequence && + std::predicate> +constexpr auto inline_sequence_base::split(Pred pred) && +{ + return flux::split(std::move(derived()), std::move(pred)); +} + } // namespace flux diff --git a/test/test_split.cpp b/test/test_split.cpp index c84b8cfa..4ab0598e 100644 --- a/test/test_split.cpp +++ b/test/test_split.cpp @@ -22,7 +22,7 @@ constexpr auto to_string_view = [](Seq&& seq) // danger Will Robi return std::basic_string_view>(flux::data(seq), flux::usize(seq)); }; -constexpr bool test_split() +constexpr bool test_split_with_delim() { using namespace 
std::string_view_literals; @@ -34,9 +34,11 @@ constexpr bool test_split() using S = decltype(split); static_assert(flux::multipass_sequence); + static_assert(flux::bounded_sequence); static_assert(flux::contiguous_sequence>); static_assert(flux::multipass_sequence); + static_assert(flux::bounded_sequence); static_assert(flux::contiguous_sequence>); STATIC_CHECK(check_equal(std::move(split).map(to_string_view), @@ -48,6 +50,16 @@ constexpr bool test_split() auto split = flux::split(" trailing space "sv, ' ').map(to_string_view); STATIC_CHECK(check_equal(split, std::array{""sv, "trailing"sv, "space"sv, ""sv})); + + auto cur = split.first(); + split.inc(cur); + split.inc(cur); + split.inc(cur); + STATIC_CHECK(cur.trailing_empty == true); + STATIC_CHECK(cur != split.last()); + split.inc(cur); + STATIC_CHECK(cur.trailing_empty == false); + STATIC_CHECK(cur == split.last()); } // Non-bounded sequences can be split correctly @@ -57,12 +69,20 @@ constexpr bool test_split() using S = decltype(split); static_assert(flux::multipass_sequence); + static_assert(not flux::bounded_sequence); static_assert(flux::contiguous_sequence>); STATIC_CHECK(check_equal(std::move(split).map(to_string_view), std::array{"a"sv, "b"sv})); } + return true; +} + +constexpr bool test_split_with_pattern() +{ + using namespace std::string_view_literals; + // Split with pattern { int nums[] = {0, 1, 2, 3, 99}; @@ -90,12 +110,82 @@ constexpr bool test_split() return true; } -static_assert(test_split()); +constexpr bool test_split_with_predicate() +{ + using namespace std::string_view_literals; + + { + std::array arr{1, 2, 0, 3, 4, 0, 5}; + + auto split = flux::ref(arr).split(flux::pred::eq(0)); + + using S = decltype(split); + + static_assert(flux::sequence); + static_assert(flux::multipass_sequence); + static_assert(flux::bounded_sequence); + static_assert(not flux::bidirectional_sequence); + static_assert(not flux::sized_sequence); + + static_assert(flux::sequence); + 
static_assert(flux::multipass_sequence); + static_assert(flux::bounded_sequence); + static_assert(not flux::bidirectional_sequence); + static_assert(not flux::sized_sequence); + + using E = flux::element_t; + static_assert(flux::contiguous_sequence); + static_assert(flux::sized_sequence); + + using EC = flux::element_t; + static_assert(flux::contiguous_sequence); + static_assert(flux::sized_sequence); + + auto cur = split.first(); + STATIC_CHECK(check_equal(split[cur], {1, 2})); + split.inc(cur); + STATIC_CHECK(check_equal(split[cur], {3, 4})); + split.inc(cur); + STATIC_CHECK(cur != split.last()); + STATIC_CHECK(check_equal(split[cur], {5})); + split.inc(cur); + STATIC_CHECK(split.is_last(cur)); + + STATIC_CHECK(cur == split.last()); + } + + { + auto const seq = flux::split("two spaces -> <-"sv, flux::pred::eq(' ')) + .map(to_string_view); + + STATIC_CHECK(check_equal(seq, + std::array{"two"sv, "spaces"sv, "->"sv, ""sv, "<-"sv})); + } + + return true; +} + +static_assert(test_split_with_delim()); +static_assert(test_split_with_pattern()); +static_assert(test_split_with_predicate()); + +} + +TEST_CASE("split with delimiter") +{ + bool result = test_split_with_delim(); + REQUIRE(result); +} + +TEST_CASE("split with pattern") +{ + bool result = test_split_with_pattern(); + REQUIRE(result); } -TEST_CASE("split") +TEST_CASE("split with predicate") { - bool result = test_split(); + bool result = test_split_with_predicate(); REQUIRE(result); }