Skip to content

Commit

Permalink
Merge pull request #141 from tcbrindle/pr/split_on
Browse files Browse the repository at this point in the history
Make splitting more generic
  • Loading branch information
tcbrindle authored Dec 12, 2023
2 parents c2be7c5 + 3daf890 commit 51fa653
Show file tree
Hide file tree
Showing 8 changed files with 423 additions and 102 deletions.
52 changes: 43 additions & 9 deletions docs/reference/adaptors.rst
Original file line number Diff line number Diff line change
Expand Up @@ -631,23 +631,57 @@ Adaptors
.. function::
auto slide(multipass_sequence auto seq, std::integral auto win_sz) -> multipass_sequence auto;

``stride``
^^^^^^^^^^

.. function::
auto stride(sequence auto seq, std::integral auto stride_len) -> sequence auto;

``split``
^^^^^^^^^

.. function::
template <multipass_sequence Seq, typename Delim> \
requires std::equality_comparable_with<element_t<Seq>, Delim const&> \
auto split(Seq seq, Delim delim) -> multipass_sequence auto;

.. function::
template <multipass_sequence Seq, multipass_sequence Pattern> \
requires std::equality_comparable_with<element_t<Seq>, element_t<Pattern>> \
auto split(Seq seq, Pattern pattern) -> sequence auto;
auto split(Seq seq, Pattern pattern) -> multipass_sequence auto;

.. function::
template <multipass_sequence Seq> \
auto split(Seq seq, value_t<Seq> delim) -> sequence auto;
template <multipass_sequence Seq, typename Pred> \
requires std::predicate<Pred const&, element_t<Seq>> \
auto split(Seq seq, Pred pred) -> multipass_sequence auto;

Splits a :concept:`multipass_sequence` into a sequence-of-subsequences using the given argument.

The first overload takes a delimiter, which must be equality comparable with the source sequence's element type. The source sequence will be split on each occurrence of the delimiter, with the delimiter itself removed. Consecutive delimiters will result in empty subsequences in the output. If the source sequence begins with a delimiter then the first subsequence will be empty, and likewise if it ends with a delimiter then the final subsequence will be empty.

The second overload takes another sequence, the :var:`pattern`, whose elements must be equality comparable with the elements of the source sequence. The source is split whenever the pattern occurs as a subsequence. Consecutive (non-overlapping) occurrences of the pattern will result in empty sequences in the output. If :expr:`ends_with(seq, pattern)` is :expr:`true`, the final subsequence will be empty.

The third overload takes a unary predicate which will be called with successive elements of the source sequence and returns :expr:`true` when a split should occur. The "``true``" element will be removed from the output. If the predicate returns ``true`` for two consecutive elements of the source, then the output will contain an empty subsequence. If the predicate returns ``true`` for the final element of the source, then the final subsequence will be empty.

The returned sequence is always a :concept:`multipass_sequence`. It is additionally a :concept:`bounded_sequence` when :var:`Seq` is bounded.

:param seq: A multipass sequence to split.
:param delim: For the first overload, a delimiter to split on. Must be equality comparable with the element type of :var:`seq`.
:param pattern: For the second overload, a multipass sequence to split on. Its element type must be equality comparable with the element type of :var:`seq`.
:param pred: For the third overload, a unary predicate accepting elements of :var:`seq`, returning ``true`` when a split should occur.

:returns: A multipass sequence whose elements are subsequences of :var:`seq`.

:example:

.. literalinclude:: ../../example/docs/split.cpp
:language: cpp
:dedent:
:lines: 18-79

:see also:
* `std::views::split() <https://en.cppreference.com/w/cpp/ranges/split_view>`_
* :func:`flux::chunk_by`

``stride``
^^^^^^^^^^

.. function::
auto stride(sequence auto seq, std::integral auto stride_len) -> sequence auto;

``take``
^^^^^^^^
Expand Down
7 changes: 5 additions & 2 deletions example/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
function(ADD_EXAMPLE NAME SOURCE)
add_executable(${NAME} ${SOURCE})
target_link_libraries(${NAME} flux)
if(NOT ${${NAME}_SKIP_TEST})
if(${${NAME}_SKIP_TEST})
#pass
else()
add_test(NAME ${NAME} COMMAND ${NAME} ${${NAME}_ARGS})
endif()
endfunction()
Expand All @@ -11,8 +13,8 @@ add_example(example-config-parser config_parser.cpp)
add_example(example-calendar calendar.cpp)
add_example(example-merge-intervals merge_intervals.cpp)
add_example(example-histogram histogram.cpp)
set(example-word-count_SKIP_TEST ON)
add_example(example-word-count word_count.cpp)
set(example-word-count_SKIP_TEST TRUE)
add_example(example-prime-numbers prime_numbers.cpp)
add_example(example-shortest-path shortest_path.cpp)
add_example(example-moving-average moving_average.cpp)
Expand Down Expand Up @@ -41,6 +43,7 @@ add_example(example-docs-set-intersection docs/set_intersection.cpp)
add_example(example-docs-set-symmetric-difference docs/set_symmetric_difference.cpp)
add_example(example-docs-set-union docs/set_union.cpp)
add_example(example-docs-scan-first docs/scan_first.cpp)
add_example(example-docs-split docs/split.cpp)
add_example(example-docs-starts-with docs/starts_with.cpp)
add_example(example-docs-unfold docs/unfold.cpp)

Expand Down
80 changes: 80 additions & 0 deletions example/docs/split.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@

// Copyright (c) 2023 Tristan Brindle (tcbrindle at gmail dot com)
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <flux.hpp>

#include <array>
#include <cassert>
#include <string_view>
#include <vector>
#include <iostream>

using namespace std::string_view_literals;

int main()
{
using flux::equal;

// We can split a sequence using a single delimiter
auto seq1 = flux::split("here are some words"sv, ' ');
assert(equal(seq1, std::array{"here"sv, "are"sv, "some"sv, "words"sv}));


// Consecutive delimiters will result in empty subsequences in the output
auto seq2 = flux::split("some,,,commas"sv, ',');
assert(equal(seq2, std::array{"some"sv, ""sv, ""sv, "commas"sv}));


// If the sequence ends with a delimiter, the final subsequence will be empty
auto seq3 = flux::split("Two. Sentences."sv, '.');
assert(equal(seq3, std::array{"Two"sv, " Sentences"sv, ""sv}));


// We can also split a sequence with a pattern
auto seq4 = flux::split(std::vector{1, 2, 3, 4, 5}, std::array{2, 3});
assert(equal(seq4, std::vector{std::vector{1}, std::vector{4, 5}}));


// Repeated, non-overlapping patterns result in empty subsequences
auto seq5 = flux::split("Hello!!!!World"sv, "!!"sv);
assert(equal(seq5, std::array{"Hello"sv, ""sv, "World"sv}));


// Overlapping patterns are only matched once
auto seq6 = flux::split("Hello!!!World"sv, "!!"sv);
assert(equal(seq6, std::array{"Hello"sv, "!World"sv}));


// If the sequence begins with the pattern, the first subsequence will
// be empty...
auto seq7 = flux::split("!!Hello"sv, "!!"sv);
assert(equal(seq7, std::array{""sv, "Hello"sv}));


// ... and likewise if it ends with the pattern
auto seq8 = flux::split("Hello!!"sv, "!!"sv);
assert(equal(seq8, std::array{"Hello"sv, ""sv}));


// Lastly, we can split using a predicate function
auto is_digit = [](char c) { return c >= '0' && c <= '9'; };

auto seq9 = flux::split("These1are2some3words"sv, is_digit);
assert(equal(seq9, std::array{"These"sv, "are"sv, "some"sv, "words"sv}));


// As usual, consecutive "true" elements in the input will produce
// empty subsequences in the output
auto seq10 = flux::split("A123B"sv, is_digit);
assert(equal(seq10, std::array{"A"sv, ""sv, ""sv, "B"sv}));


// It can be useful to combine splitting with a "not empty" filter
auto is_space = [](char c) { return std::isspace(static_cast<unsigned char>(c)); };

auto seq11 = flux::split("Alpha Bravo\t\rCharlie \n"sv, is_space)
.filter(std::not_fn(flux::is_empty));
assert(equal(seq11, std::array{"Alpha"sv, "Bravo"sv, "Charlie"sv}));
}
19 changes: 14 additions & 5 deletions include/flux/core/inline_sequence_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -317,15 +317,24 @@ struct inline_sequence_base {
[[nodiscard]]
constexpr auto slide(std::integral auto win_sz) && requires multipass_sequence<Derived>;

template <multipass_sequence Pattern>
requires std::equality_comparable_with<element_t<Derived>, element_t<Pattern>>
template <typename Pattern>
requires multipass_sequence<Derived> &&
multipass_sequence<Pattern> &&
std::equality_comparable_with<element_t<Derived>, element_t<Pattern>>
[[nodiscard]]
constexpr auto split(Pattern&& pattern) &&;

template <typename ValueType>
requires decays_to<ValueType, value_t<Derived>>
template <typename Delim>
requires multipass_sequence<Derived> &&
std::equality_comparable_with<element_t<Derived>, Delim const&>
[[nodiscard]]
constexpr auto split(Delim&& delim) &&;

template <typename Pred>
requires multipass_sequence<Derived> &&
std::predicate<Pred const&, element_t<Derived>>
[[nodiscard]]
constexpr auto split(ValueType&& delim) &&;
constexpr auto split(Pred pred) &&;

template <typename Pattern>
[[nodiscard]]
Expand Down
6 changes: 3 additions & 3 deletions include/flux/core/sequence_access.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ struct last_fn {
struct size_fn {
template <sized_sequence Seq>
[[nodiscard]]
constexpr auto operator()(Seq& seq) const -> distance_t
constexpr auto operator()(Seq&& seq) const -> distance_t
{
if constexpr (requires { traits_t<Seq>::size(seq); }) {
return traits_t<Seq>::size(seq);
Expand All @@ -129,7 +129,7 @@ struct size_fn {
struct usize_fn {
template <sized_sequence Seq>
[[nodiscard]]
constexpr auto operator()(Seq& seq) const -> std::size_t
constexpr auto operator()(Seq&& seq) const -> std::size_t
{
return checked_cast<std::size_t>(size_fn{}(seq));
}
Expand Down Expand Up @@ -277,7 +277,7 @@ struct is_empty_fn {
template <sequence Seq>
requires (multipass_sequence<Seq> || sized_sequence<Seq>)
[[nodiscard]]
constexpr auto operator()(Seq& seq) const -> bool
constexpr auto operator()(Seq&& seq) const -> bool
{
if constexpr (sized_sequence<Seq>) {
return flux::size(seq) == 0;
Expand Down
16 changes: 16 additions & 0 deletions include/flux/op/equal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,22 @@ struct equal_fn {
return impl(seq1, seq2, cmp);
}
}

// Two-argument overload for sequences whose elements are themselves
// sequences but are not directly equality comparable with one another.
//
// When both arguments are sized, a size mismatch is reported as "not equal"
// without inspecting any elements. Otherwise the call is delegated to the
// three-argument overload, handing this same function object over as the
// third argument (which appears to act as the element comparator), so that
// the inner sequences are compared with equal_fn as well.
template <sequence Seq1, sequence Seq2>
    requires (sequence<element_t<Seq1>> &&
              sequence<element_t<Seq2>> &&
              !std::equality_comparable_with<element_t<Seq1>, element_t<Seq2>> &&
              std::is_invocable_v<equal_fn&, Seq1&, Seq2&, equal_fn&>)
constexpr auto operator()(Seq1&& seq1, Seq2&& seq2) const -> bool
{
    constexpr bool both_sized = sized_sequence<Seq1> && sized_sequence<Seq2>;

    // Only instantiated when both sizes can actually be queried.
    if constexpr (both_sized) {
        const auto lhs_size = flux::size(seq1);
        const auto rhs_size = flux::size(seq2);
        if (lhs_size != rhs_size) {
            return false;
        }
    }

    // Re-enter the call operator with ourselves as the third argument.
    const equal_fn& self = *this;
    return self(seq1, seq2, self);
}
};

} // namespace detail
Expand Down
Loading

0 comments on commit 51fa653

Please sign in to comment.