Skip to content

Commit

Permalink
remove usage of non-standard types in std::char_traits
Browse files Browse the repository at this point in the history
  • Loading branch information
maxbachmann committed Oct 24, 2024
1 parent cbdf843 commit 65e9022
Show file tree
Hide file tree
Showing 20 changed files with 246 additions and 145 deletions.
133 changes: 92 additions & 41 deletions extras/rapidfuzz_amalgamated.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Licensed under the MIT License <http://opensource.org/licenses/MIT>.
// SPDX-License-Identifier: MIT
// RapidFuzz v1.0.2
// Generated: 2024-07-02 16:47:26.932914
// Generated: 2024-10-24 12:06:59.588890
// ----------------------------------------------------------
// This file is an amalgamation of multiple different files.
// You probably shouldn't edit it directly.
Expand Down Expand Up @@ -4511,8 +4511,8 @@ void lcs_simd(Range<size_t*> scores, const BlockPatternMatchVector& block, const
#endif

template <size_t N, bool RecordMatrix, typename PMV, typename InputIt1, typename InputIt2>
auto lcs_unroll(const PMV& block, const Range<InputIt1>&, const Range<InputIt2>& s2, size_t score_cutoff = 0)
-> LCSseqResult<RecordMatrix>
auto lcs_unroll(const PMV& block, const Range<InputIt1>&, const Range<InputIt2>& s2,
size_t score_cutoff = 0) -> LCSseqResult<RecordMatrix>
{
uint64_t S[N];
unroll<size_t, N>([&](size_t i) { S[i] = ~UINT64_C(0); });
Expand Down Expand Up @@ -6662,12 +6662,12 @@ struct CachedJaroWinkler : public detail::CachedSimilarityBase<CachedJaroWinkler
};

template <typename Sentence1>
explicit CachedJaroWinkler(const Sentence1& s1_, double _prefix_weight = 0.1)
-> CachedJaroWinkler<char_type<Sentence1>>;
explicit CachedJaroWinkler(const Sentence1& s1_,
double _prefix_weight = 0.1) -> CachedJaroWinkler<char_type<Sentence1>>;

template <typename InputIt1>
CachedJaroWinkler(InputIt1 first1, InputIt1 last1, double _prefix_weight = 0.1)
-> CachedJaroWinkler<iter_value_t<InputIt1>>;
CachedJaroWinkler(InputIt1 first1, InputIt1 last1,
double _prefix_weight = 0.1) -> CachedJaroWinkler<iter_value_t<InputIt1>>;

} // namespace rapidfuzz

Expand Down Expand Up @@ -7135,8 +7135,8 @@ size_t levenshtein_hyrroe2003_small_band(const BlockPatternMatchVector& PM, cons
}

template <bool RecordMatrix, typename InputIt1, typename InputIt2>
auto levenshtein_hyrroe2003_small_band(const Range<InputIt1>& s1, const Range<InputIt2>& s2, size_t max)
-> LevenshteinResult<RecordMatrix, false>
auto levenshtein_hyrroe2003_small_band(const Range<InputIt1>& s1, const Range<InputIt2>& s2,
size_t max) -> LevenshteinResult<RecordMatrix, false>
{
assert(max <= s1.size());
assert(max <= s2.size());
Expand Down Expand Up @@ -8358,12 +8358,12 @@ struct CachedLevenshtein : public detail::CachedDistanceBase<CachedLevenshtein<C
};

template <typename Sentence1>
explicit CachedLevenshtein(const Sentence1& s1_, LevenshteinWeightTable aWeights = {1, 1, 1})
-> CachedLevenshtein<char_type<Sentence1>>;
explicit CachedLevenshtein(const Sentence1& s1_, LevenshteinWeightTable aWeights = {
1, 1, 1}) -> CachedLevenshtein<char_type<Sentence1>>;

template <typename InputIt1>
CachedLevenshtein(InputIt1 first1, InputIt1 last1, LevenshteinWeightTable aWeights = {1, 1, 1})
-> CachedLevenshtein<iter_value_t<InputIt1>>;
CachedLevenshtein(InputIt1 first1, InputIt1 last1,
LevenshteinWeightTable aWeights = {1, 1, 1}) -> CachedLevenshtein<iter_value_t<InputIt1>>;

} // namespace rapidfuzz

Expand Down Expand Up @@ -9151,35 +9151,39 @@ CachedPrefix(InputIt1 first1, InputIt1 last1) -> CachedPrefix<iter_value_t<Input

namespace rapidfuzz {

template <typename CharT, typename InputIt1, typename InputIt2>
std::basic_string<CharT> editops_apply(const Editops& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2,
InputIt2 last2)
namespace detail {
template <typename ReturnType, typename InputIt1, typename InputIt2>
ReturnType editops_apply_impl(const Editops& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2,
InputIt2 last2)
{
auto len1 = static_cast<size_t>(std::distance(first1, last1));
auto len2 = static_cast<size_t>(std::distance(first2, last2));

std::basic_string<CharT> res_str;
ReturnType res_str;
res_str.resize(len1 + len2);
size_t src_pos = 0;
size_t dest_pos = 0;

for (const auto& op : ops) {
/* matches between last and current editop */
while (src_pos < op.src_pos) {
res_str[dest_pos] = static_cast<CharT>(first1[static_cast<ptrdiff_t>(src_pos)]);
res_str[dest_pos] =
static_cast<typename ReturnType::value_type>(first1[static_cast<ptrdiff_t>(src_pos)]);
src_pos++;
dest_pos++;
}

switch (op.type) {
case EditType::None:
case EditType::Replace:
res_str[dest_pos] = static_cast<CharT>(first2[static_cast<ptrdiff_t>(op.dest_pos)]);
res_str[dest_pos] =
static_cast<typename ReturnType::value_type>(first2[static_cast<ptrdiff_t>(op.dest_pos)]);
src_pos++;
dest_pos++;
break;
case EditType::Insert:
res_str[dest_pos] = static_cast<CharT>(first2[static_cast<ptrdiff_t>(op.dest_pos)]);
res_str[dest_pos] =
static_cast<typename ReturnType::value_type>(first2[static_cast<ptrdiff_t>(op.dest_pos)]);
dest_pos++;
break;
case EditType::Delete: src_pos++; break;
Expand All @@ -9188,7 +9192,8 @@ std::basic_string<CharT> editops_apply(const Editops& ops, InputIt1 first1, Inpu

/* matches after the last editop */
while (src_pos < len1) {
res_str[dest_pos] = static_cast<CharT>(first1[static_cast<ptrdiff_t>(src_pos)]);
res_str[dest_pos] =
static_cast<typename ReturnType::value_type>(first1[static_cast<ptrdiff_t>(src_pos)]);
src_pos++;
dest_pos++;
}
Expand All @@ -9197,35 +9202,30 @@ std::basic_string<CharT> editops_apply(const Editops& ops, InputIt1 first1, Inpu
return res_str;
}

template <typename CharT, typename Sentence1, typename Sentence2>
std::basic_string<CharT> editops_apply(const Editops& ops, const Sentence1& s1, const Sentence2& s2)
{
return editops_apply<CharT>(ops, detail::to_begin(s1), detail::to_end(s1), detail::to_begin(s2),
detail::to_end(s2));
}

template <typename CharT, typename InputIt1, typename InputIt2>
std::basic_string<CharT> opcodes_apply(const Opcodes& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2,
InputIt2 last2)
template <typename ReturnType, typename InputIt1, typename InputIt2>
ReturnType opcodes_apply_impl(const Opcodes& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2,
InputIt2 last2)
{
auto len1 = static_cast<size_t>(std::distance(first1, last1));
auto len2 = static_cast<size_t>(std::distance(first2, last2));

std::basic_string<CharT> res_str;
ReturnType res_str;
res_str.resize(len1 + len2);
size_t dest_pos = 0;

for (const auto& op : ops) {
switch (op.type) {
case EditType::None:
for (auto i = op.src_begin; i < op.src_end; ++i) {
res_str[dest_pos++] = static_cast<CharT>(first1[static_cast<ptrdiff_t>(i)]);
res_str[dest_pos++] =
static_cast<typename ReturnType::value_type>(first1[static_cast<ptrdiff_t>(i)]);
}
break;
case EditType::Replace:
case EditType::Insert:
for (auto i = op.dest_begin; i < op.dest_end; ++i) {
res_str[dest_pos++] = static_cast<CharT>(first2[static_cast<ptrdiff_t>(i)]);
res_str[dest_pos++] =
static_cast<typename ReturnType::value_type>(first2[static_cast<ptrdiff_t>(i)]);
}
break;
case EditType::Delete: break;
Expand All @@ -9236,11 +9236,62 @@ std::basic_string<CharT> opcodes_apply(const Opcodes& ops, InputIt1 first1, Inpu
return res_str;
}

} // namespace detail

/**
 * @brief Iterator-range overload of editops_apply_str.
 *
 * Forwards all arguments unchanged to detail::editops_apply_impl and selects
 * std::basic_string<CharT> as the container type that is built and returned.
 *
 * NOTE(review): presumably [first1, last1) is the sequence the edit operations
 * start from and [first2, last2) the sequence they lead to - confirm against
 * detail::editops_apply_impl.
 *
 * @tparam CharT element type of the returned string; it only appears in the
 *         return type, so callers have to specify it explicitly
 * @param ops edit operations handed to detail::editops_apply_impl
 * @param first1 begin of the first sequence
 * @param last1 end of the first sequence
 * @param first2 begin of the second sequence
 * @param last2 end of the second sequence
 * @return the std::basic_string<CharT> produced by detail::editops_apply_impl
 */
template <typename CharT, typename InputIt1, typename InputIt2>
std::basic_string<CharT> editops_apply_str(const Editops& ops, InputIt1 first1, InputIt1 last1,
InputIt2 first2, InputIt2 last2)
{
return detail::editops_apply_impl<std::basic_string<CharT>>(ops, first1, last1, first2, last2);
}

/**
 * @brief Container overload of editops_apply_str.
 *
 * Obtains the begin/end of both sequences through detail::to_begin and
 * detail::to_end and passes them to detail::editops_apply_impl, selecting
 * std::basic_string<CharT> as the container type that is built and returned.
 *
 * @tparam CharT element type of the returned string; it only appears in the
 *         return type, so callers have to specify it explicitly
 * @param ops edit operations handed to detail::editops_apply_impl
 * @param s1 first sequence
 * @param s2 second sequence
 * @return the std::basic_string<CharT> produced by detail::editops_apply_impl
 */
template <typename CharT, typename Sentence1, typename Sentence2>
std::basic_string<CharT> editops_apply_str(const Editops& ops, const Sentence1& s1, const Sentence2& s2)
{
return detail::editops_apply_impl<std::basic_string<CharT>>(ops, detail::to_begin(s1), detail::to_end(s1),
detail::to_begin(s2), detail::to_end(s2));
}

/**
 * @brief Iterator-range overload of opcodes_apply_str.
 *
 * Forwards all arguments unchanged to detail::opcodes_apply_impl and selects
 * std::basic_string<CharT> as the container type that is built and returned.
 *
 * NOTE(review): presumably [first1, last1) is the sequence the opcodes start
 * from and [first2, last2) the sequence they lead to - confirm against
 * detail::opcodes_apply_impl.
 *
 * @tparam CharT element type of the returned string; it only appears in the
 *         return type, so callers have to specify it explicitly
 * @param ops opcodes handed to detail::opcodes_apply_impl
 * @param first1 begin of the first sequence
 * @param last1 end of the first sequence
 * @param first2 begin of the second sequence
 * @param last2 end of the second sequence
 * @return the std::basic_string<CharT> produced by detail::opcodes_apply_impl
 */
template <typename CharT, typename InputIt1, typename InputIt2>
std::basic_string<CharT> opcodes_apply_str(const Opcodes& ops, InputIt1 first1, InputIt1 last1,
InputIt2 first2, InputIt2 last2)
{
return detail::opcodes_apply_impl<std::basic_string<CharT>>(ops, first1, last1, first2, last2);
}

/**
 * @brief Container overload of opcodes_apply_str.
 *
 * Obtains the begin/end of both sequences through detail::to_begin and
 * detail::to_end and passes them to detail::opcodes_apply_impl, selecting
 * std::basic_string<CharT> as the container type that is built and returned.
 *
 * @tparam CharT element type of the returned string; it only appears in the
 *         return type, so callers have to specify it explicitly
 * @param ops opcodes handed to detail::opcodes_apply_impl
 * @param s1 first sequence
 * @param s2 second sequence
 * @return the std::basic_string<CharT> produced by detail::opcodes_apply_impl
 */
template <typename CharT, typename Sentence1, typename Sentence2>
std::basic_string<CharT> opcodes_apply_str(const Opcodes& ops, const Sentence1& s1, const Sentence2& s2)
{
return detail::opcodes_apply_impl<std::basic_string<CharT>>(ops, detail::to_begin(s1), detail::to_end(s1),
detail::to_begin(s2), detail::to_end(s2));
}

/**
 * @brief Iterator-range overload of editops_apply_vec.
 *
 * Forwards all arguments unchanged to detail::editops_apply_impl and selects
 * std::vector<CharT> as the container type that is built and returned. Unlike
 * a std::basic_string result, a std::vector result does not involve
 * std::char_traits, so it can be used with element types such as uint8_t.
 *
 * NOTE(review): presumably [first1, last1) is the sequence the edit operations
 * start from and [first2, last2) the sequence they lead to - confirm against
 * detail::editops_apply_impl.
 *
 * @tparam CharT element type of the returned vector; it only appears in the
 *         return type, so callers have to specify it explicitly
 * @param ops edit operations handed to detail::editops_apply_impl
 * @param first1 begin of the first sequence
 * @param last1 end of the first sequence
 * @param first2 begin of the second sequence
 * @param last2 end of the second sequence
 * @return the std::vector<CharT> produced by detail::editops_apply_impl
 */
template <typename CharT, typename InputIt1, typename InputIt2>
std::vector<CharT> editops_apply_vec(const Editops& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2,
InputIt2 last2)
{
return detail::editops_apply_impl<std::vector<CharT>>(ops, first1, last1, first2, last2);
}

/**
 * @brief Container overload of editops_apply_vec.
 *
 * Obtains the begin/end of both sequences through detail::to_begin and
 * detail::to_end and passes them to detail::editops_apply_impl, selecting
 * std::vector<CharT> as the container type that is built and returned. Unlike
 * a std::basic_string result, a std::vector result does not involve
 * std::char_traits, so it can be used with element types such as uint8_t.
 *
 * @tparam CharT element type of the returned vector; it only appears in the
 *         return type, so callers have to specify it explicitly
 * @param ops edit operations handed to detail::editops_apply_impl
 * @param s1 first sequence
 * @param s2 second sequence
 * @return the std::vector<CharT> produced by detail::editops_apply_impl
 */
template <typename CharT, typename Sentence1, typename Sentence2>
std::vector<CharT> editops_apply_vec(const Editops& ops, const Sentence1& s1, const Sentence2& s2)
{
return detail::editops_apply_impl<std::vector<CharT>>(ops, detail::to_begin(s1), detail::to_end(s1),
detail::to_begin(s2), detail::to_end(s2));
}

/**
 * @brief Iterator-range overload of opcodes_apply_vec.
 *
 * Forwards all arguments unchanged to detail::opcodes_apply_impl and selects
 * std::vector<CharT> as the container type that is built and returned. Unlike
 * a std::basic_string result, a std::vector result does not involve
 * std::char_traits, so it can be used with element types such as uint8_t.
 *
 * NOTE(review): presumably [first1, last1) is the sequence the opcodes start
 * from and [first2, last2) the sequence they lead to - confirm against
 * detail::opcodes_apply_impl.
 *
 * @tparam CharT element type of the returned vector; it only appears in the
 *         return type, so callers have to specify it explicitly
 * @param ops opcodes handed to detail::opcodes_apply_impl
 * @param first1 begin of the first sequence
 * @param last1 end of the first sequence
 * @param first2 begin of the second sequence
 * @param last2 end of the second sequence
 * @return the std::vector<CharT> produced by detail::opcodes_apply_impl
 */
template <typename CharT, typename InputIt1, typename InputIt2>
std::vector<CharT> opcodes_apply_vec(const Opcodes& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2,
InputIt2 last2)
{
return detail::opcodes_apply_impl<std::vector<CharT>>(ops, first1, last1, first2, last2);
}

template <typename CharT, typename Sentence1, typename Sentence2>
std::basic_string<CharT> opcodes_apply(const Opcodes& ops, const Sentence1& s1, const Sentence2& s2)
std::vector<CharT> opcodes_apply_vec(const Opcodes& ops, const Sentence1& s1, const Sentence2& s2)
{
return opcodes_apply<CharT>(ops, detail::to_begin(s1), detail::to_end(s1), detail::to_begin(s2),
detail::to_end(s2));
return detail::opcodes_apply_impl<std::vector<CharT>>(ops, detail::to_begin(s1), detail::to_end(s1),
detail::to_begin(s2), detail::to_end(s2));
}

} // namespace rapidfuzz
Expand Down Expand Up @@ -9669,8 +9720,8 @@ explicit CachedPartialTokenSortRatio(const Sentence1& s1)
-> CachedPartialTokenSortRatio<char_type<Sentence1>>;

template <typename InputIt1>
CachedPartialTokenSortRatio(InputIt1 first1, InputIt1 last1)
-> CachedPartialTokenSortRatio<iter_value_t<InputIt1>>;
CachedPartialTokenSortRatio(InputIt1 first1,
InputIt1 last1) -> CachedPartialTokenSortRatio<iter_value_t<InputIt1>>;

/**
* @brief Compares the words in the strings based on unique and common words
Expand Down Expand Up @@ -9793,8 +9844,8 @@ template <typename Sentence1>
explicit CachedPartialTokenSetRatio(const Sentence1& s1) -> CachedPartialTokenSetRatio<char_type<Sentence1>>;

template <typename InputIt1>
CachedPartialTokenSetRatio(InputIt1 first1, InputIt1 last1)
-> CachedPartialTokenSetRatio<iter_value_t<InputIt1>>;
CachedPartialTokenSetRatio(InputIt1 first1,
InputIt1 last1) -> CachedPartialTokenSetRatio<iter_value_t<InputIt1>>;

/**
* @brief Helper method that returns the maximum of fuzz::token_set_ratio and
Expand Down
10 changes: 5 additions & 5 deletions fuzzing/fuzz_damerau_levenshtein_distance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
#include <stdexcept>
#include <string>

void validate_distance(size_t reference_dist, const std::basic_string<uint8_t>& s1,
const std::basic_string<uint8_t>& s2, size_t score_cutoff)
void validate_distance(size_t reference_dist, const std::vector<uint8_t>& s1, const std::vector<uint8_t>& s2,
size_t score_cutoff)
{
if (reference_dist > score_cutoff) reference_dist = score_cutoff + 1;

Expand All @@ -26,7 +26,7 @@ void validate_distance(size_t reference_dist, const std::basic_string<uint8_t>&

extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
{
std::basic_string<uint8_t> s1, s2;
std::vector<uint8_t> s1, s2;
if (!extract_strings(data, size, s1, s2)) return 0;

size_t reference_dist = rapidfuzz_reference::damerau_levenshtein_distance(s1, s2);
Expand All @@ -40,8 +40,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)

/* test long sequences */
for (unsigned int i = 2; i < 9; ++i) {
std::basic_string<uint8_t> s1_ = str_multiply(s1, pow<size_t>(2, i));
std::basic_string<uint8_t> s2_ = str_multiply(s2, pow<size_t>(2, i));
std::vector<uint8_t> s1_ = vec_multiply(s1, pow<size_t>(2, i));
std::vector<uint8_t> s2_ = vec_multiply(s2, pow<size_t>(2, i));

if (s1_.size() > 10000 || s2_.size() > 10000) break;

Expand Down
5 changes: 2 additions & 3 deletions fuzzing/fuzz_indel_distance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
#include <stdexcept>
#include <string>

void validate_distance(const std::basic_string<uint8_t>& s1, const std::basic_string<uint8_t>& s2,
size_t score_cutoff)
void validate_distance(const std::vector<uint8_t>& s1, const std::vector<uint8_t>& s2, size_t score_cutoff)
{
auto dist = rapidfuzz::indel_distance(s1, s2, score_cutoff);
auto reference_dist = rapidfuzz_reference::indel_distance(s1, s2, score_cutoff);
Expand All @@ -25,7 +24,7 @@ void validate_distance(const std::basic_string<uint8_t>& s1, const std::basic_st

extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
{
std::basic_string<uint8_t> s1, s2;
std::vector<uint8_t> s1, s2;
if (!extract_strings(data, size, s1, s2)) return 0;

validate_distance(s1, s2, 0);
Expand Down
4 changes: 2 additions & 2 deletions fuzzing/fuzz_indel_editops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@

extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
{
std::basic_string<uint8_t> s1, s2;
std::vector<uint8_t> s1, s2;
if (!extract_strings(data, size, s1, s2)) return 0;

size_t score = rapidfuzz_reference::indel_distance(s1, s2);
rapidfuzz::Editops ops = rapidfuzz::indel_editops(s1, s2);

if (ops.size() == score && s2 != rapidfuzz::editops_apply<uint8_t>(ops, s1, s2))
if (ops.size() == score && s2 != rapidfuzz::editops_apply_vec<uint8_t>(ops, s1, s2))
throw std::logic_error("levenshtein_editops failed");

return 0;
Expand Down
12 changes: 6 additions & 6 deletions fuzzing/fuzz_jaro_similarity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@ bool is_close(double a, double b, double epsilon)
}

template <size_t MaxLen>
void validate_simd(const std::basic_string<uint8_t>& s1, const std::basic_string<uint8_t>& s2)
void validate_simd(const std::vector<uint8_t>& s1, const std::vector<uint8_t>& s2)
{
#ifdef RAPIDFUZZ_SIMD
size_t count = s1.size() / MaxLen + ((s1.size() % MaxLen) != 0);
if (count == 0) return;

rapidfuzz::experimental::MultiJaro<MaxLen> scorer(count);

std::vector<std::basic_string<uint8_t>> strings;
std::vector<std::vector<uint8_t>> strings;

for (auto it1 = s1.begin(); it1 != s1.end(); it1 += MaxLen) {
if (std::distance(it1, s1.end()) < static_cast<ptrdiff_t>(MaxLen)) {
Expand Down Expand Up @@ -59,7 +59,7 @@ void validate_simd(const std::basic_string<uint8_t>& s1, const std::basic_string
#endif
}

void validate_distance(const std::basic_string<uint8_t>& s1, const std::basic_string<uint8_t>& s2)
void validate_distance(const std::vector<uint8_t>& s1, const std::vector<uint8_t>& s2)
{
double reference_sim = rapidfuzz_reference::jaro_similarity(s1, s2);
double sim = rapidfuzz::jaro_similarity(s1, s2);
Expand All @@ -80,15 +80,15 @@ void validate_distance(const std::basic_string<uint8_t>& s1, const std::basic_st

extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
{
std::basic_string<uint8_t> s1, s2;
std::vector<uint8_t> s1, s2;
if (!extract_strings(data, size, s1, s2)) return 0;

validate_distance(s1, s2);

/* test long sequences */
for (unsigned int i = 2; i < 9; ++i) {
std::basic_string<uint8_t> s1_ = str_multiply(s1, pow<size_t>(2, i));
std::basic_string<uint8_t> s2_ = str_multiply(s2, pow<size_t>(2, i));
std::vector<uint8_t> s1_ = vec_multiply(s1, pow<size_t>(2, i));
std::vector<uint8_t> s2_ = vec_multiply(s2, pow<size_t>(2, i));

if (s1_.size() > 10000 || s2_.size() > 10000) break;

Expand Down
6 changes: 3 additions & 3 deletions fuzzing/fuzz_lcs_similarity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@
#include <string>

template <size_t MaxLen>
void validate_simd(const std::basic_string<uint8_t>& s1, const std::basic_string<uint8_t>& s2)
void validate_simd(const std::vector<uint8_t>& s1, const std::vector<uint8_t>& s2)
{
#ifdef RAPIDFUZZ_SIMD
size_t count = s1.size() / MaxLen + ((s1.size() % MaxLen) != 0);
rapidfuzz::experimental::MultiLCSseq<MaxLen> scorer(count);

std::vector<std::basic_string<uint8_t>> strings;
std::vector<std::vector<uint8_t>> strings;

for (auto it1 = s1.begin(); it1 != s1.end(); it1 += MaxLen) {
if (std::distance(it1, s1.end()) < static_cast<ptrdiff_t>(MaxLen)) {
Expand Down Expand Up @@ -51,7 +51,7 @@ void validate_simd(const std::basic_string<uint8_t>& s1, const std::basic_string

extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
{
std::basic_string<uint8_t> s1, s2;
std::vector<uint8_t> s1, s2;
if (!extract_strings(data, size, s1, s2)) {
return 0;
}
Expand Down
Loading

0 comments on commit 65e9022

Please sign in to comment.