diff --git a/extras/rapidfuzz_amalgamated.hpp b/extras/rapidfuzz_amalgamated.hpp index ca138545..d972d39f 100644 --- a/extras/rapidfuzz_amalgamated.hpp +++ b/extras/rapidfuzz_amalgamated.hpp @@ -1,7 +1,7 @@ // Licensed under the MIT License . // SPDX-License-Identifier: MIT // RapidFuzz v1.0.2 -// Generated: 2023-12-25 15:26:08.006867 +// Generated: 2023-12-25 16:08:55.279788 // ---------------------------------------------------------- // This file is an amalgamation of multiple different files. // You probably shouldn't edit it directly. @@ -4593,7 +4593,7 @@ auto lcs_blockwise(const PMV& PM, const Range& s1, const Range(first_block * word_size)); + if constexpr (RecordMatrix) res.S.set_offset(row, static_cast(first_block * word_size)); for (size_t word = first_block; word < last_block; ++word) { const uint64_t Matches = PM.get(word, *iter_s2); @@ -4753,8 +4753,8 @@ Editops recover_alignment(const Range& s1, const Range& s2, /* Deletion */ if (matrix.S.test_bit(row - 1, col - 1)) { assert(dist > 0); - assert(static_cast(col) >= - static_cast(row) - static_cast(band_width_right)); + assert(static_cast(col) >= + static_cast(row) - static_cast(band_width_right)); dist--; col--; editops[dist].type = EditType::Delete; @@ -7051,7 +7051,7 @@ size_t levenshtein_hyrroe2003_small_band(const BlockPatternMatchVector& PM, cons size_t currDist = max; uint64_t diagonal_mask = UINT64_C(1) << 63; uint64_t horizontal_mask = UINT64_C(1) << 62; - ssize_t start_pos = static_cast(max) + 1 - 64; + ptrdiff_t start_pos = static_cast(max) + 1 - 64; /* score can decrease along the horizontal, but not along the diagonal */ size_t break_score = 2 * max + s2.size() - s1.size(); @@ -7145,10 +7145,10 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range(s2.size(), 1, ~UINT64_C(0)); res.VN = ShiftedBitMatrix(s2.size(), 1, 0); - ssize_t start_offset = static_cast(max) + 2 - 64; + ptrdiff_t start_offset = static_cast(max) + 2 - 64; for (size_t i = 0; i < s2.size(); ++i) { - res.VP.set_offset(i, start_offset + static_cast(i)); - res.VN.set_offset(i, start_offset + static_cast(i)); + res.VP.set_offset(i, start_offset + static_cast(i)); + res.VN.set_offset(i, start_offset + static_cast(i)); } } @@ -7157,10 +7157,10 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range::value_type, std::pair> PM; + HybridGrowingHashmap::value_type, std::pair> PM; auto iter_s1 = s1.begin(); - for (ssize_t j = -static_cast(max); j < 0; ++iter_s1, ++j) { + for (ptrdiff_t j = -static_cast(max); j < 0; ++iter_s1, ++j) { auto& x = PM[*iter_s1]; x.second = shr64(x.second, j - x.first) | (UINT64_C(1) << 63); x.first = j; @@ -7175,12 +7175,12 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range(i) - x.first) | (UINT64_C(1) << 63); - x.first = static_cast(i); + x.second = shr64(x.second, static_cast(i) - x.first) | (UINT64_C(1) << 63); + x.first = static_cast(i); } { auto x = PM.get(*iter_s2); - PM_j = shr64(x.second, static_cast(i) - x.first); + PM_j = shr64(x.second, static_cast(i) - x.first); } uint64_t X = PM_j; @@ -7214,13 +7214,13 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range(i) - x.first) | (UINT64_C(1) << 63); - x.first = static_cast(i); + x.second = shr64(x.second, static_cast(i) - x.first) | (UINT64_C(1) << 63); + x.first = static_cast(i); ++iter_s1; } { auto x = PM.get(*iter_s2); - PM_j = shr64(x.second, static_cast(i) - x.first); + PM_j = shr64(x.second, static_cast(i) - x.first); } uint64_t X = PM_j; @@ -7309,8 +7309,8 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range uint64_t HN_carry = 0; if constexpr (RecordMatrix) { - res.VP.set_offset(row, static_cast(first_block * word_size)); - res.VN.set_offset(row, static_cast(first_block * word_size)); + res.VP.set_offset(row, static_cast(first_block * word_size)); + res.VN.set_offset(row, static_cast(first_block * word_size)); } auto advance_block = [&](size_t word) { @@ -7359,14 +7359,14 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range for (size_t word = first_block; word <= last_block /* - 1*/; word++) { /* Step 3: Computing the value D[m,j] */ - scores[word] = static_cast(static_cast(scores[word]) + advance_block(word)); + scores[word] = static_cast(static_cast(scores[word]) + advance_block(word)); } max = static_cast( - std::min(static_cast(max), - static_cast(scores[last_block]) + - std::max(static_cast(s2.size()) - static_cast(row) - 1, - static_cast(s1.size()) - + std::min(static_cast(max), + static_cast(scores[last_block]) + + std::max(static_cast(s2.size()) - static_cast(row) - 1, + static_cast(s1.size()) - (static_cast((1 + last_block) * word_size - 1) - 1)))); /*---------- Adjust number of blocks according to Ukkonen ----------*/ @@ -7376,9 +7376,9 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range /* If block is not beneath band, calculate next block. Only next because others are certainly beneath * band. */ if (last_block + 1 < words) { - ssize_t cond = static_cast(max + 2 * word_size + row + s1.size()) - - static_cast(scores[last_block] + 2 + s2.size()); - if (static_cast(get_row_num(last_block)) < cond) { + ptrdiff_t cond = static_cast(max + 2 * word_size + row + s1.size()) - + static_cast(scores[last_block] + 2 + s2.size()); + if (static_cast(get_row_num(last_block)) < cond) { last_block++; vecs[last_block].VP = ~UINT64_C(0); vecs[last_block].VN = 0; @@ -7387,8 +7387,8 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range scores[last_block] = scores[last_block - 1] + chars_in_block - static_cast(HP_carry) + static_cast(HN_carry); // todo probably wrong types - scores[last_block] = - static_cast(static_cast(scores[last_block]) + advance_block(last_block)); + scores[last_block] = static_cast(static_cast(scores[last_block]) + + advance_block(last_block)); } } @@ -7403,9 +7403,9 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range * this uses a more loose condition similar to edlib: * https://github.com/Martinsos/edlib */ - ssize_t cond = static_cast(max + 2 * word_size + row + s1.size() + 1) - - static_cast(scores[last_block] + 2 + s2.size()); - bool in_band_cond2 = static_cast(get_row_num(last_block)) <= cond; + ptrdiff_t cond = static_cast(max + 2 * word_size + row + s1.size() + 1) - + static_cast(scores[last_block] + 2 + s2.size()); + bool in_band_cond2 = static_cast(get_row_num(last_block)) <= cond; if (in_band_cond1 && in_band_cond2) break; } @@ -7419,9 +7419,9 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range * if this condition is met for the last cell in the block, it * is met for all other cells in the blocks as well */ - ssize_t cond = static_cast(scores[first_block] + s1.size() + row) - - static_cast(max + s2.size()); - bool in_band_cond2 = static_cast(get_row_num(first_block)) >= cond; + ptrdiff_t cond = static_cast(scores[first_block] + s1.size() + row) - + static_cast(max + s2.size()); + bool in_band_cond2 = static_cast(get_row_num(first_block)) >= cond; if (in_band_cond1 && in_band_cond2) break; } @@ -10194,10 +10194,10 @@ partial_ratio_impl(const detail::Range& s1, const detail::Range last */ size_t known_edits = detail::abs_diff(scores[window.first], scores[window.second]); /* half of the cells that are not needed for known_edits can lead to a better score */ - ssize_t min_score = - static_cast(std::min(scores[window.first], scores[window.second])) - - static_cast(cell_diff + known_edits / 2); - if (min_score < static_cast(cutoff_dist)) { + ptrdiff_t min_score = + static_cast(std::min(scores[window.first], scores[window.second])) - + static_cast(cell_diff + known_edits / 2); + if (min_score < static_cast(cutoff_dist)) { size_t center = cell_diff / 2; new_windows.emplace_back(window.first, window.first + center); new_windows.emplace_back(window.first + center, window.second); diff --git a/rapidfuzz/distance/LCSseq_impl.hpp b/rapidfuzz/distance/LCSseq_impl.hpp index ba843dc2..6c6cc5d0 100644 --- a/rapidfuzz/distance/LCSseq_impl.hpp +++ b/rapidfuzz/distance/LCSseq_impl.hpp @@ -270,7 +270,7 @@ auto lcs_blockwise(const PMV& PM, const Range& s1, const Range(first_block * word_size)); + if constexpr (RecordMatrix) res.S.set_offset(row, static_cast(first_block * word_size)); for (size_t word = first_block; word < last_block; ++word) { const uint64_t Matches = PM.get(word, *iter_s2); @@ -430,8 +430,8 @@ Editops recover_alignment(const Range& s1, const Range& s2, /* Deletion */ if (matrix.S.test_bit(row - 1, col - 1)) { assert(dist > 0); - assert(static_cast(col) >= - static_cast(row) - static_cast(band_width_right)); + assert(static_cast(col) >= + static_cast(row) - static_cast(band_width_right)); dist--; col--; editops[dist].type = EditType::Delete; diff --git a/rapidfuzz/distance/Levenshtein_impl.hpp b/rapidfuzz/distance/Levenshtein_impl.hpp index 93ce5ff9..3aad9813 100644 --- a/rapidfuzz/distance/Levenshtein_impl.hpp +++ b/rapidfuzz/distance/Levenshtein_impl.hpp @@ -398,7 +398,7 @@ size_t levenshtein_hyrroe2003_small_band(const BlockPatternMatchVector& PM, cons size_t currDist = max; uint64_t diagonal_mask = UINT64_C(1) << 63; uint64_t horizontal_mask = UINT64_C(1) << 62; - ssize_t start_pos = static_cast(max) + 1 - 64; + ptrdiff_t start_pos = static_cast(max) + 1 - 64; /* score can decrease along the horizontal, but not along the diagonal */ size_t break_score = 2 * max + s2.size() - s1.size(); @@ -492,10 +492,10 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range(s2.size(), 1, ~UINT64_C(0)); res.VN = ShiftedBitMatrix(s2.size(), 1, 0); - ssize_t start_offset = static_cast(max) + 2 - 64; + ptrdiff_t start_offset = static_cast(max) + 2 - 64; for (size_t i = 0; i < s2.size(); ++i) { - res.VP.set_offset(i, start_offset + static_cast(i)); - res.VN.set_offset(i, start_offset + static_cast(i)); + res.VP.set_offset(i, start_offset + static_cast(i)); + res.VN.set_offset(i, start_offset + static_cast(i)); } } @@ -504,10 +504,10 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range::value_type, std::pair> PM; + HybridGrowingHashmap::value_type, std::pair> PM; auto iter_s1 = s1.begin(); - for (ssize_t j = -static_cast(max); j < 0; ++iter_s1, ++j) { + for (ptrdiff_t j = -static_cast(max); j < 0; ++iter_s1, ++j) { auto& x = PM[*iter_s1]; x.second = shr64(x.second, j - x.first) | (UINT64_C(1) << 63); x.first = j; @@ -522,12 +522,12 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range(i) - x.first) | (UINT64_C(1) << 63); - x.first = static_cast(i); + x.second = shr64(x.second, static_cast(i) - x.first) | (UINT64_C(1) << 63); + x.first = static_cast(i); } { auto x = PM.get(*iter_s2); - PM_j = shr64(x.second, static_cast(i) - x.first); + PM_j = shr64(x.second, static_cast(i) - x.first); } uint64_t X = PM_j; @@ -561,13 +561,13 @@ auto levenshtein_hyrroe2003_small_band(const Range& s1, const Range(i) - x.first) | (UINT64_C(1) << 63); - x.first = static_cast(i); + x.second = shr64(x.second, static_cast(i) - x.first) | (UINT64_C(1) << 63); + x.first = static_cast(i); ++iter_s1; } { auto x = PM.get(*iter_s2); - PM_j = shr64(x.second, static_cast(i) - x.first); + PM_j = shr64(x.second, static_cast(i) - x.first); } uint64_t X = PM_j; @@ -657,8 +657,8 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range uint64_t HN_carry = 0; if constexpr (RecordMatrix) { - res.VP.set_offset(row, static_cast(first_block * word_size)); - res.VN.set_offset(row, static_cast(first_block * word_size)); + res.VP.set_offset(row, static_cast(first_block * word_size)); + res.VN.set_offset(row, static_cast(first_block * word_size)); } auto advance_block = [&](size_t word) { @@ -707,14 +707,14 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range for (size_t word = first_block; word <= last_block /* - 1*/; word++) { /* Step 3: Computing the value D[m,j] */ - scores[word] = static_cast(static_cast(scores[word]) + advance_block(word)); + scores[word] = static_cast(static_cast(scores[word]) + advance_block(word)); } max = static_cast( - std::min(static_cast(max), - static_cast(scores[last_block]) + - std::max(static_cast(s2.size()) - static_cast(row) - 1, - static_cast(s1.size()) - + std::min(static_cast(max), + static_cast(scores[last_block]) + + std::max(static_cast(s2.size()) - static_cast(row) - 1, + static_cast(s1.size()) - (static_cast((1 + last_block) * word_size - 1) - 1)))); /*---------- Adjust number of blocks according to Ukkonen ----------*/ @@ -724,9 +724,9 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range /* If block is not beneath band, calculate next block. Only next because others are certainly beneath * band. */ if (last_block + 1 < words) { - ssize_t cond = static_cast(max + 2 * word_size + row + s1.size()) - - static_cast(scores[last_block] + 2 + s2.size()); - if (static_cast(get_row_num(last_block)) < cond) { + ptrdiff_t cond = static_cast(max + 2 * word_size + row + s1.size()) - + static_cast(scores[last_block] + 2 + s2.size()); + if (static_cast(get_row_num(last_block)) < cond) { last_block++; vecs[last_block].VP = ~UINT64_C(0); vecs[last_block].VN = 0; @@ -736,7 +736,7 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range static_cast(HN_carry); // todo probably wrong types scores[last_block] = - static_cast(static_cast(scores[last_block]) + advance_block(last_block)); + static_cast(static_cast(scores[last_block]) + advance_block(last_block)); } } @@ -751,9 +751,9 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range * this uses a more loose condition similar to edlib: * https://github.com/Martinsos/edlib */ - ssize_t cond = static_cast(max + 2 * word_size + row + s1.size() + 1) - - static_cast(scores[last_block] + 2 + s2.size()); - bool in_band_cond2 = static_cast(get_row_num(last_block)) <= cond; + ptrdiff_t cond = static_cast(max + 2 * word_size + row + s1.size() + 1) - + static_cast(scores[last_block] + 2 + s2.size()); + bool in_band_cond2 = static_cast(get_row_num(last_block)) <= cond; if (in_band_cond1 && in_band_cond2) break; } @@ -767,9 +767,9 @@ auto levenshtein_hyrroe2003_block(const BlockPatternMatchVector& PM, const Range * if this condition is met for the last cell in the block, it * is met for all other cells in the blocks as well */ - ssize_t cond = static_cast(scores[first_block] + s1.size() + row) - - static_cast(max + s2.size()); - bool in_band_cond2 = static_cast(get_row_num(first_block)) >= cond; + ptrdiff_t cond = static_cast(scores[first_block] + s1.size() + row) - + static_cast(max + s2.size()); + bool in_band_cond2 = static_cast(get_row_num(first_block)) >= cond; if (in_band_cond1 && in_band_cond2) break; } diff --git a/rapidfuzz/fuzz_impl.hpp b/rapidfuzz/fuzz_impl.hpp index 075a62b9..18ca05b4 100644 --- a/rapidfuzz/fuzz_impl.hpp +++ b/rapidfuzz/fuzz_impl.hpp @@ -124,10 +124,10 @@ partial_ratio_impl(const detail::Range& s1, const detail::Range last */ size_t known_edits = detail::abs_diff(scores[window.first], scores[window.second]); /* half of the cells that are not needed for known_edits can lead to a better score */ - ssize_t min_score = - static_cast(std::min(scores[window.first], scores[window.second])) - - static_cast(cell_diff + known_edits / 2); - if (min_score < static_cast(cutoff_dist)) { + ptrdiff_t min_score = + static_cast(std::min(scores[window.first], scores[window.second])) - + static_cast(cell_diff + known_edits / 2); + if (min_score < static_cast(cutoff_dist)) { size_t center = cell_diff / 2; new_windows.emplace_back(window.first, window.first + center); new_windows.emplace_back(window.first + center, window.second);