-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5f3cbc6
commit 62556b7
Showing
7 changed files
with
242 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#pragma once | ||
|
||
#include <vector> | ||
#include <string> | ||
|
||
|
||
namespace wfa { | ||
|
||
std::vector<std::string> generate_sequences(int32_t length, int32_t count); | ||
|
||
std::vector<std::pair<std::string, std::string>> modify_sequences(int32_t length, int32_t count, double error_rate); | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#pragma once | ||
|
||
#include "kokkos_simd.hpp" | ||
|
||
#include <string> | ||
#include <string_view> | ||
#include <vector> | ||
#include <array> | ||
#include <unordered_map> | ||
|
||
namespace wfa { | ||
//using wavefront_t = std::vector<std::array<std::vector<int32_t>, 3>>; | ||
int32_t constexpr match = 2; | ||
int32_t constexpr ins = 0; | ||
int32_t constexpr del = 1; | ||
|
||
struct wavefront_t { | ||
std::vector<std::array<std::vector<int32_t>, 3>> data; | ||
std::unordered_map<int32_t, int32_t> score_to_index; | ||
std::vector<std::array<int32_t, 2>> low_hi; | ||
|
||
int32_t lookup(int32_t score, int32_t column, int32_t row); | ||
int32_t wave_size(int32_t score, bool low); | ||
|
||
}; | ||
|
||
//bounded score lookup | ||
//int32_t bn_s(wavefront_t& wavefront, int32_t score, int32_t column, int32_t row); | ||
|
||
bool extend(wavefront_t& wavefront, std::string_view a, std::string_view b, int32_t score); | ||
|
||
void next(wavefront_t& wavefront, int32_t s, int32_t x, int32_t o, int32_t e); | ||
|
||
int32_t wavefront_simd(std::string_view a, std::string_view b, int32_t x, int32_t o, int32_t e); | ||
|
||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,13 @@ | ||
set(WFA_SRC_FILES ${WFA_SRC_FILES} | ||
"src/naive.cpp" | ||
"src/kokkos_simd.cpp" | ||
"src/data_gen.cpp" | ||
PARENT_SCOPE | ||
) | ||
|
||
set(WFA_HEADER_FILES ${WFA_HEADER_FILES} | ||
"include/naive.hpp" | ||
"include/kokkos_simd.hpp" | ||
"include/data_gen.hpp" | ||
PARENT_SCOPE | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
#include "include/data_gen.hpp" | ||
#include <random> | ||
#include <array> | ||
|
||
constexpr std::array<char, 4> nt = { 'A', 'T', 'G', 'C' }; | ||
|
||
std::vector<std::string> wfa::generate_sequences(int32_t length, int32_t count) { | ||
std::vector<std::string> strings; | ||
|
||
std::random_device r; | ||
std::default_random_engine random(r()); | ||
std::uniform_int_distribution<int32_t> int_rand(0, 3); | ||
|
||
for (int32_t i = 0; i < count; ++i) { | ||
std::string str; | ||
for (int32_t j = 0; j < length; ++j) { | ||
str.push_back(nt[int_rand(random)]); | ||
} | ||
strings.emplace_back(std::move(str)); | ||
} | ||
|
||
return strings; | ||
} | ||
|
||
std::vector<std::pair<std::string, std::string>> wfa::modify_sequences(int32_t length, int32_t count, double error_rate) { | ||
auto strings = generate_sequences(length, count); | ||
|
||
std::random_device r; | ||
std::default_random_engine random(r()); | ||
std::uniform_int_distribution<int32_t> int_rand(0, 3); | ||
std::uniform_real_distribution<double> real_rand(0, 1); | ||
|
||
std::vector<std::pair<std::string, std::string>> sequences; | ||
for (auto& str : strings) { | ||
std::string modified = str; | ||
for (char& c : modified) { | ||
if (real_rand(random) < error_rate) { | ||
c = nt[int_rand(random)]; | ||
} | ||
} | ||
sequences.emplace_back(std::pair{str, modified}); | ||
} | ||
|
||
return sequences; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
#include "include/kokkos_simd.hpp" | ||
|
||
int32_t wfa::wavefront_t::lookup(int32_t score, int32_t column, int32_t row) { | ||
if (score < 0) { | ||
return 0; | ||
} | ||
if (row < 0) { | ||
return 0; | ||
} | ||
auto iter = score_to_index.find(score); | ||
if (iter == score_to_index.end()) { | ||
return 0; | ||
} | ||
if (row > data[iter->second][column].size() - 1) { | ||
return 0; | ||
} | ||
return (data[iter->second][column][row]); | ||
} | ||
|
||
int32_t wfa::wavefront_t::wave_size(int32_t score, bool low) { | ||
auto iter = score_to_index.find(score); | ||
if (iter == score_to_index.end()) { | ||
return 0; | ||
} | ||
auto& pair = low_hi[iter->second]; | ||
if (low) { | ||
return pair[0]; | ||
} | ||
return pair[1]; | ||
} | ||
|
||
//int32_t wfa::bn_s(wavefront_t& wavefront, int32_t score, int32_t column, int32_t row) { | ||
// if (score < 0) { | ||
// return 0; | ||
// } | ||
// return wavefront[score][column][row]; | ||
//} | ||
|
||
bool wfa::extend(wavefront_t& wavefront, std::string_view a, std::string_view b, int32_t score) { | ||
std::vector<int32_t>& matchfront_back = wavefront.data.back()[2]; | ||
int32_t k_low = wavefront.wave_size(score, true); | ||
int32_t k_high = wavefront.wave_size(score, false); | ||
for (int32_t k = k_low; k < k_high + 1; ++k) { | ||
int32_t starting_index = matchfront_back[k - k_low]; | ||
int32_t v = starting_index - k; | ||
int32_t h = starting_index; | ||
bool mismatch = false; | ||
while (not mismatch) { | ||
const char v_c = a.at(v); | ||
const char h_c = b.at(h); | ||
if (v_c == h_c) { | ||
if (v == static_cast<int32_t>(a.size()) - 1 and h == static_cast<int32_t>(b.size()) - 1) { | ||
return true; | ||
} | ||
++starting_index; | ||
++v; | ||
++h; | ||
} | ||
else { | ||
mismatch = true; | ||
} | ||
} | ||
matchfront_back[k - k_low] = starting_index; | ||
} | ||
return false; | ||
} | ||
|
||
void wfa::next(wavefront_t& wavefront, int32_t s, int32_t x, int32_t o, int32_t e) { | ||
int32_t m_high_sx = 0; | ||
int32_t m_low_sx = 0; | ||
int32_t m_high_soe = 0; | ||
int32_t m_low_soe = 0; | ||
int32_t i_high_se = 0; | ||
int32_t i_low_se = 0; | ||
int32_t d_high_se = 0; | ||
int32_t d_low_se = 0; | ||
if (s - x >= 0) { | ||
m_high_sx = wavefront.wave_size(s - x, false); | ||
m_low_sx = wavefront.wave_size(s - x, true); | ||
} | ||
if (s - o - e >= 0) { | ||
m_high_soe = wavefront.wave_size(s - o - e, false); | ||
m_low_soe = wavefront.wave_size(s - o - e, true); | ||
} | ||
if (s - e >= 0) { | ||
i_high_se = wavefront.wave_size(s - e, false); | ||
i_low_se = wavefront.wave_size(s - e, true); | ||
d_high_se = wavefront.wave_size(s - e, false); | ||
d_low_se = wavefront.wave_size(s - e, true); | ||
} | ||
|
||
int32_t high = std::max({m_high_sx, m_high_soe, i_high_se, d_high_se}) + 1; | ||
int32_t low = std::min({m_low_sx, m_low_soe, i_low_se, d_low_se}) - 1; | ||
|
||
auto& low_hi_cur = wavefront.low_hi.emplace_back(); | ||
low_hi_cur[0] = low; | ||
low_hi_cur[1] = high; | ||
|
||
auto& wave_cur = wavefront.data.emplace_back(); | ||
wavefront.score_to_index.emplace(s, static_cast<int32_t>(wavefront.data.size()) - 1); | ||
for (auto& vec : wave_cur) { | ||
vec.resize(high - low + 1); | ||
} | ||
for (int32_t k = low; k < high + 1; ++k) { | ||
wave_cur[ins][k - low] = std::max({wavefront.lookup(s - o - e, match, k - 1 - low), wavefront.lookup(s - e, ins, k - 1 - low)}) + 1; | ||
wave_cur[del][k - low] = std::max({ wavefront.lookup(s - o - e, match, k + 1 - low), wavefront.lookup(s - e, del, k + 1 - low) }); | ||
wave_cur[match][k - low] = std::max({ wave_cur[ins][k - low], wave_cur[del][k - low], wavefront.lookup(s - x, match, k - low) + 1}); | ||
} | ||
} | ||
|
||
int32_t wfa::wavefront_simd(std::string_view a, std::string_view b, int32_t x, int32_t o, int32_t e) { | ||
wavefront_t wavefront; | ||
wavefront.data.emplace_back(std::array<std::vector<int32_t>, 3>{std::vector<int32_t>{0}, std::vector<int32_t>{0}, std::vector<int32_t>{0}}); | ||
|
||
wavefront.low_hi.emplace_back(std::array{0,0}); | ||
wavefront.score_to_index.emplace(0, 0); | ||
bool matched = false; | ||
|
||
int32_t score = 0; | ||
while (not matched) { | ||
matched = extend(wavefront, a, b, score); | ||
if (matched) { | ||
break; | ||
} | ||
score = score + 1; | ||
next(wavefront, score, x, o, e); | ||
} | ||
return score; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,14 @@ | ||
#include "include/data_gen.hpp" | ||
#include "include/kokkos_simd.hpp" | ||
|
||
int main() { | ||
return 0; | ||
/*auto sequences = wfa::modify_sequences(100, 1000, 0.25); | ||
for (const auto& pair : sequences) { | ||
int32_t dist = wfa::wavefront_simd(pair.first, pair.second, 4, 6, 2); | ||
}*/ | ||
|
||
std::string a = "tert"; | ||
std::string b = "test"; | ||
wfa::wavefront_simd(a, b, 4, 6, 2); | ||
} |