Commit 9d4e96b8 authored by s_kleplj
Browse files

solution ~3.0x

parent 5555a63b
......@@ -15,9 +15,8 @@ namespace levensol {
// Constructor: stores both sequence lengths and sizes the row buffers from the
// longer of the two lengths.
// NOTE(review): this span reads like a diff rendered without +/- markers. The
// initializers using `/16` and the ones using `+ 1, 0` initialize the same
// members twice, so presumably the former are the removed lines and the latter
// the added ones — confirm against the commit.
levenstein(std::size_t a_size, std::size_t b_size) :
a_size_{a_size},
b_size_{b_size},
// Element count is ((max/16 - 1) | 15) + 1. std::size_t is unsigned, so when
// max(a_size, b_size) < 16 the `0 - 1` wraps to the maximum value, the OR leaves
// it unchanged, and the final `+ 1` wraps to 0, giving an empty vector.
fst_vector(((std::max(a_size, b_size)/16 - 1) | 15) + 1),
snd_vector(((std::max(a_size, b_size)/16 - 1) | 15) + 1),
tmp_vector(((std::max(a_size, b_size)/16 - 1) | 15) + 1)
// Count/value form: max(a_size, b_size) + 1 elements, each initialized to 0.
fst_vector(std::max(a_size, b_size) + 1, 0),
snd_vector(std::max(a_size, b_size) + 1, 0)
{
}
......@@ -26,37 +25,26 @@ namespace levensol {
// Row-by-row dynamic-programming pass over `a` (a_size elements) and `b`
// (b_size elements): fst_vector holds the previous row, snd_vector receives
// the current one, and the two are swapped after each element of `a`.
// The scalar lines implement the two-row edit-distance recurrence
//   cur[y + 1] = min(prev[y] + (a[x] != b[y]), prev[y + 1] + 1, cur[y] + 1)
// and the result is read from fst_vector[b_size] after the last swap.
// NOTE(review): this span reads like a diff rendered without +/- markers. The
// __m512i / _mm512_* lines and the scalar lines appear to be two versions of
// the same loop body shown together (braces do not balance as rendered).
// Presumably the intrinsic lines and `return 0;` are the removed version —
// confirm against the commit.
// NOTE(review): indices up to b_size are written here, while the buffers are
// sized in the constructor — presumably callers pass matching sizes; verify.
std::uint32_t compute_impl(const std::uint32_t* a, const std::uint32_t* b, const std::size_t a_size, const std::size_t b_size)
{
// Seed the first row with 0, 1, ..., b_size.
for (std::size_t i = 0; i <= b_size; ++i) {
// Variant that addresses the buffer storage through a std::uint32_t pointer.
((std::uint32_t*)(&fst_vector[0]))[i] = i;
// Variant that indexes the buffer directly; the std::size_t index is narrowed
// to the element type on assignment.
fst_vector[i] = i;
}
for (std::size_t x = 0; x < a_size; ++x) {
// snd_vector[0] = x + 1;
// Broadcast a[x] into every 32-bit lane.
__m512i a_x = _mm512_set1_epi32(a[x]);
// Store each previous-row vector into tmp_vector at an offset of one
// std::uint32_t element.
for (std::size_t y = 0; y < b_size / 16; ++y) {
_mm512_store_epi32((std::uint32_t*)&tmp_vector[y] + 1, fst_vector[y]);
}
std::size_t y = 0;
// Handle 16 elements of `b` per iteration.
for (; y < b_size; y += 16) {
__m512i b_y = _mm512_load_epi32(b + y);
auto mask = _mm512_cmpeq_epi32_mask (a_x, b_y);
// Per-lane cost: 0 where a[x] equals the element of `b`, 1 elsewhere.
auto tmp = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), mask, _mm512_set1_epi32(0));
tmp = _mm512_add_epi32(tmp, tmp_vector[y/16]);
// Previous-row value plus 1 in every lane.
auto tmp2 = _mm512_add_epi32(fst_vector[y/16], _mm512_set1_epi32(1));
// Lane-wise minimum of the two candidates; this loop has no term that reads
// snd_vector.
tmp = _mm512_min_epi32(tmp, tmp2);
snd_vector[y/16] = tmp;
}
// Steps y back by 16 and continues one element at a time up to b_size.
for (y -= 16; y < b_size; ++y) {
// First cell of the current row is x + 1.
snd_vector[0] = x + 1;
std::uint32_t a_x = a[x];
for (std::size_t y = 0; y < b_size; ++y) {
// Minimum of (previous row at y, plus 1 if a[x] differs from b[y]) and
// (previous row at y + 1, plus 1).
std::uint32_t tmp = std::min(
fst_vector[y] + (a_x == b[y] ? 0 : 1),
fst_vector[y + 1] + 1);
// Also consider the current row's preceding cell plus 1.
snd_vector[y + 1] = std::min(
tmp,
snd_vector[y] + 1);
}
// The row just computed becomes the previous row for the next x.
snd_vector.swap(fst_vector);
}
// Two consecutive returns as rendered: the first yields a constant 0, the
// second yields the last cell of the most recently computed row.
return 0;
return fst_vector[b_size];
}
std::uint32_t compute(const std::uint32_t* a, const std::uint32_t* b)
......@@ -69,7 +57,7 @@ namespace levensol {
}
const std::size_t a_size_, b_size_;
std::vector<__m512i> fst_vector, snd_vector, tmp_vector;
std::vector<std::uint32_t> fst_vector, snd_vector;
};
struct policy_sse {
......
Markdown is supported
Attaching a file: 0%. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment