Commit 5555a63b authored by s_kleplj
Browse files

saved progress

parent 4d68a86e
......@@ -5,15 +5,19 @@
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>
#include <immintrin.h>
namespace levensol {
template< typename policy>
class levenstein {
public:
public:
/// Creates a distance calculator for two sequences of fixed lengths.
///
/// @param a_size number of 32-bit elements in the first sequence given to compute()
/// @param b_size number of 32-bit elements in the second sequence given to compute()
///
/// Each of the three scratch buffers is a vector of __m512i, i.e. 16 32-bit
/// lanes per element. The element count is `max(a_size, b_size) / 16`
/// rounded up to the next multiple of 16 that is strictly greater than it,
/// which always leaves room for `max(a_size, b_size) + 1` 32-bit values
/// (one DP row including its leading boundary cell).
///
/// There is deliberately no `- 1` before the `| 15`: with it the unsigned
/// expression wrapped around to a count of zero for lengths below 16 and
/// was one vector short whenever `max / 16` was a multiple of 16.
levenstein(std::size_t a_size, std::size_t b_size) :
    a_size_{a_size},
    b_size_{b_size},
    fst_vector(((std::max(a_size, b_size) / 16) | 15) + 1),
    snd_vector(((std::max(a_size, b_size) / 16) | 15) + 1),
    tmp_vector(((std::max(a_size, b_size) / 16) | 15) + 1)
{
}
......@@ -21,63 +25,51 @@ namespace levensol {
/// Computes the Levenshtein (edit) distance between `a` and `b`, where
/// insertion, deletion and substitution of one element each cost 1.
///
/// @param a       first sequence, `a_size` elements (may be null when `a_size` is 0)
/// @param b       second sequence, `b_size` elements (may be null when `b_size` is 0)
/// @param a_size  length of `a`
/// @param b_size  length of `b`
/// @return the minimal number of single-element edits turning `a` into `b`
///
/// Callers pass the longer sequence first (a_size >= b_size). The result is
/// correct for any ordering; the convention only keeps the two working rows,
/// which are sized by `b_size`, as small as possible.
///
/// This is the plain two-row dynamic-programming formulation. It uses only
/// local storage and no SIMD, so it does not depend on how the object's
/// scratch buffers were sized.
std::uint32_t compute_impl(const std::uint32_t* a, const std::uint32_t* b, const std::size_t a_size, const std::size_t b_size)
{
    // prev[y] = distance between the first x elements of `a` and the first
    // y elements of `b`; curr is the row for x + 1 being filled in.
    std::vector<std::uint32_t> prev(b_size + 1);
    std::vector<std::uint32_t> curr(b_size + 1);

    // Row 0: turning an empty prefix of `a` into y elements takes y insertions.
    for (std::size_t y = 0; y <= b_size; ++y) {
        prev[y] = static_cast<std::uint32_t>(y);
    }

    for (std::size_t x = 0; x < a_size; ++x) {
        // Column 0: turning x + 1 elements into nothing takes x + 1 deletions.
        curr[0] = static_cast<std::uint32_t>(x + 1);
        const std::uint32_t a_x = a[x];

        for (std::size_t y = 0; y < b_size; ++y) {
            const std::uint32_t substitution = prev[y] + (a_x == b[y] ? 0u : 1u);
            const std::uint32_t deletion = prev[y + 1] + 1u;
            const std::uint32_t insertion = curr[y] + 1u;
            curr[y + 1] = std::min(substitution, std::min(deletion, insertion));
        }

        // The row just computed becomes the "previous" row of the next step.
        prev.swap(curr);
    }

    return prev[b_size];
}
/// Computes the edit distance between `a` and `b`, whose lengths are the
/// `a_size` and `b_size` values given to the constructor.
///
/// @param a first sequence, a_size_ elements
/// @param b second sequence, b_size_ elements
/// @return the value produced by compute_impl for the two sequences
std::uint32_t compute(const std::uint32_t* a, const std::uint32_t* b)
{
    // compute_impl is documented as requiring its first sequence to be at
    // least as long as its second, so swap the arguments when `a` is shorter.
    if (a_size_ < b_size_) {
        return compute_impl(b, a, b_size_, a_size_);
    }
    return compute_impl(a, b, a_size_, b_size_);
}
const std::size_t a_size_, b_size_;
std::vector<__m512i> fst_vector, snd_vector, tmp_vector;
};
struct policy_sse {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment