Commit 9d3c4bbd authored by Dubský Jan's avatar Dubský Jan
Browse files

Solution 2.08.2 (Speed: 0.96)

parent 6be4c384
#include "levensol.hpp"
namespace levensol {
} // namespace levensol
#if 0
template <>
std::uint32_t levenstein<policy_avx>::compute(const std::uint32_t* a, const std::uint32_t* b) {
//for (size_t i = 0; i < a_size + 1; ++i) prev.push_back(i);
// Manual smoke test for the SSE policy: builds two 32-element sequences
// (a holds 0..31, b holds 32..63), then computes their distance five
// times with the same solver instance, printing each result on its own line.
int main() {
    const size_t len = 32;
    levensol::levenstein<levensol::policy_sse> lev(len, len);

    uint32_t a[len], b[len];
    for (size_t pos = 0; pos != len; ++pos) {
        a[pos] = pos;
        b[pos] = len + pos;
    }

    // Repeated calls on the same instance exercise buffer reuse between runs.
    size_t runs_left = 5;
    while (runs_left-- > 0) {
        const uint32_t dist = lev.compute(a, b);
        std::cout << dist << std::endl;
    }

    return 0;
}
#endif
} // namespace levensol
......@@ -38,8 +38,6 @@ protected:
buffer2 = (uint32_t*)aligned_alloc(ALIGNMENT, size);
assert(buffer2);
buffer2 += CACHE_PAD;
for (size_t i = 0; i < VEC_SIZE; ++i) buffer1[i] = buffer2[i] = -1;
}
// Unfortunately, the destructor is probably included in the measured part of the solution.
......@@ -74,7 +72,6 @@ private:
};
// As part of testing, a scalar version of levenstein was implemented too.
#if 0
struct policy_scalar {};
template <>
......@@ -102,7 +99,6 @@ public:
return buffer1[A_SIZE];
}
};
#endif
// I wasn't able to find a way to force GCC to unroll a loop with a fixed range.
#define gen_min_loop_128(k) \
......@@ -158,6 +154,12 @@ public:
_mm_store_si128((__m128i*)(curr_vec + k), curr);
}
// The upper-right triangle is empty, so we must set its value if it is going to be required in another step.
const size_t end_ind = WIDTH * upper;
if (i % WIDTH == 1 && upper < VEC_CNT) {
_mm_store_si128((__m128i*)(curr_vec + end_ind), _mm_set1_epi32((i + 2) + (end_ind + WIDTH)));
}
std::swap(prev_vec, curr_vec);
if (!i) curr_vec = buffer1;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment