Commit afbc226a authored by Dubský Jan
Browse files

Solution 2.07.1 (Speed: 1.06)

parent 15c7f11e
......@@ -106,40 +106,6 @@ public:
}
};
//#define DEBUG_MODE
#ifdef DEBUG_MODE
inline uint32_t comp(size_t a_size, size_t b_size, const uint32_t* a, const uint32_t* b) {
levenstein<policy_scalar> l(a_size, b_size);
return l.compute(a, b);
}
// Debug helper: prints two uint32_t sequences as a single "DIFF:" line on
// std::cout.
//
// For every index i < len:
//   - if a[i] == b[i], the value is printed once, preceded by a space;
//   - otherwise "a[i]:b[i]" is printed between the terminal escape sequences
//     ESC[33m and ESC[00m so that the mismatch stands out.
// The line is terminated with std::endl, which also flushes the stream.
//
//   len - number of elements read from each array
//   a   - first sequence (must hold at least len elements)
//   b   - second sequence (must hold at least len elements)
inline void prt_diff(size_t len, const uint32_t* a, const uint32_t* b) {
    std::cout << "DIFF:";
    for (size_t i = 0; i < len; ++i) {
        if (a[i] == b[i]) {
            std::cout << " " << a[i];
        } else {
            // "\x1b" is the standard spelling of the ESC byte; the previous
            // "\e" is a compiler extension and not portable C++.
            std::cout << " \x1b[33m" << a[i] << ":" << b[i] << "\x1b[00m";
        }
    }
    std::cout << std::endl;
}
// Debug helper: dumps an array of uint32_t values to std::cout on one line.
//
// When `title` is non-null it is printed first, followed by ": ". Then
// len + 1 elements (indices 0..len inclusive) are written, each followed by
// a single space, and the line is finished with std::endl (which flushes).
//
//   len   - index of the last element printed; the array must therefore
//           hold at least len + 1 elements
//   array - values to print
//   title - optional label; nullptr prints no label
inline void prt_arr(size_t len, const uint32_t* array, const char* title = nullptr) {
    if (title != nullptr) {
        std::cout << title << ": ";
    }
    const size_t count = len + 1;  // one more element than `len`
    size_t idx = 0;
    while (idx < count) {
        std::cout << array[idx] << " ";
        ++idx;
    }
    std::cout << std::endl;
}
template <typename U>
inline void prt_vec(U vec, const char* desc) {
constexpr size_t SZ = sizeof(U) / sizeof(uint32_t);
uint32_t prt_arr[SZ];
_mm_store_si128((__m128i*)(prt_arr), vec);
std::cout << desc << ": ";
return;
#endif
#define gen_min_loop_128(k) \
{ \
prev_val += 1; \
......@@ -149,16 +115,14 @@ inline void prt_vec(U vec, const char* desc) {
prev_val = curr_val; \
}
template <>
class levenstein<policy_sse> : levenstein_base {
public:
template <>
class levenstein<policy_sse> : levenstein_base {
public:
levenstein(std::size_t a_size, std::size_t b_size) :
levenstein_base(a_size, b_size, 4) {}
//std::uint32_t compute(const std::uint32_t* __restrict__ a, const std::uint32_t* __restrict__ b) {
std::uint32_t compute(const std::uint32_t* a, const std::uint32_t* b) {
if (SWAP) std::swap(a, b);
//__m128i *prev_vec = (__m128i*)zeroth_row, *curr_vec = (__m128i*)buffer2;
uint32_t *prev_vec = zeroth_row, *curr_vec = buffer2;
for (size_t i = 0; i < B_SIZE; ++i) {
......@@ -180,58 +144,31 @@ inline void prt_vec(U vec, const char* desc) {
next_prev_shr = _mm_loadu_si128((__m128i*)(prev_vec + (WIDTH * (j + 1)) - 1));
curra = _mm_loadu_si128((__m128i*)(a + (WIDTH * (j + 1) - 1)));
#if 1
gen_min_loop_128(0);
gen_min_loop_128(1);
gen_min_loop_128(2);
gen_min_loop_128(3);
#endif
_mm_store_si128((__m128i*)(curr_vec + (WIDTH * j)), curr);
}
#if 0
#if 0
uint32_t prev = curr_vec[0];
for (size_t j = 1; j < A_SIZE + 1; ++j) {
prev += 1;
uint32_t curr = curr_vec[j];
if (prev < curr) curr_vec[j] = prev;
else
prev = curr;
}
#else
uint32_t prev_val = curr_vec[0] - 1;
for (size_t j = 0; j < VEC_CNT; ++j) {
auto addr = (__m128i*)(curr_vec + j * WIDTH);
__m128i curr = _mm_load_si128(addr);
gen_min_loop_128(0);
gen_min_loop_128(1);
gen_min_loop_128(2);
gen_min_loop_128(3);
_mm_store_si128(addr, curr);
_mm_store_si128((__m128i*)(curr_vec + (WIDTH * j)), curr);
}
#endif
#endif
std::swap(prev_vec, curr_vec);
if (!i) curr_vec = buffer1;
//if (!i) prev_vec = buffer1;
}
return prev_vec[A_SIZE];
}
private:
private:
const __m128i ones = _mm_set1_epi32(1);
};
};
// FIXME: Remove 0 in condition
#if USE_AVX && 0
template <>
class levenstein<policy_avx> : levenstein_base {
public:
template <>
class levenstein<policy_avx> : levenstein_base {
public:
levenstein(std::size_t a_size, std::size_t b_size) :
levenstein_base(a_size, b_size, 8) {}
......@@ -265,7 +202,7 @@ inline void prt_vec(U vec, const char* desc) {
return prev_vec[A_SIZE];
}
};
};
#else
......@@ -280,12 +217,12 @@ public:
#if USE_AVX512
template <>
class levenstein<policy_avx512> : public levenstein<policy_sse> {
public:
template <>
class levenstein<policy_avx512> : public levenstein<policy_sse> {
public:
levenstein(std::size_t a_size, std::size_t b_size) :
levenstein<policy_sse>(a_size, b_size){};
};
};
#endif
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment