Commit b503c68b authored by Dubský Jan
Browse files

Solution 2.05 (Speed: 1.36)

parent 8fc0cd6a
......@@ -29,24 +29,31 @@ class levenstein_base {
protected:
/// Sets up the size constants and allocates the three row buffers.
///
/// Each buffer holds VEC_SIZE uint32_t elements preceded by CACHE_WIDTH bytes
/// (CACHE_PAD elements) of padding. The stored pointers are advanced past the
/// padding, so the original allocation address is `ptr - CACHE_PAD`.
/// zeroth_row is filled with 0, 1, ..., VEC_SIZE - 1; buffer1 and buffer2 are
/// left uninitialised.
///
/// @param a_size  length of the first input; VEC_SIZE is derived from a_size + 1
/// @param b_size  length of the second input
/// @param width   number of uint32_t lanes per vector; ALIGNMENT is width * sizeof(uint32_t)
levenstein_base(std::size_t a_size, std::size_t b_size, size_t width) :
    A_SIZE(a_size), B_SIZE(b_size), WIDTH(width), ALIGNMENT(sizeof(uint32_t) * WIDTH), VEC_SIZE(vec_size(a_size + 1)), VEC_CNT(VEC_SIZE / WIDTH) {
    // Payload plus one CACHE_WIDTH-byte pad in front of it.
    // NOTE(review): the advanced pointer keeps ALIGNMENT only while CACHE_WIDTH
    // is a multiple of ALIGNMENT (true for WIDTH <= 16) -- confirm for wider vectors.
    const size_t size = VEC_SIZE * sizeof(uint32_t) + CACHE_WIDTH;

    // Allocate each buffer exactly once; a second allocation into the same
    // pointer would leak the first one.
    zeroth_row = static_cast<uint32_t*>(aligned_alloc(ALIGNMENT, size));
    assert(zeroth_row);
    zeroth_row += CACHE_PAD;
    for (size_t i = 0; i < VEC_SIZE; ++i) zeroth_row[i] = static_cast<uint32_t>(i);

    buffer1 = static_cast<uint32_t*>(aligned_alloc(ALIGNMENT, size));
    assert(buffer1);
    buffer1 += CACHE_PAD;

    buffer2 = static_cast<uint32_t*>(aligned_alloc(ALIGNMENT, size));
    assert(buffer2);
    buffer2 += CACHE_PAD;
}
/// Releases the three row buffers.
~levenstein_base() {
    // The stored pointers were advanced by CACHE_PAD elements after allocation,
    // so step back to the address aligned_alloc() returned before freeing.
    // Each buffer is freed exactly once.
    free(buffer1 - CACHE_PAD);
    free(buffer2 - CACHE_PAD);
    free(zeroth_row - CACHE_PAD);
}
protected:
static const size_t CACHE_WIDTH = 64;
static const size_t CACHE_PAD = CACHE_WIDTH / sizeof(uint32_t);
const size_t A_SIZE, B_SIZE;
const size_t WIDTH;
const size_t ALIGNMENT;
......@@ -151,16 +158,12 @@ public:
for (size_t i = 0; i < B_SIZE; ++i) {
__m128i currb = _mm_set1_epi32(b[i]);
prev_vec[-1] = i + 1;
for (size_t j = 0; j < VEC_CNT; ++j) {
__m128i prev = _mm_load_si128((__m128i*)(prev_vec + (WIDTH * j)));
__m128i curr = _mm_add_epi32(prev, ones);
__m128i prev_shr;
if (j) prev_shr = _mm_loadu_si128((__m128i*)(prev_vec + (WIDTH * j) - 1));
else {
prev_shr = _mm_slli_si128(prev, 4);
prev_shr = _mm_insert_epi32(prev_shr, (i + 1), 0);
}
__m128i prev_shr = _mm_loadu_si128((__m128i*)(prev_vec + (WIDTH * j) - 1));
__m128i curra = _mm_loadu_si128((__m128i*)(a + (WIDTH * j - 1)));
__m128i mask = _mm_cmpeq_epi32(curra, currb);
......
......@@ -2,4 +2,4 @@
# Run from the script's own directory so the relative paths below resolve.
cd "$(dirname -- "$0")" || exit 1
# Copy the working tree to mff-parlab:asgn/ once, skipping the build directory,
# anything matching gold-* and anything matching *.vscode*.
rsync -rP --inplace * mff-parlab:asgn/ --exclude build --exclude "gold-*" --exclude "*.vscode*"
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment