Commit eddfbce7 authored by Dubský Jan
Browse files

Solution 4.04 (Speed: 1.45)

parent c45d09cf
......@@ -80,7 +80,7 @@ public:
assert(a.HS == b.VS);
for (size_t j = 0; j < HS; ++j) {
// This is useless - the compiler vectorizes the normal loop here
// This is useless - the compiler vectorizes the normal loop here
const size_t B_COPY_SIZE = (a.HS + VEC_CNT - 1) / VEC_CNT * VEC_CNT;
matrix_element b_copy[B_COPY_SIZE];
for (size_t i = 0; i < B_COPY_SIZE / VEC_CNT; ++i) {
......@@ -101,11 +101,8 @@ public:
res_vec = _mm_min_epu16(res_vec, val);
}
_mm_store_si128((__m128i*)aggr_arr, res_vec);
matrix_element res = res_init_val;
for (size_t k = 0; k < VEC_CNT; ++k) res = std::min(res, aggr_arr[k]);
addr(i, j) = res;
__m128i mn = _mm_minpos_epu16(res_vec);
addr(i, j) = _mm_extract_epi16(mn, 0);
}
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment