Commit 2fcd607c authored by s_kleplj
Browse files

reverted the last 'optimization'

parent 081130ce
......@@ -201,21 +201,21 @@ if constexpr (std::is_same<policy, policy_avx512>::value) {
cycles = std::min(a_size - x, b_size - y) / multiplier;
for (; cycles != 0; x += multiplier, y += multiplier, d += multiplier, --cycles) {
auto tmp = _mm512_maskz_set1_epi32(
_mm512_cmpeq_epu32_mask(
_mm512_loadu_si512(a_v + x),
_mm512_loadu_si512(b_v + y)),
-1);
const auto mask = _mm512_cmpeq_epu32_mask(
_mm512_loadu_si512(a_v + x),
_mm512_loadu_si512(b_v + y));
if (odd) {
tmp = _mm512_add_epi32(tmp, *(data_pointer)(odd_v + d));
data_type tmp = *(data_pointer)(odd_v + d);
tmp = _mm512_mask_sub_epi32(tmp, mask, tmp, _mm512_set1_epi32(1));
tmp = _mm512_min_epu32(*(data_pointer)(even_v + d), tmp);
const data_type tmp2 = _mm512_loadu_si512(even_v + d + 1);
tmp = _mm512_min_epu32(tmp, tmp2);
*(data_pointer)(odd_v + d) = _mm512_add_epi32(tmp, _mm512_set1_epi32(1));
} else {
tmp = _mm512_add_epi32(tmp, *(data_pointer)(even_v + d));
data_type tmp = *(data_pointer)(even_v + d);
tmp = _mm512_mask_sub_epi32(tmp, mask, tmp, _mm512_set1_epi32(1));
tmp = _mm512_min_epu32(*(data_pointer)(odd_v + d), tmp);
const data_type tmp2 = _mm512_loadu_si512(odd_v + d - 1);
tmp = _mm512_min_epu32(tmp, tmp2);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment