Commit 6156dbd5 authored by s_kleplj
Browse files

.

parent 7870e06c
#ifndef bsearchsol_hpp_
#define bsearchsol_hpp_
#define USE_AVX
#include <algorithm>
#include <cstdint>
#include <limits>
#include <utility>
#include <vector>
......@@ -107,39 +107,28 @@ class bsearch_inner {
}
}
std::size_t find(const data_element num) const
std::size_t find(data_element num) const
{
std::size_t my_result;
switch (isize)
{
default:
case 64:
my_result = _find<64>(structure.data(), 0, num);
break;
return _find<64>(structure.data(), 0, num);
case 256:
my_result = _find<256>(structure.data(), 0, num);
break;
return _find<256>(structure.data(), 0, num);
case 1024:
my_result = _find<1024>(structure.data(), 0, num);
break;
return _find<1024>(structure.data(), 0, num);
case 4096:
my_result = _find<4096>(structure.data(), 0, num);
break;
return _find<4096>(structure.data(), 0, num);
case 16384:
my_result = _find<16384>(structure.data(), 0, num);
break;
return _find<16384>(structure.data(), 0, num);
case 65536:
my_result = _find<65536>(structure.data(), 0, num);
break;
return _find<65536>(structure.data(), 0, num);
case 262144:
my_result = _find<262144>(structure.data(), 0, num);
break;
return _find<262144>(structure.data(), 0, num);
case 1048576:
my_result = _find<1048576>(structure.data(), 0, num);
break;
return _find<1048576>(structure.data(), 0, num);
}
return my_result;
}
/// Returns a const reference to the stored element count `isize`.
const std::size_t& size() const
{
    return isize;
}
......@@ -152,18 +141,24 @@ class bsearch_inner {
std::size_t isize;
};
template<typename policy>
template<std::size_t size>
inline std::size_t bsearch_inner<policy>::_find(const data_element* offset, std::size_t accumulator, const data_element num)
/// Scalar-policy constructor.
///
/// Builds `structure` from the iterator range [data, data + size) and records
/// `size` in `isize`.
///
/// @param data  pointer to the first input element
/// @param size  number of elements readable starting at `data`
template<>
inline bsearch_inner<policy_scalar>::bsearch_inner(const data_element* data, std::size_t size)
    : structure(data, data + size),
      isize{size}
{
    // All work happens in the member initialisers above.
}
/// Scalar-policy lookup implemented with `std::upper_bound`.
///
/// `std::upper_bound` requires the range to be partitioned with respect to
/// `num` (e.g. sorted ascending); under that precondition the returned index
/// equals the number of stored elements that are `<= num`.
///
/// @param num  value to search for
/// @return     index of the first element of `structure` that is greater
///             than `num`, or the element count if there is none
template<>
inline std::size_t bsearch_inner<policy_scalar>::find(data_element num) const
{
    const auto first = structure.cbegin();
    // The iterator difference is never negative here, so the cast is lossless.
    return static_cast<std::size_t>(std::upper_bound(first, structure.cend(), num) - first);
}
template<>
template<std::size_t size>
inline std::size_t bsearch_inner<policy_sse>::_find(const data_element* offset, const std::size_t accumulator, const data_element num)
{
const auto tmp = ~_mm_movemask_epi8(_mm_cmplt_epi32(_mm_set1_epi32(num), _mm_lddq_si128((data_packed*)offset)));
const auto tmp = ~_mm_movemask_epi8(_mm_cmplt_epi32(_mm_set1_epi32(num), _mm_loadu_si128((data_packed*)offset)));
const std::size_t jump = _mm_popcnt_u32(tmp & 0xFFFF) >> 2;
if constexpr (next_size<policy_sse,size>::value == 1) {
return accumulator + jump;
......@@ -208,6 +203,7 @@ inline std::size_t bsearch_inner<policy_avx>::_find<4>(const data_element* offse
const std::size_t jump = _mm_popcnt_u32(tmp & 0xFFFF) >> 2;
return accumulator + jump;
}
template<>
template<>
inline std::size_t bsearch_inner<policy_avx>::_find<2>(const data_element* offset, const std::size_t accumulator, const data_element num)
......@@ -220,6 +216,69 @@ inline std::size_t bsearch_inner<policy_avx>::_find<2>(const data_element* offse
}
#endif
#ifdef USE_AVX512
/// AVX-512-policy constructor.
///
/// Starts from an empty `structure`, lets `append_data` fill it from the
/// input, and then appends `pack_size` copies of the largest representable
/// `data_element`.
///
/// @param data  pointer to the first input element
/// @param size  element count; stored in `isize`
template<>
inline bsearch_inner<policy_avx512>::bsearch_inner(const data_element* data, std::size_t size)
    : structure{},
      isize{size}
{
    append_data(data, isize / pack_size, isize);

    // NOTE(review): presumably these trailing maximum values are padding so
    // that the full-width unaligned loads in `_find` may run past the last
    // real element - confirm against `append_data`'s layout.
    std::size_t remaining = pack_size;
    while (remaining != 0) {
        structure.emplace_back(std::numeric_limits<data_element>::max());
        --remaining;
    }
}
/// One level of the AVX-512 search over a block of `size` elements.
///
/// Loads 512 bits from `offset`, compares each 32-bit lane against `num`
/// (signed `<=`) and counts the lanes that satisfy the comparison. That count
/// either is the final answer (when the next level has size 1), proves that
/// the whole block qualifies (count equals `pack_size`), or selects the child
/// block in which the search continues.
///
/// @tparam size        number of elements covered by the current block
/// @param offset       start of the current block's packed keys
/// @param accumulator  result contribution gathered on the way down
/// @param num          value being searched for
/// @return             `accumulator` plus the contribution found at or below
///                     this level
template<>
template<std::size_t size>
inline std::size_t bsearch_inner<policy_avx512>::_find(const data_element* offset, const std::size_t accumulator, const data_element num)
{
    const auto le_mask = _mm512_cmple_epi32_mask(_mm512_loadu_si512(offset), _mm512_set1_epi32(num));
    const std::size_t hits = _mm_popcnt_u32(le_mask);
    constexpr auto child_size = next_size<policy_avx512, size>::value;

    if constexpr (child_size == 1) {
        // Leaf level: the lane count is the position inside this block.
        return accumulator + hits;
    } else {
        if (hits == pack_size) {
            // Every key in this pack is <= num: the whole block is counted.
            return accumulator + size;
        }
        // Skip this pack, then step over `hits` child blocks.
        constexpr auto child_stride = get_jump<policy_avx512, size>::value;
        return _find<child_size>(
            offset + pack_size + hits * child_stride,
            accumulator + hits * child_size,
            num);
    }
}
/// Terminal search step for a block of 8 elements.
///
/// Performs a 512-bit load from `offset` but, through the write mask `0xFF`,
/// only lets the low 8 lanes take part in the signed `<=` comparison against
/// `num`; the number of matching lanes is added to `accumulator`.
template<>
template<>
inline std::size_t bsearch_inner<policy_avx512>::_find<8>(const data_element* offset, const std::size_t accumulator, const data_element num)
{
    const auto keys = _mm512_loadu_si512(offset);
    const auto needle = _mm512_set1_epi32(num);
    const auto le_mask = _mm512_mask_cmple_epi32_mask(0xFF, keys, needle);
    const std::size_t hits = _mm_popcnt_u32(le_mask);
    return accumulator + hits;
}
/// Terminal search step for a block of 4 elements.
///
/// Performs a 512-bit load from `offset` but, through the write mask `0xF`,
/// only lets the low 4 lanes take part in the signed `<=` comparison against
/// `num`; the number of matching lanes is added to `accumulator`.
template<>
template<>
inline std::size_t bsearch_inner<policy_avx512>::_find<4>(const data_element* offset, const std::size_t accumulator, const data_element num)
{
    const auto keys = _mm512_loadu_si512(offset);
    const auto needle = _mm512_set1_epi32(num);
    const auto le_mask = _mm512_mask_cmple_epi32_mask(0xF, keys, needle);
    const std::size_t hits = _mm_popcnt_u32(le_mask);
    return accumulator + hits;
}
/// Terminal search step for a block of 2 elements, done with plain scalar
/// comparisons.
///
/// Adds 0, 1 or 2 to `accumulator`: 0 when the first element is greater than
/// `num`, 1 when only the first element is `<= num`, 2 when both are. The
/// second element is read only if the first comparison succeeds.
template<>
template<>
inline std::size_t bsearch_inner<policy_avx512>::_find<2>(const data_element* offset, const std::size_t accumulator, const data_element num)
{
    if (offset[0] <= num) {
        return accumulator + (offset[1] <= num ? 2 : 1);
    }
    return accumulator;
}
#endif
template<typename policy>
class bsearch_outer {
using data_packed = typename policy_data<policy>::data_packed;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment