Commit 17a1e97a authored by s_kleplj
Browse files

some changes

parent b86a15d9
#ifndef bsearchsol_hpp_
#define bsearchsol_hpp_
#include <algorithm>
#include <cstdint>
#include <utility>
#include <limits>
#include <iostream>
#include <vector>
#include <immintrin.h>
......@@ -49,7 +51,7 @@ struct policy_data_base<policy_avx> {
#ifdef USE_AVX512
/// Storage description for the AVX-512 policy (only compiled when USE_AVX512 is defined).
///
/// `data_packed` names the vector type of one packed load. The SSE and AVX lookups
/// in this file cast the element pointer to `data_packed*` and pass it to the
/// vector load intrinsic, so the alias has to be the intrinsic vector type rather
/// than a plain array of 16 integers.
template<>
struct policy_data_base<policy_avx512> {
    using data_packed = __m512i;
};
#endif
......@@ -66,7 +68,7 @@ private:
public:
using value_type = std::size_t;
static constexpr value_type value =
size > pack_size * pack_size
size > pack_size
? get_jump_long<policy, size / pack_size>::value * pack_size + pack_size
: 0;
};
......@@ -83,14 +85,6 @@ public:
: size / pack_size;
};
template<typename policy, std::size_t size>
struct get_real_jump {
private:
public:
using value_type = std::size_t;
static constexpr value_type value = get_jump<policy, size>::value / policy_data<policy>::pack_size;
};
template<typename policy, std::size_t size>
struct next_size {
private:
......@@ -103,8 +97,6 @@ public:
: 1;
};
// Namespace-scope constant initialised from get_jump<policy_avx512, 8192>::value.
// NOTE(review): nothing in the visible code reads `i`; it looks like a leftover
// compile-time probe — confirm before relying on it.
constexpr std::size_t i = get_jump<policy_avx512, 8192>::value;
template<typename policy>
class bsearch_inner {
using data_packed = typename policy_data<policy>::data_packed;
......@@ -115,60 +107,59 @@ class bsearch_inner {
, isize{size}
{
append_data(data, isize / pack_size, isize);
for (std::size_t i = 0; i < isize; ++i) {
structure.emplace_back(data[i]);
}
}
/// Appends one level of the search layout to `structure`, then lays out the
/// next level for every slice.
///
/// First the last element of each `step`-sized slice of `data[0, count)` is
/// appended (for `step == 1` that is every element). Then, while `step > 1`,
/// each slice is laid out recursively with a step `pack_size` times smaller,
/// never below 1.
///
/// @param data  first element of the range being laid out
/// @param step  slice length at this level; 0 is treated as 1
/// @param count number of elements in the range
void append_data(const data_element* data, std::size_t step, std::size_t count)
{
    // A zero step would never advance the loops below, so normalise it the same
    // way the recursive call does.
    if (step == 0) {
        step = 1;
    }

    // Separators of this level: the last element of every slice.
    for (std::size_t i = step; i <= count; i += step) {
        structure.emplace_back(data[i - 1]);
    }

    if (step > 1) {
        const std::size_t child_step = step / pack_size == 0 ? 1 : step / pack_size;
        // `i < count` keeps the recursion inside the range; slices start at
        // 0, step, 2*step, ...
        for (std::size_t i = 0; i < count; i += step) {
            append_data(data + i, child_step, step);
        }
    }
}
/// Looks `num` up in the packed structure and returns the index computed by the
/// size-specialised `_find` (the outer class uses it as a bucket index).
///
/// The runtime size is mapped onto a compile-time template argument; only the
/// sizes listed in the switch are supported. For any other `isize` the result
/// is 0.
///
/// @param num value to search for
/// @return index reported by `_find<isize>`, or 0 for an unsupported size
std::size_t find(const data_element num) const
{
    // Start from 0 so that an unsupported size yields a defined value instead
    // of an indeterminate one.
    std::size_t my_result = 0;
    switch (isize)
    {
    case 64:
        my_result = _find<64>(structure.data(), 0, num);
        break;
    case 256:
        my_result = _find<256>(structure.data(), 0, num);
        break;
    case 1024:
        my_result = _find<1024>(structure.data(), 0, num);
        break;
    case 4096:
        my_result = _find<4096>(structure.data(), 0, num);
        break;
    case 16384:
        my_result = _find<16384>(structure.data(), 0, num);
        break;
    case 65536:
        my_result = _find<65536>(structure.data(), 0, num);
        break;
    case 262144:
        my_result = _find<262144>(structure.data(), 0, num);
        break;
    case 1048576:
        my_result = _find<1048576>(structure.data(), 0, num);
        break;
    default:
        break;
    }
    return my_result;
}
/// Read-only access to the stored element count (`isize`).
const std::size_t& size() const
{
    return isize;
}
private:
/// One lookup step over a sub-structure of `size` elements.
///
/// @param offset      first element of the sub-structure being searched
/// @param accumulator index accumulated by the enclosing levels
/// @param num         value to search for
/// @return the accumulated index for `num`
template<std::size_t size>
static constexpr std::size_t _find(const data_element* offset, const std::size_t accumulator, const data_element num);
std::vector<data_element> structure;
std::size_t isize;
......@@ -176,47 +167,66 @@ class bsearch_inner {
/// Generic fallback for policies and sizes without a dedicated definition:
/// performs no search and always returns 0. The policy-specific
/// specialisations provide the real lookups.
template<typename policy>
template<std::size_t size>
inline constexpr std::size_t bsearch_inner<policy>::_find(const data_element* offset, const std::size_t accumulator, const data_element num)
{
    return 0;
}
// Helpers for a per-byte population count on a 128-bit vector.
// popcount_mask keeps only the low nibble (0x0F) of every byte.
static const __m128i popcount_mask = _mm_set1_epi8(0x0F);
// popcount_table holds, at byte position k (0..15), the number of set bits in k.
static const __m128i popcount_table = _mm_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4);
// Returns a vector whose every byte is the number of set bits in the
// corresponding byte of n. Each byte is split into its two nibbles, both are
// looked up in popcount_table via a byte shuffle, and the two counts are added.
// NOTE(review): the closing brace of this function is not visible in this view.
static inline __m128i popcnt8(__m128i n) {
// Bit counts of the low nibbles.
const __m128i pcnt0 = _mm_shuffle_epi8(popcount_table, _mm_and_si128(n, popcount_mask));
// Bit counts of the high nibbles: shift right by 4, then mask away the bits
// that the 16-bit shift carried in from the neighbouring byte.
const __m128i pcnt1 = _mm_shuffle_epi8(popcount_table, _mm_and_si128(_mm_srli_epi16(n, 4), popcount_mask));
return _mm_add_epi8(pcnt0, pcnt1);
/// SSE lookup step over a sub-structure of `size` elements.
///
/// Loads the pack of 32-bit values at `offset` and counts how many of them are
/// not greater than `num`. At the last level (`next_size == 1`) that count is
/// added to `accumulator` and returned. Otherwise a full count means `num` lies
/// past this sub-structure (`accumulator + size`), and any other count selects
/// the child sub-structure to descend into.
///
/// @param offset      first element of the sub-structure being searched
/// @param accumulator index accumulated by the enclosing levels
/// @param num         value to search for
template<>
template<std::size_t size>
inline constexpr std::size_t bsearch_inner<policy_sse>::_find(const data_element* offset, const std::size_t accumulator, const data_element num)
{
    const auto needle = _mm_set1_epi32(num);
    const auto keys = _mm_loadu_si128(reinterpret_cast<const data_packed*>(offset));
    // The byte mask has 4 bits per 32-bit lane. After inversion, a set bit marks
    // a lane where `num < key` does NOT hold.
    const auto not_greater = ~_mm_movemask_epi8(_mm_cmplt_epi32(needle, keys));
    // Only the low 16 bits are mask bits; dividing by 4 turns bits into lanes.
    const std::size_t lanes_passed = _mm_popcnt_u32(not_greater & 0xFFFF) >> 2;

    if constexpr (next_size<policy_sse,size>::value == 1) {
        return accumulator + lanes_passed;
    } else {
        if (lanes_passed != pack_size) {
            // Skip this level's pack, then the children that were passed.
            return _find<next_size<policy_sse,size>::value>(
                offset + pack_size + lanes_passed * get_jump<policy_sse,size>::value,
                accumulator + lanes_passed * next_size<policy_sse,size>::value,
                num);
        }
        return accumulator + size;
    }
}
#ifdef USE_AVX
/// AVX lookup step over a sub-structure of `size` elements.
///
/// Loads the pack of eight 32-bit values at `offset` and counts how many of
/// them are not greater than `num`. At the last level (`next_size == 1`) that
/// count is added to `accumulator` and returned. Otherwise a full count means
/// `num` lies past this sub-structure (`accumulator + size`), and any other
/// count selects the child sub-structure to descend into.
///
/// NOTE(review): `_mm256_cmpgt_epi32` is a signed comparison; if `data_element`
/// is unsigned, values >= 2^31 would order as negative — confirm.
///
/// @param offset      first element of the sub-structure being searched
/// @param accumulator index accumulated by the enclosing levels
/// @param num         value to search for
template<>
template<std::size_t size>
inline std::size_t bsearch_inner<policy_avx>::_find(const data_element* offset, const std::size_t accumulator, const data_element num)
{
    // The byte mask has 4 bits per 32-bit lane (32 bits in total). After
    // inversion, a set bit marks a lane where `key > num` does NOT hold.
    const auto tmp = ~_mm256_movemask_epi8(_mm256_cmpgt_epi32(_mm256_loadu_si256(reinterpret_cast<const data_packed*>(offset)), _mm256_set1_epi32(num)));
    const std::size_t jump = _mm_popcnt_u32(tmp) >> 2;
    if constexpr (next_size<policy_avx,size>::value == 1) {
        return accumulator + jump;
    } else {
        if (jump == pack_size) {
            return accumulator + size;
        } else {
            // Skip this level's pack, then the children that were passed.
            return _find<next_size<policy_avx,size>::value>(
                offset + pack_size + jump * get_jump<policy_avx,size>::value,
                accumulator + jump * next_size<policy_avx,size>::value,
                num);
        }
    }
}
/// AVX policy, 4-element sub-structure: too small for a 256-bit pack, so the
/// four values at `offset` are compared with a single 128-bit (SSE) load.
///
/// @param offset      first of the four elements
/// @param accumulator index accumulated by the enclosing levels
/// @param num         value to search for
/// @return `accumulator` plus the number of the four values not greater than `num`
template<>
template<>
inline std::size_t bsearch_inner<policy_avx>::_find<4>(const data_element* offset, const std::size_t accumulator, const data_element num)
{
    // The byte mask has 4 bits per 32-bit lane. After inversion, a set bit marks
    // a lane where `num < key` does NOT hold; only the low 16 bits are mask bits.
    const auto tmp = ~_mm_movemask_epi8(_mm_cmplt_epi32(_mm_set1_epi32(num), _mm_loadu_si128(reinterpret_cast<const policy_data<policy_sse>::data_packed*>(offset))));
    const std::size_t jump = _mm_popcnt_u32(tmp & 0xFFFF) >> 2;
    return accumulator + jump;
}
/// Compile-time integer power: `pow<base, exp>::value` is `base` multiplied by
/// itself `exp` times, computed in `std::size_t` arithmetic.
template<std::size_t base, std::size_t exp>
struct pow {
    using value_type = std::size_t;
    // Recursive step: base^exp = base * base^(exp - 1).
    static constexpr value_type value = base * pow<base, exp - 1>::value;
};

/// Recursion anchor: anything to the power of zero is 1.
template<std::size_t base>
struct pow<base, 0> {
    using value_type = std::size_t;
    static constexpr value_type value{1};
};
/// AVX policy, 2-element sub-structure: performs no comparison and always
/// reports 0, whatever the arguments.
template<>
template<>
inline std::size_t bsearch_inner<policy_avx>::_find<2>(const data_element* offset, const std::size_t accumulator, const data_element num)
{
    return std::size_t{0};
}
#endif
template<typename policy>
class bsearch_outer {
......@@ -232,9 +242,14 @@ public:
void bucketize(const data_element* data) // size of data is osize
{
for (auto&& bucket : buckets_) {
bucket.clear();
}
for (
const data_element* p_data = data + osize_;
--p_data != data;
const data_element* p_data = data;
p_data != data + osize_;
++p_data
) {
buckets_[inner_.find(*p_data)].emplace_back(*p_data);
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment