Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Klepl Jiří
asgn
Commits
17a1e97a
Commit
17a1e97a
authored
May 06, 2020
by
s_kleplj
Browse files
some changes
parent
b86a15d9
Changes
1
Hide whitespace changes
Inline
Side-by-side
sol/bsearchsol.hpp
View file @
17a1e97a
#ifndef bsearchsol_hpp_
#define bsearchsol_hpp_
#include <algorithm>
#include <cstdint>
#include <utility>
#include <limits>
#include <iostream>
#include <vector>
#include <immintrin.h>
...
...
@@ -49,7 +51,7 @@ struct policy_data_base<policy_avx> {
#ifdef USE_AVX512
/// Packed-register description for the AVX-512 policy.
///
/// `data_packed` is the 512-bit integer vector type `__m512i`.
/// The alias is declared exactly once: aliasing the same name to both
/// `std::uint32_t[16]` and `__m512i` inside one class is ill-formed.
template <>
struct policy_data_base<policy_avx512> {
    using data_packed = __m512i;
};
#endif
...
...
@@ -66,7 +68,7 @@ private:
public:
using
value_type
=
std
::
size_t
;
static
constexpr
value_type
value
=
size
>
pack_size
*
pack_size
size
>
pack_size
?
get_jump_long
<
policy
,
size
/
pack_size
>::
value
*
pack_size
+
pack_size
:
0
;
};
...
...
@@ -83,14 +85,6 @@ public:
:
size
/
pack_size
;
};
template
<
typename
policy
,
std
::
size_t
size
>
struct
get_real_jump
{
private:
public:
using
value_type
=
std
::
size_t
;
static
constexpr
value_type
value
=
get_jump
<
policy
,
size
>::
value
/
policy_data
<
policy
>::
pack_size
;
};
template
<
typename
policy
,
std
::
size_t
size
>
struct
next_size
{
private:
...
...
@@ -103,8 +97,6 @@ public:
:
1
;
};
// Namespace-scope constant named `i`, holding
// get_jump<policy_avx512, 8192>::value.
// NOTE(review): looks like a leftover compile-time probe — confirm nothing
// depends on it. The loops further down declare their own local `i`, which
// hides this one inside their scope.
constexpr
std
::
size_t
i
=
get_jump
<
policy_avx512
,
8192
>::
value
;
template
<
typename
policy
>
class
bsearch_inner
{
using
data_packed
=
typename
policy_data
<
policy
>::
data_packed
;
...
...
@@ -115,60 +107,59 @@ class bsearch_inner {
,
isize
{
size
}
{
append_data
(
data
,
isize
/
pack_size
,
isize
);
for
(
std
::
size_t
i
=
0
;
i
<
isize
;
++
i
)
{
structure
.
emplace_back
(
data
[
i
]);
}
}
/// Appends one level of the search layout to `structure`, then lays out
/// every sub-range recursively.
///
/// First stores `data[step - 1]`, `data[2 * step - 1]`, ... up to
/// `data[count - 1]`, i.e. the last element of each `step`-sized chunk.
/// When `step > 1`, each chunk `[data + i, data + i + step)` is then laid out
/// with the step divided by `pack_size`.
///
/// @param data  first element of the range being laid out
/// @param step  distance between the elements stored at this level
/// @param count number of elements in the range
void append_data(const data_element* data, std::size_t step, std::size_t count)
{
    for (std::size_t i = step; i <= count; i += step) {
        structure.emplace_back(data[i - 1]);
    }

    if (step > 1) {
        // Clamp to 1: a step of 0 would never advance the loops above.
        const std::size_t next_step = step / pack_size == 0 ? 1 : step / pack_size;

        // `i < count` (not `<=`): the chunk starting at `count` lies outside
        // the range.
        for (std::size_t i = 0; i < count; i += step) {
            append_data(data + i, next_step, step);
        }
    }
}
/// Dispatches on the stored element count `isize` to the matching
/// `_find<N>` instantiation, starting at the beginning of `structure` with an
/// accumulator of 0, and returns its result.
///
/// Only the sizes listed in the switch are handled; for any other `isize`
/// the function returns 0.
///
/// @param num value to look up
std::size_t find(const data_element num) const
{
    // Initialised so an unsupported size yields 0 instead of an
    // indeterminate value.
    std::size_t my_result = 0;

    switch (isize) {
    case 64:
        my_result = _find<64>(structure.data(), 0, num);
        break;
    case 256:
        my_result = _find<256>(structure.data(), 0, num);
        break;
    case 1024:
        my_result = _find<1024>(structure.data(), 0, num);
        break;
    case 4096:
        my_result = _find<4096>(structure.data(), 0, num);
        break;
    case 16384:
        my_result = _find<16384>(structure.data(), 0, num);
        break;
    case 65536:
        my_result = _find<65536>(structure.data(), 0, num);
        break;
    case 262144:
        my_result = _find<262144>(structure.data(), 0, num);
        break;
    case 1048576:
        my_result = _find<1048576>(structure.data(), 0, num);
        break;
    default:
        break;
    }

    return my_result;
}
const
std
::
size_t
&
size
()
const
{
return
isize
;
}
private:
/// Search step over a block of `size` elements.
///
/// Declared once, as a static member template, so that calls of the form
/// `_find<N>(pointer, accumulator, num)` resolve to it.
///
/// @tparam size       number of elements covered by this step
/// @param offset      pointer to the first element examined by this step
/// @param accumulator result accumulated by the enclosing steps
/// @param num         value being searched for
/// @return the accumulated result after this step (and any nested steps)
template <std::size_t size>
static constexpr std::size_t _find(const data_element* offset, const std::size_t accumulator, const data_element num);
// Element storage: filled with emplace_back while the object is built and
// read through structure.data() by find().
std
::
vector
<
data_element
>
structure
;
// Element count; find() switches on this value and size() returns it.
std
::
size_t
isize
;
...
...
@@ -176,47 +167,66 @@ class bsearch_inner {
/// Generic fallback of the search step: always returns 0.
///
/// Policy-specific behaviour is supplied by explicit specialisations of this
/// member template; this definition is what any other policy gets.
template <typename policy>
template <std::size_t size>
inline constexpr std::size_t bsearch_inner<policy>::_find(const data_element* offset, const std::size_t accumulator, const data_element num)
{
    return 0;
}
// 128-bit constant with every byte set to 0x0F; popcnt8 uses it to isolate
// the low nibble of each byte.
static
const
__m128i
popcount_mask
=
_mm_set1_epi8
(
0x0F
);
// 16-entry byte table: entry k holds the number of set bits in k (0..15).
// popcnt8 uses it as the lookup table for _mm_shuffle_epi8.
static
const
__m128i
popcount_table
=
_mm_setr_epi8
(
0
,
1
,
1
,
2
,
1
,
2
,
2
,
3
,
1
,
2
,
2
,
3
,
2
,
3
,
3
,
4
);
static
inline
__m128i
popcnt8
(
__m128i
n
)
{
const
__m128i
pcnt0
=
_mm_shuffle_epi8
(
popcount_table
,
_mm_and_si128
(
n
,
popcount_mask
));
const
__m128i
pcnt1
=
_mm_shuffle_epi8
(
popcount_table
,
_mm_and_si128
(
_mm_srli_epi16
(
n
,
4
),
popcount_mask
));
return
_mm_add_epi8
(
pcnt0
,
pcnt1
);
/// SSE search step over a block of `size` elements.
///
/// Loads four 32-bit elements from `offset` and counts how many of them are
/// not greater than `num` (`jump`). If the next block size is 1 this is the
/// last step and `accumulator + jump` is returned. Otherwise, if all loaded
/// elements are not greater than `num`, the result is `accumulator + size`;
/// else the search continues in the sub-block selected by `jump`.
template <>
template <std::size_t size>
inline constexpr std::size_t bsearch_inner<policy_sse>::_find(const data_element* offset, const std::size_t accumulator, const data_element num)
{
    constexpr auto shrunk = next_size<policy_sse, size>::value;

    // One bit per byte; a set bit means "element <= num" after the inversion.
    const auto not_greater_bits = ~_mm_movemask_epi8(
        _mm_cmplt_epi32(_mm_set1_epi32(num),
                        _mm_loadu_si128(reinterpret_cast<const data_packed*>(offset))));

    // Four mask bits per 32-bit lane, hence the shift by 2.
    const std::size_t jump = _mm_popcnt_u32(not_greater_bits & 0xFFFF) >> 2;

    if constexpr (shrunk == 1) {
        return accumulator + jump;
    } else {
        if (jump == pack_size) {
            return accumulator + size;
        }
        constexpr auto stride = get_jump<policy_sse, size>::value;
        return _find<shrunk>(offset + pack_size + jump * stride,
                             accumulator + jump * shrunk,
                             num);
    }
}
#ifdef USE_AVX
/// AVX search step over a block of `size` elements.
///
/// Loads eight 32-bit elements from `offset` and counts how many of them are
/// not greater than `num` (`jump`). If the next block size is 1 this is the
/// last step and `accumulator + jump` is returned. Otherwise, if all loaded
/// elements are not greater than `num`, the result is `accumulator + size`;
/// else the search continues in the sub-block selected by `jump`.
template <>
template <std::size_t size>
inline std::size_t bsearch_inner<policy_avx>::_find(const data_element* offset, const std::size_t accumulator, const data_element num)
{
    // One bit per byte; a set bit means "element <= num" after the inversion.
    const auto tmp = ~_mm256_movemask_epi8(
        _mm256_cmpgt_epi32(_mm256_loadu_si256(reinterpret_cast<const data_packed*>(offset)),
                           _mm256_set1_epi32(num)));

    // Four mask bits per 32-bit lane, hence the shift by 2.
    const std::size_t jump = _mm_popcnt_u32(tmp) >> 2;

    if constexpr (next_size<policy_avx, size>::value == 1) {
        return accumulator + jump;
    } else {
        if (jump == pack_size) {
            return accumulator + size;
        } else {
            return _find<next_size<policy_avx, size>::value>(
                offset + pack_size + jump * get_jump<policy_avx, size>::value,
                accumulator + jump * next_size<policy_avx, size>::value,
                num);
        }
    }
}
/// Final AVX search step for a block of four elements.
///
/// Uses a 128-bit load: reads four 32-bit elements from `offset`, counts how
/// many are not greater than `num`, and returns that count added to
/// `accumulator`.
template <>
template <>
inline std::size_t bsearch_inner<policy_avx>::_find<4>(const data_element* offset, const std::size_t accumulator, const data_element num)
{
    // One bit per byte; a set bit means "element <= num" after the inversion.
    const auto tmp = ~_mm_movemask_epi8(
        _mm_cmplt_epi32(_mm_set1_epi32(num),
                        _mm_loadu_si128(reinterpret_cast<const policy_data<policy_sse>::data_packed*>(offset))));

    // Only the low 16 bits carry mask data; four bits per 32-bit lane.
    const std::size_t jump = _mm_popcnt_u32(tmp & 0xFFFF) >> 2;
    return accumulator + jump;
}
/// Compile-time integer power: `pow<base, exp>::value == base^exp`.
///
/// Defined recursively as `base * pow<base, exp - 1>::value`; arithmetic is
/// done in `std::size_t`.
template <std::size_t base, std::size_t exp>
struct pow {
    using value_type = std::size_t;
    static constexpr value_type value{base * pow<base, exp - 1>::value};
};

/// Recursion anchor: anything to the power of zero is 1.
template <std::size_t base>
struct pow<base, 0> {
    using value_type = std::size_t;
    static constexpr value_type value{1};
};
/// AVX search step for a block of two elements: ignores its arguments and
/// returns 0.
template <>
template <>
inline std::size_t bsearch_inner<policy_avx>::_find<2>(
    const data_element* offset,
    const std::size_t accumulator,
    const data_element num)
{
    return 0;
}
#endif
template
<
typename
policy
>
class
bsearch_outer
{
...
...
@@ -232,9 +242,14 @@ public:
void
bucketize
(
const
data_element
*
data
)
// size of data is osize
{
for
(
auto
&&
bucket
:
buckets_
)
{
bucket
.
clear
();
}
for
(
const
data_element
*
p_data
=
data
+
osize_
;
--
p_data
!=
data
;
const
data_element
*
p_data
=
data
;
p_data
!=
data
+
osize_
;
++
p_data
)
{
buckets_
[
inner_
.
find
(
*
p_data
)].
emplace_back
(
*
p_data
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment