Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Dubský Jan
asgn
Commits
6be4c384
Commit
6be4c384
authored
Apr 13, 2020
by
Dubský Jan
Browse files
Solution 2.08.1 (Speed: 0.96)
parent
4441a98d
Changes
1
Hide whitespace changes
Inline
Side-by-side
sol/levensol.hpp
View file @
6be4c384
...
...
@@ -12,13 +12,13 @@
namespace
levensol
{
// Forward declaration of the primary `levenstein` class template.
// The single type parameter `policy` is a tag type; the implementations
// visible in this file are provided as explicit specializations on such tags
// (e.g. levenstein<policy_avx>).
template
<
typename
policy
>
class
levenstein
;
// Empty tag types passed as the `policy` template argument of levenstein<>.
// They carry no data; only their identity is used to pick a specialization.
// policy_sse: argument of the levenstein<policy_sse> specialization.
struct
policy_sse
{};
// policy_avx: argument of the levenstein<policy_avx> specialization
// (the one written with 256-bit _mm256_* intrinsics).
struct
policy_avx
{};
// policy_avx512: in the visible part of this file it is referenced only from
// commented-out code -- presumably reserved for an AVX-512 variant; confirm.
struct
policy_avx512
{};
// Declaration of the primary `levenstein` class template (no definition here).
// `policy` is expected to be one of the policy_* tag types; each supported
// tag gets its own explicit specialization of this template.
template
<
typename
policy
>
class
levenstein
;
class
levenstein_base
{
protected:
levenstein_base
(
std
::
size_t
a_size
,
std
::
size_t
b_size
,
size_t
width
)
:
...
...
@@ -42,6 +42,8 @@ protected:
for
(
size_t
i
=
0
;
i
<
VEC_SIZE
;
++
i
)
buffer1
[
i
]
=
buffer2
[
i
]
=
-
1
;
}
// Unfortunately, the destructor is probably included in the measured part of the solution.
// Because of that, proper cleanup is costly, so the pointers are simply dropped (the early return below skips the frees) in the hope that there will be enough memory.
~
levenstein_base
()
{
return
;
free
(
buffer1
-
CACHE_PAD
);
...
...
@@ -71,6 +73,7 @@ private:
}
};
// As part of testing, a scalar Levenshtein implementation was written too (currently compiled out).
#if 0
struct policy_scalar {};
...
...
@@ -101,6 +104,7 @@ public:
};
#endif
// I was not able to find a way to force GCC to unroll a loop with a fixed range.
#define gen_min_loop_128(k) \
{
\
prev_val
+=
1
;
\
...
...
@@ -172,68 +176,6 @@ public:
levenstein
<
policy_sse
>
(
a_size
,
b_size
){};
};
// FIXME: Remove 0 in condition
#if USE_AVX && 0
/// Explicit specialization of levenstein<> for the policy_avx tag.
///
/// Operates on 256-bit registers, i.e. eight 32-bit lanes per step; the same
/// value 8 is passed to levenstein_base as its `width` argument.
/// levenstein_base is inherited privately. The names prev_vec, curr_vec,
/// VEC_SIZE, WIDTH, A_SIZE, B_SIZE and comp used below are not defined in this
/// class -- presumably they come from levenstein_base or elsewhere in the
/// header; confirm against the full file.
template <>
class levenstein<policy_avx> : levenstein_base {
public:
    /// Forwards both sequence lengths to levenstein_base with a width of 8.
    levenstein(std::size_t a_size, std::size_t b_size)
        : levenstein_base(a_size, b_size, 8)
    {
    }

    /// Runs one pass per element of `b` over the row buffers and returns
    /// prev_vec[A_SIZE] once all B_SIZE passes are done.
    ///
    /// @param a  first sequence of 32-bit symbols; loaded unaligned in chunks
    ///           of eight starting at a[WIDTH * blk - 1].
    /// @param b  second sequence; b[0] .. b[B_SIZE - 1] are read, one per pass.
    /// @return   prev_vec[A_SIZE] after the final buffer swap, or the result
    ///           of comp(A_SIZE, B_SIZE, a, b) when DEBUG_MODE is defined.
    ///
    /// NOTE(review): for the first block the chunk load starts one element
    /// before `a` -- confirm the caller guarantees that memory is readable.
    std::uint32_t compute(const std::uint32_t* a, const std::uint32_t* b)
    {
#ifdef DEBUG_MODE
        return comp(A_SIZE, B_SIZE, a, b);
#endif
        const __m256i plus_one = _mm256_set1_epi32(1);

        for (size_t row = 0; row < B_SIZE; ++row) {
            // Current symbol of b replicated into every lane.
            const __m256i b_splat = _mm256_set1_epi32(b[row]);

            for (size_t blk = 0; blk < VEC_SIZE / WIDTH; ++blk) {
                const auto offset = WIDTH * blk;

                // Eight values of the previous row (aligned load) and the
                // same values incremented by one.
                const __m256i above =
                    _mm256_load_si256(reinterpret_cast<const __m256i*>(prev_vec + offset));
                __m256i best = _mm256_add_epi32(above, plus_one);

                // Build "previous row moved up by one lane". The byte shift
                // works inside each 128-bit half separately, so lane 0 and
                // lane 4 come out as zero and are patched by hand: lane 0 gets
                // the element preceding this block (or row + 1 for the very
                // first block), lane 4 gets lane 3 of `above`.
                __m256i diag = _mm256_slli_si256(above, 4);
                const auto carry_in = blk ? prev_vec[offset - 1] : (row + 1);
                diag = _mm256_insert_epi32(diag, carry_in, 0);
                diag = _mm256_insert_epi32(diag, _mm256_extract_epi32(above, 3), 4);

                // Lanes where a == b are all-ones, the others all-zero.
                const __m256i a_chunk =
                    _mm256_loadu_si256(reinterpret_cast<const __m256i*>(a + (offset - 1)));
                const __m256i same = _mm256_cmpeq_epi32(a_chunk, b_splat);

                // NOTE(review): the AND leaves 0 in every mismatching lane and
                // that 0 then wins the min below -- looks unintended; confirm.
                diag = _mm256_and_si256(diag, same);
                best = _mm256_min_epi32(best, diag);

                _mm256_store_si256(reinterpret_cast<__m256i*>(curr_vec + offset), best);
            }

            // Copies each element from its left neighbour, left to right.
            // NOTE(review): this overwrites curr_vec[1 .. A_SIZE] with
            // curr_vec[0] -- presumably a placeholder for the in-row
            // dependency; confirm.
            for (size_t col = 1; col < A_SIZE + 1; ++col) {
                curr_vec[col] = curr_vec[col - 1];
            }

            // The row just written becomes the previous row of the next pass.
            std::swap(prev_vec, curr_vec);
        }

        return prev_vec[A_SIZE];
    }
};
#else
/*
template <>
class levenstein<policy_avx> : public levenstein<policy_sse> {
public:
levenstein(std::size_t a_size, std::size_t b_size) :
levenstein<policy_sse>(a_size, b_size){};
};*/
#endif
/*
#if USE_AVX512
template <>
class levenstein<policy_avx512> : public levenstein<policy_sse> {
public:
levenstein(std::size_t a_size, std::size_t b_size) :
levenstein<policy_sse>(a_size, b_size){};
};
#endif*/
}
// namespace levensol
#endif
#endif
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment