Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Dubský Jan
asgn
Commits
8fc0cd6a
Commit
8fc0cd6a
authored
Apr 12, 2020
by
Dubský Jan
Browse files
Solution 2.04 (Speed: 1.44)
parent
59393cea
Changes
1
Hide whitespace changes
Inline
Side-by-side
sol/levensol.hpp
View file @
8fc0cd6a
...
...
@@ -28,7 +28,7 @@ public:
class
levenstein_base
{
protected:
levenstein_base
(
std
::
size_t
a_size
,
std
::
size_t
b_size
,
size_t
width
)
:
A_SIZE
(
a_size
),
B_SIZE
(
b_size
),
WIDTH
(
width
),
ALIGNMENT
(
sizeof
(
uint32_t
)
*
WIDTH
),
VEC_SIZE
(
vec_size
(
a_size
+
1
)),
VEC_CNT
(
VEC_SIZE
/
WIDTH
)
,
MIN_DST
(
std
::
abs
((
int64_t
)
a_size
-
(
int64_t
)
b_size
)),
MAX_DST
(
std
::
min
(
a_size
,
b_size
)
+
MIN_DST
)
{
A_SIZE
(
a_size
),
B_SIZE
(
b_size
),
WIDTH
(
width
),
ALIGNMENT
(
sizeof
(
uint32_t
)
*
WIDTH
),
VEC_SIZE
(
vec_size
(
a_size
+
1
)),
VEC_CNT
(
VEC_SIZE
/
WIDTH
)
{
zeroth_row
=
(
uint32_t
*
)
aligned_alloc
(
ALIGNMENT
,
VEC_SIZE
*
sizeof
(
uint32_t
));
assert
(
zeroth_row
);
for
(
size_t
i
=
0
;
i
<
VEC_SIZE
;
++
i
)
zeroth_row
[
i
]
=
i
;
...
...
@@ -52,8 +52,6 @@ protected:
const
size_t
ALIGNMENT
;
const
size_t
VEC_SIZE
;
const
size_t
VEC_CNT
;
const
size_t
MIN_DST
;
const
size_t
MAX_DST
;
uint32_t
*
buffer1
;
uint32_t
*
buffer2
;
...
...
@@ -69,8 +67,6 @@ struct policy_sse {};
struct
policy_avx
{};
struct
policy_avx512
{};
//#define DEBUG_MODE
struct
policy_scalar
{};
template
<
>
...
...
@@ -80,10 +76,10 @@ public:
levenstein_base
(
a_size
,
b_size
,
1
)
{
}
// Used for debugging
const
uint32_t
*
compute_next_row
(
size_t
row
,
const
std
::
uint32_t
*
a
,
const
std
::
uint32_t
*
b
)
{
buffer2
[
0
]
=
row
+
1
;
for
(
size_t
j
=
std
::
max
(
0ul
,
row
-
MAX_DST
)
+
1
;
j
<
std
::
min
(
A_SIZE
,
row
+
MAX_DST
)
+
1
;
++
j
)
{
//for (size_t j = 1; j < A_SIZE + 1; ++j) {
for
(
size_t
j
=
1
;
j
<
A_SIZE
+
1
;
++
j
)
{
buffer2
[
j
]
=
std
::
min
(
std
::
min
(
buffer1
[
j
],
buffer2
[
j
-
1
])
+
1
,
buffer1
[
j
-
1
]
+
(
a
[
j
-
1
]
!=
b
[
row
]));
}
std
::
swap
(
buffer1
,
buffer2
);
...
...
@@ -97,35 +93,11 @@ public:
}
return
buffer1
[
A_SIZE
];
}
#if 0
std::uint32_t compute2(const std::uint32_t* a, const std::uint32_t* b) {
for (size_t i = 1; i < MAT_H; ++i) {
mat(i, 0) = i;
for (size_t j = 1; j < MAT_W; ++j) {
mat(i, j) = std::min(mat(i - 1, j) + 1, mat(i - 1, j - 1) + (a[j - 1] != b[i]));
}
}
for (size_t j = 1; j < MAT_W; ++j) {
for (size_t i = 1; i < MAT_H; ++i) {
mat(i, j) = std::min(mat(i, j), mat(i, j - 1) + 1);
}
}
return mat(B_SIZE, A_SIZE);
}
std::uint32_t compute(const std::uint32_t* a, const std::uint32_t* b) {
uint32_t first = compute1(a, b);
uint32_t second = compute2(a, b);
std::cout << first << " : " << second << std::endl;
assert(first == second);
return first;
}
#endif
};
#ifdef
DEBUG_MODE
//#define
DEBUG_MODE
#ifdef DEBUG_MODE
inline
uint32_t
comp
(
size_t
a_size
,
size_t
b_size
,
const
uint32_t
*
a
,
const
uint32_t
*
b
)
{
levenstein
<
policy_scalar
>
l
(
a_size
,
b_size
);
return
l
.
compute
(
a
,
b
);
...
...
@@ -159,6 +131,15 @@ inline void prt_vec(U vec, const char* desc) {
#endif
#define gen_min_loop_128(k) \
{
\
prev
+=
1
;
\
uint32_t
curr
=
_mm_extract_epi32
(
tmp
,
k
);
\
if
(
prev
<
curr
)
tmp
=
_mm_insert_epi32
(
tmp
,
prev
,
k
);
\
else
\
prev
=
curr
;
\
}
template
<
>
class
levenstein
<
policy_sse
>
:
levenstein_base
{
public:
...
...
@@ -166,10 +147,6 @@ public:
levenstein_base
(
a_size
,
b_size
,
4
)
{}
std
::
uint32_t
compute
(
const
std
::
uint32_t
*
a
,
const
std
::
uint32_t
*
b
)
{
// TODO: Fix this shit
//levenstein<policy_scalar> lev_scal(A_SIZE, B_SIZE);
//return lev_scal.compute(a, b);
uint32_t
*
prev_vec
=
zeroth_row
,
*
curr_vec
=
buffer2
;
for
(
size_t
i
=
0
;
i
<
B_SIZE
;
++
i
)
{
...
...
@@ -178,8 +155,12 @@ public:
__m128i
prev
=
_mm_load_si128
((
__m128i
*
)(
prev_vec
+
(
WIDTH
*
j
)));
__m128i
curr
=
_mm_add_epi32
(
prev
,
ones
);
__m128i
prev_shr
=
_mm_slli_si128
(
prev
,
4
);
prev_shr
=
_mm_insert_epi32
(
prev_shr
,
j
?
prev_vec
[
WIDTH
*
j
-
1
]
:
(
i
+
1
),
0
);
__m128i
prev_shr
;
if
(
j
)
prev_shr
=
_mm_loadu_si128
((
__m128i
*
)(
prev_vec
+
(
WIDTH
*
j
)
-
1
));
else
{
prev_shr
=
_mm_slli_si128
(
prev
,
4
);
prev_shr
=
_mm_insert_epi32
(
prev_shr
,
(
i
+
1
),
0
);
}
__m128i
curra
=
_mm_loadu_si128
((
__m128i
*
)(
a
+
(
WIDTH
*
j
-
1
)));
__m128i
mask
=
_mm_cmpeq_epi32
(
curra
,
currb
);
...
...
@@ -189,24 +170,33 @@ public:
curr
=
_mm_min_epu32
(
curr
,
prev_shr
);
_mm_store_si128
((
__m128i
*
)(
curr_vec
+
(
WIDTH
*
j
)),
curr
);
/*for (size_t k = j ? 0 : 1; k < WIDTH; ++k) {
uint32_t p = curr_vec[(WIDTH * j) + k - 1], c = curr_vec[(WIDTH * j) + k];
if (p + 1 < c) curr_vec[(WIDTH * j) + k] = p + 1;
}*/
}
#if 0
uint32_t prev = curr_vec[0];
for (size_t j = 1; j < A_SIZE + 1; ++j) {
prev += 1;
uint32_t curr = curr_vec[j];
if
(
prev
<
curr
)
{
curr_vec
[
j
]
=
prev
;
}
else
if (prev < curr) curr_vec[j] = prev;
else
prev = curr;
}
#else
uint32_t
prev
=
curr_vec
[
0
]
-
1
;
for
(
size_t
j
=
0
;
j
<
VEC_CNT
;
++
j
)
{
auto
addr
=
(
__m128i
*
)(
curr_vec
+
j
*
WIDTH
);
__m128i
tmp
=
_mm_load_si128
(
addr
);
gen_min_loop_128
(
0
);
gen_min_loop_128
(
1
);
gen_min_loop_128
(
2
);
gen_min_loop_128
(
3
);
_mm_store_si128
(
addr
,
tmp
);
}
#endif
if
(
!
i
)
prev_vec
=
buffer1
;
std
::
swap
(
prev_vec
,
curr_vec
);
if
(
!
i
)
curr_vec
=
buffer1
;
}
return
prev_vec
[
A_SIZE
];
...
...
@@ -216,8 +206,8 @@ private:
const
__m128i
ones
=
_mm_set1_epi32
(
1
);
};
// FIXME: Remove
1
in condition
#if USE_AVX && 0
//|| 1
// FIXME: Remove
0
in condition
#if USE_AVX && 0
template
<
>
class
levenstein
<
policy_avx
>
:
levenstein_base
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment