Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Dubský Jan
asgn
Commits
afbc226a
Commit
afbc226a
authored
Apr 12, 2020
by
Dubský Jan
Browse files
Solution 2.07.1 (Speed: 1.06)
parent
15c7f11e
Changes
1
Hide whitespace changes
Inline
Side-by-side
sol/levensol.hpp
View file @
afbc226a
...
...
@@ -106,40 +106,6 @@ public:
}
};
//#define DEBUG_MODE
#ifdef DEBUG_MODE
inline
uint32_t
comp
(
size_t
a_size
,
size_t
b_size
,
const
uint32_t
*
a
,
const
uint32_t
*
b
)
{
levenstein
<
policy_scalar
>
l
(
a_size
,
b_size
);
return
l
.
compute
(
a
,
b
);
}
/// Prints an element-wise comparison of two arrays to std::cout on a single line.
///
/// The line starts with "DIFF:". For every index in [0, len): when both arrays
/// hold the same value, that value is printed once; otherwise "a[i]:b[i]" is
/// printed, wrapped in the ANSI sequences ESC[33m ... ESC[00m (colour code 33,
/// yellow on standard terminals, followed by a reset). The line is terminated
/// with std::endl, so the stream is flushed.
///
/// @param len number of elements read from both a and b
/// @param a   first array (its value is printed first on a mismatch)
/// @param b   second array
inline void prt_diff(size_t len, const uint32_t* a, const uint32_t* b) {
    std::cout << "DIFF:";
    for (size_t i = 0; i < len; ++i) {
        if (a[i] == b[i]) {
            std::cout << " " << a[i];
        } else {
            // "\033" is the standard octal spelling of ESC; "\e" is only a
            // compiler extension and is rejected or warned about elsewhere.
            std::cout << " \033[33m" << a[i] << ":" << b[i] << "\033[00m";
        }
    }
    std::cout << std::endl;
}
/// Prints the contents of an array to std::cout on a single line.
///
/// When title is non-null, the line is prefixed with "<title>: ". Each element
/// is followed by one space, and the line ends with std::endl (which flushes).
///
/// Note: len + 1 elements are printed (indices 0 through len inclusive), so
/// the array must hold at least len + 1 values.
///
/// @param len   highest index printed; the element count is len + 1
/// @param array values to print
/// @param title optional label printed before the values (nullptr = no label)
inline void prt_arr(size_t len, const uint32_t* array, const char* title = nullptr) {
    if (title != nullptr) {
        std::cout << title << ": ";
    }

    // One more element than len is emitted, matching the original loop bound.
    const size_t count = len + 1;
    for (size_t idx = 0; idx != count; ++idx) {
        std::cout << array[idx] << " ";
    }

    std::cout << std::endl;
}
template
<
typename
U
>
inline
void
prt_vec
(
U
vec
,
const
char
*
desc
)
{
constexpr
size_t
SZ
=
sizeof
(
U
)
/
sizeof
(
uint32_t
);
uint32_t
prt_arr
[
SZ
];
_mm_store_si128
((
__m128i
*
)(
prt_arr
),
vec
);
std
::
cout
<<
desc
<<
": "
;
return
;
#endif
#define gen_min_loop_128(k) \
{
\
prev_val
+=
1
;
\
...
...
@@ -149,123 +115,94 @@ inline void prt_vec(U vec, const char* desc) {
prev_val
=
curr_val
;
\
}
template
<
>
class
levenstein
<
policy_sse
>
:
levenstein_base
{
public:
levenstein
(
std
::
size_t
a_size
,
std
::
size_t
b_size
)
:
levenstein_base
(
a_size
,
b_size
,
4
)
{}
//std::uint32_t compute(const std::uint32_t* __restrict__ a, const std::uint32_t* __restrict__ b) {
std
::
uint32_t
compute
(
const
std
::
uint32_t
*
a
,
const
std
::
uint32_t
*
b
)
{
if
(
SWAP
)
std
::
swap
(
a
,
b
);
//__m128i *prev_vec = (__m128i*)zeroth_row, *curr_vec = (__m128i*)buffer2;
uint32_t
*
prev_vec
=
zeroth_row
,
*
curr_vec
=
buffer2
;
for
(
size_t
i
=
0
;
i
<
B_SIZE
;
++
i
)
{
__m128i
currb
=
_mm_set1_epi32
(
b
[
i
]);
curr_vec
[
-
1
]
=
i
+
2
;
uint32_t
prev_val
=
i
;
__m128i
next_prev_shr
=
_mm_loadu_si128
((
__m128i
*
)(
prev_vec
-
1
));
__m128i
curra
=
_mm_loadu_si128
((
__m128i
*
)(
a
-
1
));
for
(
size_t
j
=
0
;
j
<
VEC_CNT
;
++
j
)
{
__m128i
prev
=
_mm_load_si128
((
__m128i
*
)(
prev_vec
+
(
WIDTH
*
j
)));
__m128i
curr
=
_mm_add_epi32
(
prev
,
ones
);
__m128i
prev_shr
=
next_prev_shr
;
__m128i
mask
=
_mm_cmpeq_epi32
(
curra
,
currb
);
__m128i
ones_masked
=
_mm_and_si128
(
ones
,
~
mask
);
prev_shr
=
_mm_add_epi32
(
prev_shr
,
ones_masked
);
curr
=
_mm_min_epu32
(
curr
,
prev_shr
);
next_prev_shr
=
_mm_loadu_si128
((
__m128i
*
)(
prev_vec
+
(
WIDTH
*
(
j
+
1
))
-
1
));
curra
=
_mm_loadu_si128
((
__m128i
*
)(
a
+
(
WIDTH
*
(
j
+
1
)
-
1
)));
#if 1
gen_min_loop_128
(
0
);
gen_min_loop_128
(
1
);
gen_min_loop_128
(
2
);
gen_min_loop_128
(
3
);
#endif
_mm_store_si128
((
__m128i
*
)(
curr_vec
+
(
WIDTH
*
j
)),
curr
);
}
#if 0
#if 0
uint32_t prev = curr_vec[0];
for (size_t j = 1; j < A_SIZE + 1; ++j) {
prev += 1;
uint32_t curr = curr_vec[j];
if (prev < curr) curr_vec[j] = prev;
else
prev = curr;
}
#else
uint32_t prev_val = curr_vec[0] - 1;
for (size_t j = 0; j < VEC_CNT; ++j) {
auto addr = (__m128i*)(curr_vec + j * WIDTH);
__m128i curr = _mm_load_si128(addr);
gen_min_loop_128(0);
gen_min_loop_128(1);
gen_min_loop_128(2);
gen_min_loop_128(3);
_mm_store_si128(addr, curr);
}
#endif
#endif
template
<
>
class
levenstein
<
policy_sse
>
:
levenstein_base
{
public:
levenstein
(
std
::
size_t
a_size
,
std
::
size_t
b_size
)
:
levenstein_base
(
a_size
,
b_size
,
4
)
{}
std
::
swap
(
prev_vec
,
curr_vec
);
if
(
!
i
)
curr_vec
=
buffer1
;
//if (!i) prev_vec = buffer1;
std
::
uint32_t
compute
(
const
std
::
uint32_t
*
a
,
const
std
::
uint32_t
*
b
)
{
if
(
SWAP
)
std
::
swap
(
a
,
b
);
uint32_t
*
prev_vec
=
zeroth_row
,
*
curr_vec
=
buffer2
;
for
(
size_t
i
=
0
;
i
<
B_SIZE
;
++
i
)
{
__m128i
currb
=
_mm_set1_epi32
(
b
[
i
]);
curr_vec
[
-
1
]
=
i
+
2
;
uint32_t
prev_val
=
i
;
__m128i
next_prev_shr
=
_mm_loadu_si128
((
__m128i
*
)(
prev_vec
-
1
));
__m128i
curra
=
_mm_loadu_si128
((
__m128i
*
)(
a
-
1
));
for
(
size_t
j
=
0
;
j
<
VEC_CNT
;
++
j
)
{
__m128i
prev
=
_mm_load_si128
((
__m128i
*
)(
prev_vec
+
(
WIDTH
*
j
)));
__m128i
curr
=
_mm_add_epi32
(
prev
,
ones
);
__m128i
prev_shr
=
next_prev_shr
;
__m128i
mask
=
_mm_cmpeq_epi32
(
curra
,
currb
);
__m128i
ones_masked
=
_mm_and_si128
(
ones
,
~
mask
);
prev_shr
=
_mm_add_epi32
(
prev_shr
,
ones_masked
);
curr
=
_mm_min_epu32
(
curr
,
prev_shr
);
next_prev_shr
=
_mm_loadu_si128
((
__m128i
*
)(
prev_vec
+
(
WIDTH
*
(
j
+
1
))
-
1
));
curra
=
_mm_loadu_si128
((
__m128i
*
)(
a
+
(
WIDTH
*
(
j
+
1
)
-
1
)));
gen_min_loop_128
(
0
);
gen_min_loop_128
(
1
);
gen_min_loop_128
(
2
);
gen_min_loop_128
(
3
);
_mm_store_si128
((
__m128i
*
)(
curr_vec
+
(
WIDTH
*
j
)),
curr
);
}
return
prev_vec
[
A_SIZE
];
std
::
swap
(
prev_vec
,
curr_vec
);
if
(
!
i
)
curr_vec
=
buffer1
;
}
private:
const
__m128i
ones
=
_mm_set1_epi32
(
1
);
};
return
prev_vec
[
A_SIZE
];
}
private:
const
__m128i
ones
=
_mm_set1_epi32
(
1
);
};
// FIXME: Remove 0 in condition
#if USE_AVX && 0
template
<
>
class
levenstein
<
policy_avx
>
:
levenstein_base
{
public:
levenstein
(
std
::
size_t
a_size
,
std
::
size_t
b_size
)
:
levenstein_base
(
a_size
,
b_size
,
8
)
{}
template
<
>
class
levenstein
<
policy_avx
>
:
levenstein_base
{
public:
levenstein
(
std
::
size_t
a_size
,
std
::
size_t
b_size
)
:
levenstein_base
(
a_size
,
b_size
,
8
)
{}
std
::
uint32_t
compute
(
const
std
::
uint32_t
*
a
,
const
std
::
uint32_t
*
b
)
{
std
::
uint32_t
compute
(
const
std
::
uint32_t
*
a
,
const
std
::
uint32_t
*
b
)
{
#ifdef DEBUG_MODE
return
comp
(
A_SIZE
,
B_SIZE
,
a
,
b
);
return
comp
(
A_SIZE
,
B_SIZE
,
a
,
b
);
#endif
const
__m256i
ones
=
_mm256_set1_epi32
(
1
);
for
(
size_t
i
=
0
;
i
<
B_SIZE
;
++
i
)
{
__m256i
currb
=
_mm256_set1_epi32
(
b
[
i
]);
for
(
size_t
j
=
0
;
j
<
VEC_SIZE
/
WIDTH
;
++
j
)
{
__m256i
prev
=
_mm256_load_si256
((
__m256i
*
)(
prev_vec
+
(
WIDTH
*
j
)));
__m256i
curr
=
_mm256_add_epi32
(
prev
,
ones
);
__m256i
prev_shr
=
_mm256_slli_si256
(
prev
,
4
);
prev_shr
=
_mm256_insert_epi32
(
prev_shr
,
j
?
prev_vec
[
WIDTH
*
j
-
1
]
:
(
i
+
1
),
0
);
prev_shr
=
_mm256_insert_epi32
(
prev_shr
,
_mm256_extract_epi32
(
prev
,
3
),
4
);
__m256i
curra
=
_mm256_loadu_si256
((
__m256i
*
)(
a
+
(
WIDTH
*
j
-
1
)));
__m256i
mask
=
_mm256_cmpeq_epi32
(
curra
,
currb
);
prev_shr
=
_mm256_and_si256
(
prev_shr
,
mask
);
curr
=
_mm256_min_epi32
(
curr
,
prev_shr
);
const
__m256i
ones
=
_mm256_set1_epi32
(
1
);
for
(
size_t
i
=
0
;
i
<
B_SIZE
;
++
i
)
{
__m256i
currb
=
_mm256_set1_epi32
(
b
[
i
]);
for
(
size_t
j
=
0
;
j
<
VEC_SIZE
/
WIDTH
;
++
j
)
{
__m256i
prev
=
_mm256_load_si256
((
__m256i
*
)(
prev_vec
+
(
WIDTH
*
j
)));
__m256i
curr
=
_mm256_add_epi32
(
prev
,
ones
);
_mm256_store_si256
((
__m256i
*
)(
curr_vec
+
(
WIDTH
*
j
)),
curr
);
}
__m256i
prev_shr
=
_mm256_slli_si256
(
prev
,
4
);
prev_shr
=
_mm256_insert_epi32
(
prev_shr
,
j
?
prev_vec
[
WIDTH
*
j
-
1
]
:
(
i
+
1
),
0
);
prev_shr
=
_mm256_insert_epi32
(
prev_shr
,
_mm256_extract_epi32
(
prev
,
3
),
4
);
for
(
size_t
j
=
1
;
j
<
A_SIZE
+
1
;
++
j
)
curr_vec
[
j
]
=
curr_vec
[
j
-
1
];
__m256i
curra
=
_mm256_loadu_si256
((
__m256i
*
)(
a
+
(
WIDTH
*
j
-
1
)));
__m256i
mask
=
_mm256_cmpeq_epi32
(
curra
,
currb
);
prev_shr
=
_mm256_and_si256
(
prev_shr
,
mask
);
curr
=
_mm256_min_epi32
(
curr
,
prev_shr
);
std
::
swap
(
prev_vec
,
curr
_vec
);
_mm256_store_si256
((
__m256i
*
)(
curr_vec
+
(
WIDTH
*
j
))
,
curr
);
}
return
prev_vec
[
A_SIZE
];
for
(
size_t
j
=
1
;
j
<
A_SIZE
+
1
;
++
j
)
curr_vec
[
j
]
=
curr_vec
[
j
-
1
];
std
::
swap
(
prev_vec
,
curr_vec
);
}
};
return
prev_vec
[
A_SIZE
];
}
};
#else
...
...
@@ -280,12 +217,12 @@ public:
#if USE_AVX512
/// Specialization selected for policy_avx512.
///
/// It adds no members of its own: it publicly derives from
/// levenstein<policy_sse> and forwards both sizes to that base constructor.
template <>
class levenstein<policy_avx512> : public levenstein<policy_sse> {
public:
    /// @param a_size forwarded unchanged to levenstein<policy_sse>
    /// @param b_size forwarded unchanged to levenstein<policy_sse>
    levenstein(std::size_t a_size, std::size_t b_size)
        : levenstein<policy_sse>(a_size, b_size) {}
};
/// Specialization selected for policy_avx512.
///
/// It adds no members of its own: it publicly derives from
/// levenstein<policy_sse> and forwards both sizes to that base constructor.
template <>
class levenstein<policy_avx512> : public levenstein<policy_sse> {
public:
    /// @param a_size forwarded unchanged to levenstein<policy_sse>
    /// @param b_size forwarded unchanged to levenstein<policy_sse>
    levenstein(std::size_t a_size, std::size_t b_size)
        : levenstein<policy_sse>(a_size, b_size) {}
};
#endif
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment