Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Dubský Jan
asgn
Commits
f307e683
Commit
f307e683
authored
Apr 11, 2020
by
Dubský Jan
Browse files
Solution 2.01 (Speed: 3.12)
parent
77671c51
Changes
2
Hide whitespace changes
Inline
Side-by-side
sol/levensol.cpp
View file @
f307e683
#include "levensol.hpp"
// Translation unit for the levensol namespace. Nothing in it is compiled at the
// moment: the only content is the preprocessor-disabled stub below.
namespace
levensol
{
// Disabled out-of-line definition of compute() for the AVX policy. Its body holds
// only a commented-out loop, so enabling the block as-is would yield a function
// that returns no value.
#if 0
template <>
std::uint32_t levenstein<policy_avx>::compute(const std::uint32_t* a, const std::uint32_t* b) {
//for (size_t i = 0; i < a_size + 1; ++i) prev.push_back(i);
}
#endif
}
// namespace levensol
sol/levensol.hpp
View file @
f307e683
#ifndef levensol_hpp_
#define levensol_hpp_
#include <immintrin.h>

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <new>
#include <utility>
#include <vector>
namespace
levensol
{
/// Primary template of the Levenshtein-distance calculator.
///
/// It carries no algorithm of its own; the policy-specific specializations
/// declared later in this header provide compute(). Instantiating it for a
/// policy without a specialization therefore yields an object that can be
/// constructed but has no compute() member.
///
/// @tparam policy  tag type that selects the implementation
template <typename policy>
class levenstein
{
public:
    /// Accepts the two sequence lengths so the constructor signature matches
    /// the specializations; both values are ignored here.
    levenstein(std::size_t /*a_size*/, std::size_t /*b_size*/) {}

    //std::uint32_t compute(const std::uint32_t* a, const std::uint32_t* b) {
    //}
};
/// Common state for the row-based Levenshtein kernels.
///
/// Stores the two input lengths, the number of 32-bit lanes per vector and
/// three heap rows of VEC_SIZE cells each: `zeroth_row` (pre-filled with
/// 0, 1, 2, ...) plus the scratch rows `buffer1` and `buffer2`. Construction
/// and destruction are protected, so only derived classes can create one.
/// The object owns its rows and is therefore non-copyable.
class levenstein_base
{
protected:
    /// @param a_size  number of elements in the first sequence
    /// @param b_size  number of elements in the second sequence
    /// @param width   32-bit lanes per vector; must be non-zero, and is expected
    ///                to be a power of two because it determines the alignment
    /// @throws std::bad_alloc if any of the three rows cannot be allocated
    levenstein_base(std::size_t a_size, std::size_t b_size, size_t width)
        : A_SIZE(a_size),
          B_SIZE(b_size),
          WIDTH(width),
          ALIGNMENT(sizeof(uint32_t) * WIDTH),
          VEC_SIZE(vec_size(a_size + 1)),
          VEC_CNT(VEC_SIZE / WIDTH),
          buffer1(nullptr),
          buffer2(nullptr),
          zeroth_row(nullptr)
    {
        zeroth_row = allocate_row();
        buffer1 = allocate_row();
        buffer2 = allocate_row();
        // Checked unconditionally: an assert would vanish in NDEBUG builds and
        // let a null row be dereferenced later.
        if (!zeroth_row || !buffer1 || !buffer2) {
            release_rows();
            throw std::bad_alloc();
        }

        for (size_t i = 0; i < VEC_SIZE; ++i)
            zeroth_row[i] = static_cast<uint32_t>(i);
    }

    ~levenstein_base()
    {
        release_rows();
    }

    // The rows are raw owning pointers; a member-wise copy would free them twice.
    levenstein_base(const levenstein_base&) = delete;
    levenstein_base& operator=(const levenstein_base&) = delete;

protected:
    const size_t A_SIZE, B_SIZE;  ///< lengths of the two sequences
    const size_t WIDTH;           ///< 32-bit lanes per vector
    const size_t ALIGNMENT;       ///< byte size of one vector (sizeof(uint32_t) * WIDTH)
    const size_t VEC_SIZE;        ///< cells per row: A_SIZE + 1 rounded up to a multiple of WIDTH
    const size_t VEC_CNT;         ///< vectors per row (VEC_SIZE / WIDTH)

    uint32_t* buffer1;
    uint32_t* buffer2;
    uint32_t* zeroth_row;

private:
    /// Rounds element_cnt up to the next multiple of WIDTH.
    size_t vec_size(size_t element_cnt) const
    {
        assert(WIDTH != 0);
        // One extra vector only when there is a remainder; adding the remainder
        // itself would over-allocate by up to (WIDTH - 2) vectors.
        return WIDTH * (element_cnt / WIDTH + (element_cnt % WIDTH != 0 ? 1 : 0));
    }

    /// Allocates one uninitialised row of at least VEC_SIZE cells, aligned to
    /// at least ALIGNMENT bytes. Returns nullptr on failure.
    uint32_t* allocate_row() const
    {
        // aligned_alloc expects an alignment the platform supports and a size
        // that is a multiple of it, so widen both when WIDTH is very small.
        const size_t alignment = ALIGNMENT < sizeof(void*) ? sizeof(void*) : ALIGNMENT;
        const size_t bytes =
            (VEC_SIZE * sizeof(uint32_t) + alignment - 1) / alignment * alignment;
        return static_cast<uint32_t*>(std::aligned_alloc(alignment, bytes));
    }

    /// Frees all three rows; safe to call with rows that were never allocated.
    void release_rows()
    {
        std::free(buffer1);
        std::free(buffer2);
        std::free(zeroth_row);
        buffer1 = buffer2 = zeroth_row = nullptr;
    }
};
// Empty tag types used as the template argument of `levenstein` to pick an
// implementation.

/// Selects the 128-bit (4 x 32-bit lanes) implementation.
struct policy_sse
{
};

/// Selects the AVX implementation.
struct policy_avx
{
};

/// Selects the AVX-512 implementation.
struct policy_avx512
{
};
//#define DEBUG_MODE
#ifdef DEBUG_MODE
/// Tag type selecting the plain scalar reference implementation
/// (only available when DEBUG_MODE is defined).
struct policy_scalar
{
};
/// Scalar reference implementation of the Levenshtein distance, used to
/// cross-check the vectorised kernels in DEBUG_MODE.
///
/// Works on two rows: `buffer1` holds the previous row and `buffer2` receives
/// the current one; the two pointers are swapped after every row.
template <>
class levenstein<policy_scalar> : levenstein_base
{
public:
    /// @param a_size  number of elements in the first sequence
    /// @param b_size  number of elements in the second sequence
    levenstein(std::size_t a_size, std::size_t b_size) : levenstein_base(a_size, b_size, 1)
    {
        // Seed the "previous row" with row zero (0, 1, 2, ...).
        std::copy(zeroth_row, zeroth_row + VEC_SIZE, buffer1);
    }

#if 0
    const uint32_t* compute_next_row(size_t row, const std::uint32_t* a, const std::uint32_t* b) {
        buffer2[0] = row + 1;
        for (size_t j = 1; j < A_SIZE + 1; ++j) {
            buffer2[j] = std::min(std::min(buffer1[j], buffer2[j - 1]) + 1, buffer1[j - 1] + (a[j - 1] != b[row]));
        }
        std::swap(buffer1, buffer2);
        return buffer1;
    }

    std::uint32_t compute(const std::uint32_t* a, const std::uint32_t* b) {
        for (size_t i = 0; i < B_SIZE; ++i) {
            compute_next_row(i, a, b);
        }
        return buffer1[A_SIZE];
    }
#else
    /// Computes the edit distance between `a` (A_SIZE elements) and `b`
    /// (B_SIZE elements).
    ///
    /// @return the distance; A_SIZE when B_SIZE is zero
    std::uint32_t compute(const std::uint32_t* a, const std::uint32_t* b)
    {
        // Re-seed the previous row on every call: after an earlier call
        // buffer1 still holds that call's last row, which would otherwise be
        // used as the starting row.
        std::copy(zeroth_row, zeroth_row + VEC_SIZE, buffer1);

        for (size_t i = 0; i < B_SIZE; ++i) {
            buffer2[0] = static_cast<uint32_t>(i + 1);
            for (size_t j = 1; j < A_SIZE + 1; ++j) {
                // Cheapest of: step from the cell above or the cell to the
                // left (+1 each), or the diagonal step (+1 only on mismatch).
                const uint32_t via_neighbour = std::min(buffer1[j], buffer2[j - 1]) + 1;
                const uint32_t via_diagonal = buffer1[j - 1] + (a[j - 1] != b[i]);
                buffer2[j] = std::min(via_neighbour, via_diagonal);
            }
            std::swap(buffer1, buffer2);
        }
        return buffer1[A_SIZE];
    }
#endif
};
struct
policy_sse
{
};
inline
uint32_t
comp
(
size_t
a_size
,
size_t
b_size
,
const
uint32_t
*
a
,
const
uint32_t
*
b
)
{
levenstein
<
policy_scalar
>
l
(
a_size
,
b_size
);
return
l
.
compute
(
a
,
b
);
}
/// Debug helper: prints the first `len` elements of `a` on one line after a
/// "DIFF:" prefix. Where `a[i]` and `b[i]` differ, the position is printed as
/// "a:b" wrapped in the terminal escape sequences ESC[33m ... ESC[00m.
inline void prt_diff(size_t len, const uint32_t* a, const uint32_t* b)
{
    std::cout << "DIFF:";
    for (size_t i = 0; i < len; ++i) {
        if (a[i] == b[i]) {
            std::cout << " " << a[i];
        } else {
            // "\033" is the standard spelling of the ESC byte; "\e" is a
            // compiler extension.
            std::cout << " \033[33m" << a[i] << ":" << b[i] << "\033[00m";
        }
    }
    std::cout << std::endl;
}
/// Debug helper: prints `array[0]` through `array[len]` — that is, len + 1
/// values — separated by spaces and followed by a newline. When `title` is
/// non-null the line starts with "title: ".
///
/// NOTE(review): because len + 1 cells are read, `array` must hold at least
/// len + 1 elements; passing a sequence's own length reads one element past it.
inline void prt_arr(size_t len, const uint32_t* array, const char* title = nullptr)
{
    if (title) {
        std::cout << title << ": ";
    }
    for (size_t k = 0; k <= len; ++k) {
        std::cout << array[k] << " ";
    }
    std::cout << std::endl;
}
/// Debug helper: prints `desc`, a colon, and then every 32-bit lane of `vec`
/// in memory order, each followed by a space, ending with a newline.
///
/// @tparam U  vector type whose size is a non-zero multiple of 4 bytes
template <typename U>
inline void prt_vec(U vec, const char* desc)
{
    static_assert(sizeof(U) >= sizeof(uint32_t) && sizeof(U) % sizeof(uint32_t) == 0,
                  "prt_vec expects a vector made of whole 32-bit lanes");
    constexpr size_t SZ = sizeof(U) / sizeof(uint32_t);

    // memcpy copies exactly sizeof(U) bytes and has no alignment requirement,
    // unlike an aligned 128-bit store into a plain uint32_t array.
    uint32_t lanes[SZ];
    std::memcpy(lanes, &vec, sizeof(U));

    std::cout << desc << ": ";
    for (size_t k = 0; k < SZ; ++k) {
        std::cout << lanes[k] << ' ';
    }
    std::cout << std::endl;
}
#endif
template
<
>
class
levenstein
<
policy_sse
>
:
levenstein_base
{
public:
levenstein
(
std
::
size_t
a_size
,
std
::
size_t
b_size
)
:
levenstein_base
(
a_size
,
b_size
,
4
)
{}
std
::
uint32_t
compute
(
const
std
::
uint32_t
*
a
,
const
std
::
uint32_t
*
b
)
{
uint32_t
*
prev_vec
=
zeroth_row
,
*
curr_vec
=
buffer2
;
#ifdef DEBUG_MODE
levenstein
<
policy_scalar
>
lev_scal
(
A_SIZE
,
B_SIZE
);
if
(
A_SIZE
!=
128
)
{
return
0
;
}
std
::
cout
<<
"--------------------------------------"
<<
std
::
endl
;
prt_arr
(
A_SIZE
,
a
,
"A"
);
prt_arr
(
B_SIZE
,
b
,
"B"
);
prt_arr
(
A_SIZE
,
prev_vec
,
"INIT_PREV"
);
#endif
for
(
size_t
i
=
0
;
i
<
B_SIZE
;
++
i
)
{
__m128i
currb
=
_mm_set1_epi32
(
b
[
i
]);
for
(
size_t
j
=
0
;
j
<
VEC_CNT
;
++
j
)
{
__m128i
prev
=
_mm_load_si128
((
__m128i
*
)(
prev_vec
+
(
WIDTH
*
j
)));
__m128i
curr
=
_mm_add_epi32
(
prev
,
ones
);
__m128i
prev_shr
=
_mm_slli_si128
(
prev
,
4
);
prev_shr
=
_mm_insert_epi32
(
prev_shr
,
j
?
prev_vec
[
WIDTH
*
j
-
1
]
:
(
i
+
1
),
0
);
__m128i
curra
=
_mm_loadu_si128
((
__m128i
*
)(
a
+
(
WIDTH
*
j
-
1
)));
__m128i
mask
=
_mm_cmpeq_epi32
(
curra
,
currb
);
__m128i
ones_masked
=
_mm_and_si128
(
ones
,
~
mask
);
prev_shr
=
_mm_add_epi32
(
prev_shr
,
ones_masked
);
curr
=
_mm_min_epu32
(
curr
,
prev_shr
);
_mm_store_si128
((
__m128i
*
)(
curr_vec
+
(
WIDTH
*
j
)),
curr
);
}
for
(
size_t
j
=
1
;
j
<
A_SIZE
+
1
;
++
j
)
curr_vec
[
j
]
=
std
::
min
(
curr_vec
[
j
],
curr_vec
[
j
-
1
]
+
1
);
if
(
!
i
)
prev_vec
=
buffer1
;
std
::
swap
(
prev_vec
,
curr_vec
);
#ifdef DEBUG_MODE
auto
correct
=
lev_scal
.
compute_next_row
(
i
,
a
,
b
);
prt_diff
(
A_SIZE
+
1
,
prev_vec
,
correct
);
#endif
}
#ifdef DEBUG_MODE
for
(
size_t
k
=
0
;
k
<
A_SIZE
+
1
;
++
k
)
std
::
cout
<<
prev_vec
[
k
]
<<
" "
;
std
::
cout
<<
std
::
endl
;
#endif
return
prev_vec
[
A_SIZE
];
}
private:
const
__m128i
ones
=
_mm_set1_epi32
(
1
);
};
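// FIXME: the `&& 0` below force-disables the AVX specialization (the commented-out `|| 1` would force-enable it); remove the override once that code is finished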
#if USE_AVX && 0 //|| 1
// Dedicated AVX specialization working on 8 x 32-bit lanes per vector. The
// enclosing `#if USE_AVX && 0` can never be true, so this class is currently
// compiled out and the SSE-based fallback in the `#else` branch is used.
//
// With DEBUG_MODE defined, compute() returns the result of comp() immediately.
//
// NOTE(review): this looks like unfinished work and will not build as it stands:
//  - prev_vec and curr_vec are used but never declared here; levenstein_base
//    only provides buffer1, buffer2 and zeroth_row.
//  - The load from `a + (WIDTH * j - 1)` addresses the element before `a`
//    when j == 0.
//  - Unlike the SSE version, no mismatch cost is added: the shifted previous
//    row is AND-ed with the equality mask instead, and a signed minimum is
//    taken where the SSE version uses an unsigned one.
//  - The scalar follow-up loop copies curr_vec[j - 1] into curr_vec[j] rather
//    than taking a minimum with curr_vec[j - 1] + 1.
//  - Lane 3 of `prev` is re-inserted into lane 4 after the byte shift,
//    presumably because that shift does not cross the 128-bit halves —
//    TODO confirm against the intrinsic's documentation.
template
<
>
class
levenstein
<
policy_avx
>
:
levenstein_base
{
public:
levenstein
(
std
::
size_t
a_size
,
std
::
size_t
b_size
)
:
levenstein_base
(
a_size
,
b_size
,
8
)
{}
std
::
uint32_t
compute
(
const
std
::
uint32_t
*
a
,
const
std
::
uint32_t
*
b
)
{
#ifdef DEBUG_MODE
return
comp
(
A_SIZE
,
B_SIZE
,
a
,
b
);
#endif
const
__m256i
ones
=
_mm256_set1_epi32
(
1
);
for
(
size_t
i
=
0
;
i
<
B_SIZE
;
++
i
)
{
__m256i
currb
=
_mm256_set1_epi32
(
b
[
i
]);
for
(
size_t
j
=
0
;
j
<
VEC_SIZE
/
WIDTH
;
++
j
)
{
__m256i
prev
=
_mm256_load_si256
((
__m256i
*
)(
prev_vec
+
(
WIDTH
*
j
)));
__m256i
curr
=
_mm256_add_epi32
(
prev
,
ones
);
__m256i
prev_shr
=
_mm256_slli_si256
(
prev
,
4
);
prev_shr
=
_mm256_insert_epi32
(
prev_shr
,
j
?
prev_vec
[
WIDTH
*
j
-
1
]
:
(
i
+
1
),
0
);
prev_shr
=
_mm256_insert_epi32
(
prev_shr
,
_mm256_extract_epi32
(
prev
,
3
),
4
);
__m256i
curra
=
_mm256_loadu_si256
((
__m256i
*
)(
a
+
(
WIDTH
*
j
-
1
)));
__m256i
mask
=
_mm256_cmpeq_epi32
(
curra
,
currb
);
prev_shr
=
_mm256_and_si256
(
prev_shr
,
mask
);
curr
=
_mm256_min_epi32
(
curr
,
prev_shr
);
_mm256_store_si256
((
__m256i
*
)(
curr_vec
+
(
WIDTH
*
j
)),
curr
);
}
for
(
size_t
j
=
1
;
j
<
A_SIZE
+
1
;
++
j
)
curr_vec
[
j
]
=
curr_vec
[
j
-
1
];
std
::
swap
(
prev_vec
,
curr_vec
);
}
return
prev_vec
[
A_SIZE
];
}
};
#else
/// AVX policy fallback: when the dedicated AVX implementation is compiled out,
/// the AVX policy simply reuses the SSE implementation, including its
/// compute() member.
template <>
class levenstein<policy_avx> : public levenstein<policy_sse>
{
public:
    /// Forwards both sequence lengths to the SSE implementation.
    levenstein(std::size_t a_size, std::size_t b_size)
        : levenstein<policy_sse>(a_size, b_size)
    {
    }
};
#endif
#if USE_AVX512
/// AVX-512 policy: reuses the SSE implementation, including its compute()
/// member. Only present when USE_AVX512 evaluates to true.
template <>
class levenstein<policy_avx512> : public levenstein<policy_sse>
{
public:
    /// Forwards both sequence lengths to the SSE implementation.
    levenstein(std::size_t a_size, std::size_t b_size)
        : levenstein<policy_sse>(a_size, b_size)
    {
    }
};
#endif
}
// namespace levensol
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment