Commit a079d1d8 authored by Dubský Jan
Browse files

Solution 4.03 (Speed: 1.59)

parent b8f2bba6
#ifndef matrixsol_hpp
#define matrixsol_hpp
#include <immintrin.h>
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <new>
namespace matrixsol {
#define CACHE_LINE_SIZE 64
template <typename policy>
class matrix {
class matrix_base {
public:
using matrix_element = std::uint16_t;
matrix(std::size_t vs, std::size_t hs) :
VS(vs), HS(hs), ROW_BYTES(calc_row_bytes(hs)), COL_BYTES(calc_row_bytes(vs)), ROW_SIZE(ROW_BYTES / sizeof(matrix_element)), COL_SIZE(COL_BYTES / sizeof(matrix_element)) {
/**
 * Builds a vs x hs matrix and allocates its two element buffers.
 *
 * `mat` receives VS * ROW_BYTES bytes and `mat_r` receives HS * COL_BYTES
 * bytes, both requested with CACHE_LINE_SIZE alignment. The contents of both
 * buffers are left uninitialised.
 *
 * @param vs       number of rows (stored in VS)
 * @param hs       number of columns (stored in HS)
 * @param vec_size width in bytes of the vector register the derived class
 *                 works with; VEC_CNT is derived as vec_size / sizeof(matrix_element)
 * @throws std::bad_alloc if either non-empty buffer cannot be allocated
 */
matrix_base(std::size_t vs, std::size_t hs, size_t vec_size) :
    VS(vs), HS(hs), VEC_SIZE(vec_size), VEC_CNT(VEC_SIZE / sizeof(matrix_element)), ROW_BYTES(calc_row_bytes(hs)), COL_BYTES(calc_row_bytes(vs)), ROW_SIZE(ROW_BYTES / sizeof(matrix_element)), COL_SIZE(COL_BYTES / sizeof(matrix_element)) {
    // NOTE(review): std::aligned_alloc wants the size to be a multiple of the
    // alignment; this presumably holds because calc_row_bytes pads to
    // CACHE_LINE_SIZE - confirm against its full definition.
    const std::size_t mat_bytes = VS * ROW_BYTES;
    const std::size_t mat_r_bytes = HS * COL_BYTES;
    mat = static_cast<uint16_t*>(std::aligned_alloc(CACHE_LINE_SIZE, mat_bytes));
    mat_r = static_cast<uint16_t*>(std::aligned_alloc(CACHE_LINE_SIZE, mat_r_bytes));
    // A null pointer satisfies the alignment asserts below, so allocation
    // failure has to be detected explicitly. A zero-byte request may
    // legitimately yield null and is therefore not treated as a failure.
    if ((mat == nullptr && mat_bytes != 0) || (mat_r == nullptr && mat_r_bytes != 0)) {
        std::free(mat);
        std::free(mat_r);
        throw std::bad_alloc();
    }
    assert((reinterpret_cast<std::uintptr_t>(mat) & (CACHE_LINE_SIZE - 1)) == 0);
    assert((reinterpret_cast<std::uintptr_t>(mat_r) & (CACHE_LINE_SIZE - 1)) == 0);
}
std::size_t vsize() const {
......@@ -41,25 +45,9 @@ public:
return addr(i, j);
}
void assign_mul(const matrix& a, const matrix& b) {
assert(a.VS == VS);
assert(b.HS == HS);
assert(a.HS == b.VS);
for (size_t i = 0; i < VS; ++i) {
for (size_t j = 0; j < HS; ++j) {
matrix_element res = std::numeric_limits<matrix_element>::max();
for (size_t k = 0; k < a.HS; ++k) {
res = std::min(res, (matrix_element)(a.addr(i, k) + b.addr_r(k, j)));
}
addr(i, j) = res;
addr_r(i, j) = res;
}
}
}
private:
protected:
const size_t VS, HS;
const size_t VEC_SIZE, VEC_CNT;
const size_t ROW_BYTES, COL_BYTES;
const size_t ROW_SIZE, COL_SIZE;
uint16_t* mat;
......@@ -77,6 +65,7 @@ private:
return *(mat_r + (col * COL_SIZE) + row);
}
private:
static size_t calc_row_bytes(size_t val) {
size_t ret = sizeof(matrix_element) * val;
if (ret % CACHE_LINE_SIZE) {
......@@ -87,10 +76,49 @@ private:
}
};
template <typename policy>
class matrix : public matrix_base {
public:
matrix(std::size_t vs, std::size_t hs) :
matrix_base(vs, hs, 16) {
std::cerr << VEC_CNT << std::endl;
}
void assign_mul(const matrix& a, const matrix& b) {
assert(a.VS == VS);
assert(b.HS == HS);
assert(a.HS == b.VS);
for (size_t i = 0; i < VS; ++i) {
for (size_t j = 0; j < HS; ++j) {
const matrix_element res_init_val = std::numeric_limits<matrix_element>::max();
__m128i res_vec = _mm_set1_epi16(res_init_val);
size_t K_LIMIT = a.HS / VEC_CNT;
for (size_t k = 0; k < K_LIMIT; ++k) {
__m128i val_a = _mm_load_si128((__m128i*)&a.addr(i, k * VEC_CNT));
__m128i val_b = _mm_load_si128((__m128i*)&b.addr_r(k * VEC_CNT, j));
__m128i val = _mm_add_epi16(val_a, val_b);
res_vec = _mm_min_epu16(res_vec, val);
}
matrix_element tmp[VEC_CNT];
_mm_storeu_si128((__m128i*)tmp, res_vec);
matrix_element res = res_init_val;
for (size_t k = 0; k < VEC_CNT; ++k) res = std::min(res, tmp[k]);
addr(i, j) = res;
addr_r(i, j) = res;
}
}
}
};
/*
struct policy_scalar {
};
*/
// Empty tag type named for a scalar strategy; presumably intended as the
// `policy` argument of matrix<policy>. Nothing in the visible code reads it -
// TODO confirm against the callers.
struct policy_scalar {
};
*/
// Empty tag type named for an SSE strategy; presumably intended as the
// `policy` argument of matrix<policy>. Nothing in the visible code reads it -
// TODO confirm against the callers.
struct policy_sse {
};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment