GCC Code Coverage Report

Directory:	./
File:	tasks/cheremkhin_a_matr_mult_cannon_alg/omp/src/ops_omp.cpp
Date:	2026-05-11 08:26:31

	Exec	Total	Coverage
Lines:	44	44	100.0%
Functions:	6	6	100.0%
Branches:	16	30	53.3%

  
      Line
      Branch
      Exec
      Source
    
      #include "cheremkhin_a_matr_mult_cannon_alg/omp/include/ops_omp.hpp"
    
      #include <omp.h>
    
      #include <cmath>
    
      #include <cstddef>
    
      #include <cstdint>
    
      #include <utility>
    
      #include <vector>
    
      #include "cheremkhin_a_matr_mult_cannon_alg/common/include/common.hpp"
    
      #include "util/include/util.hpp"
    
      namespace cheremkhin_a_matr_mult_cannon_alg {
    
      namespace {
    
      inline std::size_t Idx(std::size_t n, std::size_t r, std::size_t c) {
    
        return (r * n) + c;
    
      }
    
      std::size_t ChooseQ(std::size_t n) {
    
        2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 24 times.

      28
        if (n <= 1) {
    
          return 1;
    
        }
    
      24
        const auto root = static_cast<std::size_t>(std::sqrt(static_cast<double>(n)));
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.

      24
        return (root == 0) ? 1 : root;
    
      }
    
      std::size_t CeilDiv(std::size_t a, std::size_t b) {
    
      28
        return (a + b - 1) / b;
    
      }
    
      292
      void MulAddBlock(const std::vector<double> &a, const std::vector<double> &b, std::vector<double> &c, std::size_t n,
    
                       std::size_t bs, std::size_t bi, std::size_t bk, std::size_t bj) {
    
      292
        const std::size_t i0 = bi * bs;
    
      292
        const std::size_t k0 = bk * bs;
    
      292
        const std::size_t j0 = bj * bs;
    
      292
        const auto bs64 = static_cast<std::int64_t>(bs);
    
        2/2✓ Branch 0 taken 1188 times.
✓ Branch 1 taken 292 times.

      1480
        for (std::size_t ii = 0; ii < bs; ++ii) {
    
      1188
          const std::size_t i = i0 + ii;
    
      1188
          const std::size_t a_row = i * n;
    
          const std::size_t c_row = i * n;
    
      1188
          double *c_block = c.data() + c_row + j0;
    
        2/2✓ Branch 0 taken 5124 times.
✓ Branch 1 taken 1188 times.

      6312
          for (std::size_t kk = 0; kk < bs; ++kk) {
    
      5124
            const std::size_t k = k0 + kk;
    
      5124
            const double aik = a[a_row + k];
    
      5124
            const double *b_block = b.data() + (k * n) + j0;
    
        2/2✓ Branch 0 taken 22860 times.
✓ Branch 1 taken 5124 times.

      27984
            for (std::int64_t jj = 0; jj < bs64; ++jj) {
    
      22860
              c_block[jj] += aik * b_block[jj];
    
            }
    
          }
    
        }
    
      292
      }
    
      }  // namespace
    
        1/2✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.

      28
      CheremkhinAMatrMultCannonAlgOMP::CheremkhinAMatrMultCannonAlgOMP(const InType &in) {
    
        SetTypeOfTask(GetStaticTypeOfTask());
    
        GetInput() = in;
    
        GetOutput() = {};
    
      28
      }
    
      28
      bool CheremkhinAMatrMultCannonAlgOMP::ValidationImpl() {
    
      28
        const std::size_t n = std::get<0>(GetInput());
    
        const auto &a = std::get<1>(GetInput());
    
        const auto &b = std::get<2>(GetInput());
    
        3/6✓ Branch 0 taken 28 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 28 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 28 times.

      28
        return n > 0 && a.size() == n * n && b.size() == n * n;
    
      }
    
      28
      bool CheremkhinAMatrMultCannonAlgOMP::PreProcessingImpl() {
    
        GetOutput() = {};
    
      28
        return true;
    
      }
    
      28
      bool CheremkhinAMatrMultCannonAlgOMP::RunImpl() {
    
      28
        const std::size_t n = std::get<0>(GetInput());
    
        const auto &a_in = std::get<1>(GetInput());
    
        const auto &b_in = std::get<2>(GetInput());
    
      28
        const int requested_threads = ppc::util::GetNumThreads();
    
        const std::size_t q = ChooseQ(n);
    
        const std::size_t bs = CeilDiv(n, q);
    
      28
        const std::size_t np = q * bs;
    
      28
        const auto n64 = static_cast<std::int64_t>(n);
    
      28
        const auto q64 = static_cast<std::int64_t>(q);
    
      28
        std::vector<double> a(np * np, 0.0);
    
        1/4✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      28
        std::vector<double> b(np * np, 0.0);
    
        1/4✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      28
        std::vector<double> c(np * np, 0.0);
    
      28
        omp_set_num_threads(requested_threads);
    
      28
      #pragma omp parallel for default(none) schedule(static) shared(a, b, a_in, b_in, n, np, n64)
    
        for (std::int64_t i = 0; i < n64; ++i) {
    
          for (std::size_t j = 0; j < n; ++j) {
    
            a[Idx(np, static_cast<std::size_t>(i), j)] = a_in[Idx(n, static_cast<std::size_t>(i), j)];
    
            b[Idx(np, static_cast<std::size_t>(i), j)] = b_in[Idx(n, static_cast<std::size_t>(i), j)];
    
          }
    
        }
    
      28
      #pragma omp parallel for default(none) collapse(2) schedule(static) shared(a, b, c, np, bs, q, q64)
    
        for (std::int64_t bi = 0; bi < q64; ++bi) {
    
          for (std::int64_t bj = 0; bj < q64; ++bj) {
    
            for (std::size_t step = 0; step < q; ++step) {
    
              const std::size_t bk = (static_cast<std::size_t>(bi) + static_cast<std::size_t>(bj) + step) % q;
    
              MulAddBlock(a, b, c, np, bs, static_cast<std::size_t>(bi), bk, static_cast<std::size_t>(bj));
    
            }
    
          }
    
        }
    
        1/4✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      28
        std::vector<double> out(n * n, 0.0);
    
      28
      #pragma omp parallel for default(none) schedule(static) shared(out, c, n, np, n64)
    
        for (std::int64_t i = 0; i < n64; ++i) {
    
          for (std::size_t j = 0; j < n; ++j) {
    
            out[Idx(n, static_cast<std::size_t>(i), j)] = c[Idx(np, static_cast<std::size_t>(i), j)];
    
          }
    
        }
    
        GetOutput() = std::move(out);
    
      28
        return true;
    
      }
    
      28
      bool CheremkhinAMatrMultCannonAlgOMP::PostProcessingImpl() {
    
      28
        return true;
    
      }
    
      }  // namespace  cheremkhin_a_matr_mult_cannon_alg

Line	Branch	Exec	Source
1			#include "cheremkhin_a_matr_mult_cannon_alg/omp/include/ops_omp.hpp"
2
3			#include <omp.h>
4
5			#include <cmath>
6			#include <cstddef>
7			#include <cstdint>
8			#include <utility>
9			#include <vector>
10
11			#include "cheremkhin_a_matr_mult_cannon_alg/common/include/common.hpp"
12			#include "util/include/util.hpp"
13
14			namespace cheremkhin_a_matr_mult_cannon_alg {
15
16			namespace {
17
18			inline std::size_t Idx(std::size_t n, std::size_t r, std::size_t c) {
19			return (r * n) + c;
20			}
21
22			std::size_t ChooseQ(std::size_t n) {
23	2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 24 times.	28	if (n <= 1) {
24			return 1;
25			}
26
27		24	const auto root = static_cast<std::size_t>(std::sqrt(static_cast<double>(n)));
28	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 24 times.	24	return (root == 0) ? 1 : root;
29			}
30
31			std::size_t CeilDiv(std::size_t a, std::size_t b) {
32		28	return (a + b - 1) / b;
33			}
34
35		292	void MulAddBlock(const std::vector<double> &a, const std::vector<double> &b, std::vector<double> &c, std::size_t n,
36			std::size_t bs, std::size_t bi, std::size_t bk, std::size_t bj) {
37		292	const std::size_t i0 = bi * bs;
38		292	const std::size_t k0 = bk * bs;
39		292	const std::size_t j0 = bj * bs;
40		292	const auto bs64 = static_cast<std::int64_t>(bs);
41
42	2/2 ✓ Branch 0 taken 1188 times. ✓ Branch 1 taken 292 times.	1480	for (std::size_t ii = 0; ii < bs; ++ii) {
43		1188	const std::size_t i = i0 + ii;
44		1188	const std::size_t a_row = i * n;
45			const std::size_t c_row = i * n;
46		1188	double *c_block = c.data() + c_row + j0;
47	2/2 ✓ Branch 0 taken 5124 times. ✓ Branch 1 taken 1188 times.	6312	for (std::size_t kk = 0; kk < bs; ++kk) {
48		5124	const std::size_t k = k0 + kk;
49		5124	const double aik = a[a_row + k];
50		5124	const double *b_block = b.data() + (k * n) + j0;
51	2/2 ✓ Branch 0 taken 22860 times. ✓ Branch 1 taken 5124 times.	27984	for (std::int64_t jj = 0; jj < bs64; ++jj) {
52		22860	c_block[jj] += aik * b_block[jj];
53			}
54			}
55			}
56		292	}
57
58			} // namespace
59
60	1/2 ✓ Branch 1 taken 28 times. ✗ Branch 2 not taken.	28	CheremkhinAMatrMultCannonAlgOMP::CheremkhinAMatrMultCannonAlgOMP(const InType &in) {
61			SetTypeOfTask(GetStaticTypeOfTask());
62			GetInput() = in;
63			GetOutput() = {};
64		28	}
65
66		28	bool CheremkhinAMatrMultCannonAlgOMP::ValidationImpl() {
67		28	const std::size_t n = std::get<0>(GetInput());
68			const auto &a = std::get<1>(GetInput());
69			const auto &b = std::get<2>(GetInput());
70	3/6 ✓ Branch 0 taken 28 times. ✗ Branch 1 not taken. ✗ Branch 2 not taken. ✓ Branch 3 taken 28 times. ✗ Branch 4 not taken. ✓ Branch 5 taken 28 times.	28	return n > 0 && a.size() == n * n && b.size() == n * n;
71			}
72
73		28	bool CheremkhinAMatrMultCannonAlgOMP::PreProcessingImpl() {
74			GetOutput() = {};
75		28	return true;
76			}
77
78		28	bool CheremkhinAMatrMultCannonAlgOMP::RunImpl() {
79		28	const std::size_t n = std::get<0>(GetInput());
80			const auto &a_in = std::get<1>(GetInput());
81			const auto &b_in = std::get<2>(GetInput());
82		28	const int requested_threads = ppc::util::GetNumThreads();
83
84			const std::size_t q = ChooseQ(n);
85			const std::size_t bs = CeilDiv(n, q);
86		28	const std::size_t np = q * bs;
87		28	const auto n64 = static_cast<std::int64_t>(n);
88		28	const auto q64 = static_cast<std::int64_t>(q);
89
90		28	std::vector<double> a(np * np, 0.0);
91	1/4 ✓ Branch 1 taken 28 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	28	std::vector<double> b(np * np, 0.0);
92	1/4 ✓ Branch 1 taken 28 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	28	std::vector<double> c(np * np, 0.0);
93
94		28	omp_set_num_threads(requested_threads);
95
96		28	#pragma omp parallel for default(none) schedule(static) shared(a, b, a_in, b_in, n, np, n64)
97			for (std::int64_t i = 0; i < n64; ++i) {
98			for (std::size_t j = 0; j < n; ++j) {
99			a[Idx(np, static_cast<std::size_t>(i), j)] = a_in[Idx(n, static_cast<std::size_t>(i), j)];
100			b[Idx(np, static_cast<std::size_t>(i), j)] = b_in[Idx(n, static_cast<std::size_t>(i), j)];
101			}
102			}
103
104		28	#pragma omp parallel for default(none) collapse(2) schedule(static) shared(a, b, c, np, bs, q, q64)
105			for (std::int64_t bi = 0; bi < q64; ++bi) {
106			for (std::int64_t bj = 0; bj < q64; ++bj) {
107			for (std::size_t step = 0; step < q; ++step) {
108			const std::size_t bk = (static_cast<std::size_t>(bi) + static_cast<std::size_t>(bj) + step) % q;
109			MulAddBlock(a, b, c, np, bs, static_cast<std::size_t>(bi), bk, static_cast<std::size_t>(bj));
110			}
111			}
112			}
113
114	1/4 ✓ Branch 1 taken 28 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	28	std::vector<double> out(n * n, 0.0);
115
116		28	#pragma omp parallel for default(none) schedule(static) shared(out, c, n, np, n64)
117			for (std::int64_t i = 0; i < n64; ++i) {
118			for (std::size_t j = 0; j < n; ++j) {
119			out[Idx(n, static_cast<std::size_t>(i), j)] = c[Idx(np, static_cast<std::size_t>(i), j)];
120			}
121			}
122
123			GetOutput() = std::move(out);
124		28	return true;
125			}
126
127		28	bool CheremkhinAMatrMultCannonAlgOMP::PostProcessingImpl() {
128		28	return true;
129			}
130
131			} // namespace cheremkhin_a_matr_mult_cannon_alg
132