GCC Code Coverage Report


Directory: ./
File: tasks/korolev_k_matrix_mult/common/include/strassen_impl.hpp
Date: 2026-04-02 17:12:27
Exec Total Coverage
Lines: 16 91 17.6%
Functions: 2 12 16.7%
Branches: 12 144 8.3%

Line Branch Exec Source
1 #pragma once
2
3 #include <algorithm>
4 #include <cstddef>
5 #include <functional>
6 #include <utility>
7 #include <vector>
8
9 namespace korolev_k_matrix_mult::strassen_impl {
10
11 constexpr size_t kStrassenThreshold = 64;
12
13 inline size_t NextPowerOf2(size_t n) {
14
1/2
✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
36 if (n <= 1) {
15 return 1;
16 }
17 size_t p = 1;
18
2/2
✓ Branch 0 taken 108 times.
✓ Branch 1 taken 36 times.
144 while (p < n) {
19 108 p *= 2;
20 }
21 return p;
22 }
23
24
1/2
✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
36 inline void NaiveMultiply(const std::vector<double> &a, const std::vector<double> &b, std::vector<double> &c,
25 size_t n) {
26 std::ranges::fill(c, 0.0);
27
2/2
✓ Branch 0 taken 336 times.
✓ Branch 1 taken 36 times.
372 for (size_t i = 0; i < n; ++i) {
28
2/2
✓ Branch 0 taken 4032 times.
✓ Branch 1 taken 336 times.
4368 for (size_t k = 0; k < n; ++k) {
29 4032 double a_ik = a[(i * n) + k];
30
2/2
✓ Branch 0 taken 56064 times.
✓ Branch 1 taken 4032 times.
60096 for (size_t j = 0; j < n; ++j) {
31 56064 c[(i * n) + j] += a_ik * b[(k * n) + j];
32 }
33 }
34 }
35 36 }
36
37 namespace detail {
38
39 void AddBlock(const double *a_ptr, const double *b_ptr, size_t ar, size_t ac, size_t br, size_t bc, size_t n, size_t m,
40 std::vector<double> &out);
41 void SubBlock(const double *a_ptr, const double *b_ptr, size_t ar, size_t ac, size_t br, size_t bc, size_t n, size_t m,
42 std::vector<double> &out);
43 void CopyBlock(const double *a_ptr, size_t ro, size_t co, size_t n, size_t m, std::vector<double> &out);
44 void AssembleStrassenResult(std::vector<double> &c, const std::vector<double> &m1, const std::vector<double> &m2,
45 const std::vector<double> &m3, const std::vector<double> &m4, const std::vector<double> &m5,
46 const std::vector<double> &m6, const std::vector<double> &m7, size_t n, size_t m);
47
48 inline void AddBlock(const double *a_ptr, const double *b_ptr, size_t ar, size_t ac, size_t br, size_t bc, size_t n,
49 size_t m, std::vector<double> &out) {
50 for (size_t i = 0; i < m; ++i) {
51 for (size_t j = 0; j < m; ++j) {
52 out[(i * m) + j] = a_ptr[((ar + i) * n) + ac + j] + b_ptr[((br + i) * n) + bc + j];
53 }
54 }
55 }
56
57 inline void SubBlock(const double *a_ptr, const double *b_ptr, size_t ar, size_t ac, size_t br, size_t bc, size_t n,
58 size_t m, std::vector<double> &out) {
59 for (size_t i = 0; i < m; ++i) {
60 for (size_t j = 0; j < m; ++j) {
61 out[(i * m) + j] = a_ptr[((ar + i) * n) + ac + j] - b_ptr[((br + i) * n) + bc + j];
62 }
63 }
64 }
65
66 inline void CopyBlock(const double *a_ptr, size_t ro, size_t co, size_t n, size_t m, std::vector<double> &out) {
67 for (size_t i = 0; i < m; ++i) {
68 for (size_t j = 0; j < m; ++j) {
69 out[(i * m) + j] = a_ptr[((ro + i) * n) + co + j];
70 }
71 }
72 }
73
74 inline void AssembleStrassenResult(std::vector<double> &c, const std::vector<double> &m1, const std::vector<double> &m2,
75 const std::vector<double> &m3, const std::vector<double> &m4,
76 const std::vector<double> &m5, const std::vector<double> &m6,
77 const std::vector<double> &m7, size_t n, size_t m) {
78 for (size_t i = 0; i < m; ++i) {
79 for (size_t j = 0; j < m; ++j) {
80 c[(i * n) + j] = m1[(i * m) + j] + m4[(i * m) + j] - m5[(i * m) + j] + m7[(i * m) + j];
81 c[(i * n) + j + m] = m3[(i * m) + j] + m5[(i * m) + j];
82 c[((m + i) * n) + j] = m2[(i * m) + j] + m4[(i * m) + j];
83 c[((m + i) * n) + j + m] = m1[(i * m) + j] - m2[(i * m) + j] + m3[(i * m) + j] + m6[(i * m) + j];
84 }
85 }
86 }
87
88 } // namespace detail
89
90 template <typename ParallelRun>
91 36 void StrassenRecurse(const std::vector<double> &a, const std::vector<double> &b, std::vector<double> &c, size_t n,
92 ParallelRun &&parallel_run) {
93
1/2
✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
36 if (n <= kStrassenThreshold) {
94 36 NaiveMultiply(a, b, c, n);
95 36 return;
96 }
97
98 size_t m = n / 2;
99 size_t sz = m * m;
100
101 std::vector<double> m1(sz);
102 std::vector<double> m2(sz);
103 std::vector<double> m3(sz);
104 std::vector<double> m4(sz);
105 std::vector<double> m5(sz);
106 std::vector<double> m6(sz);
107 std::vector<double> m7(sz);
108
109 std::vector<std::function<void()>> tasks = {
110 [&]() {
111 std::vector<double> t1(sz);
112 std::vector<double> t2(sz);
113 detail::AddBlock(a.data(), a.data(), 0, 0, m, m, n, m, t1);
114 detail::AddBlock(b.data(), b.data(), 0, 0, m, m, n, m, t2);
115 StrassenRecurse(t1, t2, m1, m, std::forward<ParallelRun>(parallel_run));
116 },
117 [&]() {
118 std::vector<double> t1(sz);
119 std::vector<double> t2(sz);
120 detail::AddBlock(a.data(), a.data(), m, 0, m, m, n, m, t1);
121 detail::CopyBlock(b.data(), 0, 0, n, m, t2);
122 StrassenRecurse(t1, t2, m2, m, std::forward<ParallelRun>(parallel_run));
123 },
124 [&]() {
125 std::vector<double> t1(sz);
126 std::vector<double> t2(sz);
127 detail::SubBlock(b.data(), b.data(), 0, m, m, m, n, m, t1);
128 detail::CopyBlock(a.data(), 0, 0, n, m, t2);
129 StrassenRecurse(t2, t1, m3, m, std::forward<ParallelRun>(parallel_run));
130 },
131 [&]() {
132 std::vector<double> t1(sz);
133 std::vector<double> t2(sz);
134 detail::SubBlock(b.data(), b.data(), m, 0, 0, 0, n, m, t1);
135 detail::CopyBlock(a.data(), m, m, n, m, t2);
136 StrassenRecurse(t2, t1, m4, m, std::forward<ParallelRun>(parallel_run));
137 },
138 [&]() {
139 std::vector<double> t1(sz);
140 std::vector<double> t2(sz);
141 detail::AddBlock(a.data(), a.data(), 0, 0, 0, m, n, m, t1);
142 detail::CopyBlock(b.data(), m, m, n, m, t2);
143 StrassenRecurse(t1, t2, m5, m, std::forward<ParallelRun>(parallel_run));
144 },
145 [&]() {
146 std::vector<double> t1(sz);
147 std::vector<double> t2(sz);
148 detail::SubBlock(a.data(), a.data(), m, 0, 0, 0, n, m, t1);
149 detail::AddBlock(b.data(), b.data(), 0, 0, 0, m, n, m, t2);
150 StrassenRecurse(t1, t2, m6, m, std::forward<ParallelRun>(parallel_run));
151 },
152 [&]() {
153 std::vector<double> t1(sz);
154 std::vector<double> t2(sz);
155 detail::SubBlock(a.data(), a.data(), 0, m, m, m, n, m, t1);
156 detail::AddBlock(b.data(), b.data(), m, 0, m, m, n, m, t2);
157 StrassenRecurse(t1, t2, m7, m, std::forward<ParallelRun>(parallel_run));
158 },
159 };
160
161 parallel_run(tasks);
162
163 detail::AssembleStrassenResult(c, m1, m2, m3, m4, m5, m6, m7, n, m);
164 }
165
166 inline void StrassenMultiply(const std::vector<double> &a, const std::vector<double> &b, std::vector<double> &c,
167 size_t n, const std::function<void(std::vector<std::function<void()>> &)> &parallel_run) {
168
1/4
✓ Branch 1 taken 36 times.
✗ Branch 2 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
36 StrassenRecurse(a, b, c, n, parallel_run);
169 36 }
170
171 } // namespace korolev_k_matrix_mult::strassen_impl
172