GCC Code Coverage Report


Directory: ./
File: tasks/timur_a_cannon/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 86 86 100.0%
Functions: 12 12 100.0%
Branches: 58 94 61.7%

Line Branch Exec Source
1 #include "timur_a_cannon/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4 #include <omp.h>
5
6 #include <algorithm>
7 #include <cstddef>
8 #include <tuple>
9 #include <utility>
10 #include <vector>
11
12 namespace timur_a_cannon {
13
14 namespace {
15
16 using Matrix = std::vector<std::vector<double>>;
17
18 76 void CopyBlocksForStep(const Matrix &src_a, const Matrix &src_b, int b_size, int global_i, int shift, int j,
19 Matrix &block_a, Matrix &block_b) {
20
2/2
✓ Branch 0 taken 196 times.
✓ Branch 1 taken 76 times.
272 for (int row = 0; row < b_size; ++row) {
21
2/2
✓ Branch 0 taken 556 times.
✓ Branch 1 taken 196 times.
752 for (int col = 0; col < b_size; ++col) {
22 556 block_a[row][col] = src_a[(global_i * b_size) + row][(shift * b_size) + col];
23 556 block_b[row][col] = src_b[(shift * b_size) + row][(j * b_size) + col];
24 }
25 }
26 76 }
27
28 34 void ScatterBlockIntoResult(Matrix &local_result, const Matrix &block_c, int local_i, int j, int b_size) {
29
2/2
✓ Branch 0 taken 86 times.
✓ Branch 1 taken 34 times.
120 for (int row = 0; row < b_size; ++row) {
30
2/2
✓ Branch 0 taken 242 times.
✓ Branch 1 taken 86 times.
328 for (int col = 0; col < b_size; ++col) {
31 242 local_result[(local_i * b_size) + row][(j * b_size) + col] = block_c[row][col];
32 }
33 }
34 34 }
35
36
2/2
✓ Branch 0 taken 47 times.
✓ Branch 1 taken 1 times.
48 std::vector<double> FlattenMatrix(const Matrix &matrix) {
37 const std::size_t rows = matrix.size();
38
2/2
✓ Branch 0 taken 47 times.
✓ Branch 1 taken 1 times.
48 const std::size_t cols = rows == 0 ? 0 : matrix[0].size();
39 48 std::vector<double> flat(rows * cols);
40
41
2/2
✓ Branch 0 taken 200 times.
✓ Branch 1 taken 48 times.
248 for (std::size_t row = 0; row < rows; ++row) {
42 200 std::copy(matrix[row].begin(), matrix[row].end(), flat.begin() + static_cast<std::ptrdiff_t>(row * cols));
43 }
44
45 48 return flat;
46 }
47
48 48 Matrix UnflattenMatrix(const std::vector<double> &flat, std::size_t rows, std::size_t cols) {
49
1/2
✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
48 Matrix matrix(rows, std::vector<double>(cols));
50
51
2/2
✓ Branch 0 taken 240 times.
✓ Branch 1 taken 48 times.
288 for (std::size_t row = 0; row < rows; ++row) {
52 240 const std::ptrdiff_t begin_idx = (static_cast<std::ptrdiff_t>(row) * static_cast<std::ptrdiff_t>(cols));
53 240 const std::ptrdiff_t end_idx = (static_cast<std::ptrdiff_t>(row + 1) * static_cast<std::ptrdiff_t>(cols));
54 240 std::copy(flat.begin() + begin_idx, flat.begin() + end_idx, matrix[row].begin());
55 }
56
57 48 return matrix;
58 }
59
60 16 std::pair<std::vector<int>, std::vector<int>> BuildGatherLayout(int size, int base_block_rows, int extra_block_rows,
61 int b_size, int n) {
62 16 std::vector<int> recv_counts(size);
63
1/4
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
16 std::vector<int> displs(size);
64 int offset = 0;
65
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 16 times.
48 for (int proc = 0; proc < size; ++proc) {
66
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 4 times.
32 const int proc_block_rows = base_block_rows + (proc < extra_block_rows ? 1 : 0);
67 32 recv_counts[proc] = proc_block_rows * b_size * n;
68 32 displs[proc] = offset;
69 32 offset += recv_counts[proc];
70 }
71
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
32 return {recv_counts, displs};
72 }
73
74 } // namespace
75
76 16 TimurACannonMatrixMultiplicationALL::TimurACannonMatrixMultiplicationALL(
77
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 const std::tuple<int, std::vector<std::vector<double>>, std::vector<std::vector<double>>> &in) {
78 SetTypeOfTask(GetStaticTypeOfTask());
79 GetInput() = in;
80 16 }
81
82 16 bool TimurACannonMatrixMultiplicationALL::ValidationImpl() {
83 const auto &input = GetInput();
84 16 const int b_size = std::get<0>(input);
85 const auto &mat_a = std::get<1>(input);
86 const auto &mat_b = std::get<2>(input);
87
88
3/6
✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 16 times.
✗ Branch 5 not taken.
16 if (b_size <= 0 || mat_a.empty() || mat_b.empty()) {
89 return false;
90 }
91
92 const std::size_t n = mat_a.size();
93
2/4
✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.
16 if (mat_b.size() != n || (n % static_cast<std::size_t>(b_size) != 0)) {
94 return false;
95 }
96
97 const auto is_square_n = [n](const Matrix &matrix) {
98 return std::ranges::all_of(matrix, [n](const std::vector<double> &row) { return row.size() == n; });
99 };
100
101
1/2
✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
16 return is_square_n(mat_a) && is_square_n(mat_b);
102 }
103
104
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
16 bool TimurACannonMatrixMultiplicationALL::PreProcessingImpl() {
105 GetOutput().clear();
106 16 return true;
107 }
108
109 76 void TimurACannonMatrixMultiplicationALL::BlockMultiplyAccumulate(const std::vector<std::vector<double>> &a,
110 const std::vector<std::vector<double>> &b,
111 std::vector<std::vector<double>> &c, int b_size) {
112
2/2
✓ Branch 0 taken 196 times.
✓ Branch 1 taken 76 times.
272 for (int i = 0; i < b_size; ++i) {
113
2/2
✓ Branch 0 taken 556 times.
✓ Branch 1 taken 196 times.
752 for (int k = 0; k < b_size; ++k) {
114 556 const double temp = a[i][k];
115
2/2
✓ Branch 0 taken 1684 times.
✓ Branch 1 taken 556 times.
2240 for (int j = 0; j < b_size; ++j) {
116 1684 c[i][j] += temp * b[k][j];
117 }
118 }
119 }
120 76 }
121
122 16 std::vector<std::vector<double>> TimurACannonMatrixMultiplicationALL::ComputeLocalResult(const Matrix &src_a,
123 const Matrix &src_b,
124 int b_size, int grid_sz,
125 int block_row_start,
126 int local_block_rows, int n) {
127 16 Matrix local_result(static_cast<std::size_t>(local_block_rows) * static_cast<std::size_t>(b_size),
128
1/2
✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.
16 std::vector<double>(static_cast<std::size_t>(n), 0.0));
129
130 16 #pragma omp parallel for default(none) \
131 shared(local_result, src_a, src_b, b_size, grid_sz, block_row_start, local_block_rows)
132 for (int local_i = 0; local_i < local_block_rows; ++local_i) {
133 for (int j = 0; j < grid_sz; ++j) {
134 Matrix block_c(b_size, std::vector<double>(b_size, 0.0));
135 const int global_i = block_row_start + local_i;
136
137 for (int step = 0; step < grid_sz; ++step) {
138 const int shift = (global_i + j + step) % grid_sz;
139 Matrix block_a(b_size, std::vector<double>(b_size));
140 Matrix block_b(b_size, std::vector<double>(b_size));
141 CopyBlocksForStep(src_a, src_b, b_size, global_i, shift, j, block_a, block_b);
142 BlockMultiplyAccumulate(block_a, block_b, block_c, b_size);
143 }
144
145 ScatterBlockIntoResult(local_result, block_c, local_i, j, b_size);
146 }
147 }
148
149 16 return local_result;
150 }
151
152 16 bool TimurACannonMatrixMultiplicationALL::RunImpl() {
153 const auto &input = GetInput();
154 16 const int b_size = std::get<0>(input);
155 16 Matrix src_a = std::get<1>(input);
156
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 Matrix src_b = std::get<2>(input);
157 16 const int n = static_cast<int>(src_a.size());
158 16 const int grid_sz = n / b_size;
159 16 const int total_elems = n * n;
160
161 16 int rank = 0;
162 16 int size = 1;
163
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
164
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 MPI_Comm_size(MPI_COMM_WORLD, &size);
165
166
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 std::vector<double> flat_a = FlattenMatrix(src_a);
167
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 std::vector<double> flat_b = FlattenMatrix(src_b);
168
169
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 MPI_Bcast(flat_a.data(), total_elems, MPI_DOUBLE, 0, MPI_COMM_WORLD);
170
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 MPI_Bcast(flat_b.data(), total_elems, MPI_DOUBLE, 0, MPI_COMM_WORLD);
171
172
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 src_a = UnflattenMatrix(flat_a, static_cast<std::size_t>(n), static_cast<std::size_t>(n));
173
1/4
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
16 src_b = UnflattenMatrix(flat_b, static_cast<std::size_t>(n), static_cast<std::size_t>(n));
174
175 16 const int base_block_rows = grid_sz / size;
176 16 const int extra_block_rows = grid_sz % size;
177
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 const int local_block_rows = base_block_rows + (rank < extra_block_rows ? 1 : 0);
178
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 const int block_row_start = (rank * base_block_rows) + std::min(rank, extra_block_rows);
179
180
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 Matrix local_result = ComputeLocalResult(src_a, src_b, b_size, grid_sz, block_row_start, local_block_rows, n);
181
182
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 std::vector<double> local_flat = FlattenMatrix(local_result);
183
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 auto [recv_counts, displs] = BuildGatherLayout(size, base_block_rows, extra_block_rows, b_size, n);
184
185
2/4
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 16 times.
✗ Branch 5 not taken.
16 std::vector<double> global_flat(total_elems);
186
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 MPI_Allgatherv(local_flat.data(), static_cast<int>(local_flat.size()), MPI_DOUBLE, global_flat.data(),
187 recv_counts.data(), displs.data(), MPI_DOUBLE, MPI_COMM_WORLD);
188
189
2/6
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✓ Branch 5 taken 16 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
16 GetOutput() = UnflattenMatrix(global_flat, static_cast<std::size_t>(n), static_cast<std::size_t>(n));
190 16 return true;
191 48 }
192
193 16 bool TimurACannonMatrixMultiplicationALL::PostProcessingImpl() {
194 16 return true;
195 }
196
197 } // namespace timur_a_cannon
198