GCC Code Coverage Report


Directory: ./
File: tasks/sokolov_k_matrix_double_fox/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 97 97 100.0%
Functions: 10 10 100.0%
Branches: 59 80 73.8%

Line Branch Exec Source
1 #include "sokolov_k_matrix_double_fox/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4
5 #include <algorithm>
6 #include <cmath>
7 #include <cstddef>
8 #include <vector>
9
10 #include "sokolov_k_matrix_double_fox/common/include/common.hpp"
11
12 namespace sokolov_k_matrix_double_fox {
13
14 namespace {
15
16 48 void DecomposeToBlocksAll(const std::vector<double> &flat, std::vector<double> &blocks, int n, int bs, int q) {
17
2/2
✓ Branch 0 taken 220 times.
✓ Branch 1 taken 48 times.
268 for (int bi = 0; bi < q; bi++) {
18
2/2
✓ Branch 0 taken 1404 times.
✓ Branch 1 taken 220 times.
1624 for (int bj = 0; bj < q; bj++) {
19 1404 int block_off = ((bi * q) + bj) * (bs * bs);
20
2/2
✓ Branch 0 taken 6892 times.
✓ Branch 1 taken 1404 times.
8296 for (int i = 0; i < bs; i++) {
21
2/2
✓ Branch 0 taken 51540 times.
✓ Branch 1 taken 6892 times.
58432 for (int j = 0; j < bs; j++) {
22 51540 blocks[block_off + (i * bs) + j] = flat[(((bi * bs) + i) * n) + ((bj * bs) + j)];
23 }
24 }
25 }
26 }
27 48 }
28
29 24 void AssembleFromBlocksAll(const std::vector<double> &blocks, std::vector<double> &flat, int n, int bs, int q) {
30
2/2
✓ Branch 0 taken 110 times.
✓ Branch 1 taken 24 times.
134 for (int bi = 0; bi < q; bi++) {
31
2/2
✓ Branch 0 taken 702 times.
✓ Branch 1 taken 110 times.
812 for (int bj = 0; bj < q; bj++) {
32 702 int block_off = ((bi * q) + bj) * (bs * bs);
33
2/2
✓ Branch 0 taken 3446 times.
✓ Branch 1 taken 702 times.
4148 for (int i = 0; i < bs; i++) {
34
2/2
✓ Branch 0 taken 25770 times.
✓ Branch 1 taken 3446 times.
29216 for (int j = 0; j < bs; j++) {
35 25770 flat[(((bi * bs) + i) * n) + ((bj * bs) + j)] = blocks[block_off + (i * bs) + j];
36 }
37 }
38 }
39 }
40 24 }
41
42 2755 void MultiplyBlocksAll(const double *a, const double *b, double *c, int bs) {
43
2/2
✓ Branch 0 taken 16033 times.
✓ Branch 1 taken 2755 times.
18788 for (int i = 0; i < bs; i++) {
44
2/2
✓ Branch 0 taken 126643 times.
✓ Branch 1 taken 16033 times.
142676 for (int k = 0; k < bs; k++) {
45 126643 double val = a[(i * bs) + k];
46
2/2
✓ Branch 0 taken 1128025 times.
✓ Branch 1 taken 126643 times.
1254668 for (int j = 0; j < bs; j++) {
47 1128025 c[(i * bs) + j] += val * b[(k * bs) + j];
48 }
49 }
50 }
51 2755 }
52
53 void FoxStepMpiOmp(const std::vector<double> &a, const std::vector<double> &b, std::vector<double> &c, int bs, int q,
54 int step, int row_begin, int row_end) {
55 110 int bsq = bs * bs;
56 110 #pragma omp parallel for default(none) shared(a, b, c, bs, q, bsq, step, row_begin, row_end) schedule(static)
57 for (int i = row_begin; i < row_end; i++) {
58 int k = (i + step) % q;
59 for (int j = 0; j < q; j++) {
60 int a_off = ((i * q) + k) * bsq;
61 int b_off = ((k * q) + j) * bsq;
62 int c_off = ((i * q) + j) * bsq;
63 MultiplyBlocksAll(a.data() + a_off, b.data() + b_off, c.data() + c_off, bs);
64 }
65 }
66 }
67
68 24 int ChooseBlockSizeAll(int n) {
69
1/2
✓ Branch 0 taken 34 times.
✗ Branch 1 not taken.
34 for (int div = static_cast<int>(std::sqrt(static_cast<double>(n))); div >= 1; div--) {
70
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 24 times.
34 if (n % div == 0) {
71 return div;
72 }
73 }
74 return 1;
75 }
76
77 void ComputeRowRange(int rank, int num_procs, int rows_per, int leftover, int &row_start, int &row_count) {
78 24 if (rank < num_procs) {
79
4/6
✓ Branch 0 taken 17 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 11 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 11 times.
✗ Branch 5 not taken.
45 row_start = (rank * rows_per) + std::min(rank, leftover);
80
4/6
✓ Branch 0 taken 17 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 11 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 11 times.
✗ Branch 5 not taken.
62 row_count = rows_per + (rank < leftover ? 1 : 0);
81 } else {
82 row_start = 0;
83 row_count = 0;
84 }
85 }
86
87 24 void GatherResults(std::vector<double> &blocks_c, int rank, int num_procs, int rows_per, int leftover, int q, int bsq) {
88
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
24 if (rank == 0) {
89
2/2
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 12 times.
23 for (int pr = 1; pr < num_procs; pr++) {
90 int pr_start = 0;
91 int pr_count = 0;
92 ComputeRowRange(pr, num_procs, rows_per, leftover, pr_start, pr_count);
93
1/2
✓ Branch 0 taken 11 times.
✗ Branch 1 not taken.
11 if (pr_count > 0) {
94 11 int offset = pr_start * q * bsq;
95 11 int count = pr_count * q * bsq;
96 11 MPI_Recv(blocks_c.data() + offset, count, MPI_DOUBLE, pr, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
97 }
98 }
99
2/2
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 1 times.
12 } else if (rank < num_procs) {
100 int my_start = 0;
101 int my_count = 0;
102 ComputeRowRange(rank, num_procs, rows_per, leftover, my_start, my_count);
103
1/2
✓ Branch 0 taken 11 times.
✗ Branch 1 not taken.
11 if (my_count > 0) {
104 11 int offset = my_start * q * bsq;
105 11 int count = my_count * q * bsq;
106 11 MPI_Send(blocks_c.data() + offset, count, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
107 }
108 }
109 24 }
110
111 } // namespace
112
113 24 SokolovKMatrixDoubleFoxALL::SokolovKMatrixDoubleFoxALL(const InType &in) {
114 SetTypeOfTask(GetStaticTypeOfTask());
115 24 GetInput() = in;
116 GetOutput() = 0;
117 24 }
118
119 24 bool SokolovKMatrixDoubleFoxALL::ValidationImpl() {
120
2/4
✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 24 times.
24 return (GetInput() > 0) && (GetOutput() == 0);
121 }
122
123 24 bool SokolovKMatrixDoubleFoxALL::PreProcessingImpl() {
124 24 GetOutput() = 0;
125 24 n_ = GetInput();
126 24 block_size_ = ChooseBlockSizeAll(n_);
127 24 q_ = n_ / block_size_;
128 24 auto sz = static_cast<std::size_t>(n_) * n_;
129 24 std::vector<double> a(sz, 1.5);
130
1/4
✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
24 std::vector<double> b(sz, 2.0);
131
1/2
✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
24 blocks_a_.resize(sz);
132
1/2
✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
24 blocks_b_.resize(sz);
133
1/4
✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
24 blocks_c_.assign(sz, 0.0);
134 24 DecomposeToBlocksAll(a, blocks_a_, n_, block_size_, q_);
135 24 DecomposeToBlocksAll(b, blocks_b_, n_, block_size_, q_);
136 24 return true;
137 }
138
139
1/2
✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
24 bool SokolovKMatrixDoubleFoxALL::RunImpl() {
140 std::ranges::fill(blocks_c_, 0.0);
141
142 24 int rank = 0;
143 24 int world_size = 1;
144 24 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
145 24 MPI_Comm_size(MPI_COMM_WORLD, &world_size);
146
147 24 int total = static_cast<int>(blocks_a_.size());
148 24 MPI_Bcast(&n_, 1, MPI_INT, 0, MPI_COMM_WORLD);
149 24 MPI_Bcast(&block_size_, 1, MPI_INT, 0, MPI_COMM_WORLD);
150 24 MPI_Bcast(&q_, 1, MPI_INT, 0, MPI_COMM_WORLD);
151 24 MPI_Bcast(&total, 1, MPI_INT, 0, MPI_COMM_WORLD);
152
153
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
24 if (rank != 0) {
154 12 blocks_a_.resize(total);
155 12 blocks_b_.resize(total);
156 12 blocks_c_.assign(total, 0.0);
157 }
158
159 24 MPI_Bcast(blocks_a_.data(), total, MPI_DOUBLE, 0, MPI_COMM_WORLD);
160 24 MPI_Bcast(blocks_b_.data(), total, MPI_DOUBLE, 0, MPI_COMM_WORLD);
161
162
2/2
✓ Branch 0 taken 23 times.
✓ Branch 1 taken 1 times.
24 int num_procs = std::min(world_size, q_);
163 24 int rows_per = q_ / std::max(num_procs, 1);
164 24 int leftover = q_ % std::max(num_procs, 1);
165
166 int my_row_start = 0;
167 int my_row_count = 0;
168
2/2
✓ Branch 0 taken 23 times.
✓ Branch 1 taken 1 times.
24 ComputeRowRange(rank, num_procs, rows_per, leftover, my_row_start, my_row_count);
169
170
2/2
✓ Branch 0 taken 110 times.
✓ Branch 1 taken 24 times.
134 for (int step = 0; step < q_; step++) {
171 110 FoxStepMpiOmp(blocks_a_, blocks_b_, blocks_c_, block_size_, q_, step, my_row_start, my_row_start + my_row_count);
172 }
173
174 24 int bsq = block_size_ * block_size_;
175 24 GatherResults(blocks_c_, rank, num_procs, rows_per, leftover, q_, bsq);
176
177 24 MPI_Bcast(blocks_c_.data(), total, MPI_DOUBLE, 0, MPI_COMM_WORLD);
178
179 24 MPI_Barrier(MPI_COMM_WORLD);
180 24 return true;
181 }
182
183 24 bool SokolovKMatrixDoubleFoxALL::PostProcessingImpl() {
184 24 std::vector<double> result(static_cast<std::size_t>(n_) * n_);
185 24 AssembleFromBlocksAll(blocks_c_, result, n_, block_size_, q_);
186 24 double expected = 3.0 * n_;
187
1/2
✓ Branch 0 taken 25770 times.
✗ Branch 1 not taken.
25770 bool ok = std::ranges::all_of(result, [expected](double v) { return std::abs(v - expected) <= 1e-9; });
188
2/4
✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 24 times.
✗ Branch 3 not taken.
24 GetOutput() = ok ? GetInput() : -1;
189 std::vector<double>().swap(blocks_a_);
190 std::vector<double>().swap(blocks_b_);
191 std::vector<double>().swap(blocks_c_);
192 24 return true;
193 }
194
195 } // namespace sokolov_k_matrix_double_fox
196