GCC Code Coverage Report


Directory: ./
File: tasks/agafonov_i_matrix_ccs_seq/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 0 115 0.0%
Functions: 0 8 0.0%
Branches: 0 142 0.0%

Line Branch Exec Source
1 #include "agafonov_i_matrix_ccs_seq/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4 #include <tbb/blocked_range.h>
5 #include <tbb/parallel_for.h>
6
7 #include <algorithm>
8 #include <cmath>
9 #include <cstddef>
10 #include <utility>
11 #include <vector>
12
13 #include "agafonov_i_matrix_ccs_seq/common/include/common.hpp"
14
15 namespace agafonov_i_matrix_ccs_seq {
16
17 namespace {
18
19 void AssembleFinalMatrix(InType::first_type &c, size_t total_cols, size_t chunk, size_t rem, int size,
20 const std::vector<int> &all_col_nnz, const std::vector<int> &displs,
21 const std::vector<double> &all_vals, const std::vector<int> &all_rind) {
22 std::vector<size_t> proc_col_start(size);
23 std::vector<size_t> proc_col_end(size);
24 for (int proc_idx = 0; proc_idx < size; ++proc_idx) {
25 proc_col_start[proc_idx] = (static_cast<size_t>(proc_idx) * chunk) + std::min(static_cast<size_t>(proc_idx), rem);
26 proc_col_end[proc_idx] = proc_col_start[proc_idx] + chunk + (std::cmp_less(proc_idx, rem) ? 1 : 0);
27 }
28
29 std::vector<std::vector<double>> col_vals(total_cols);
30 std::vector<std::vector<int>> col_rind(total_cols);
31 for (int proc_idx = 0; proc_idx < size; ++proc_idx) {
32 int offset = displs[proc_idx];
33 for (size_t j = proc_col_start[proc_idx]; j < proc_col_end[proc_idx]; ++j) {
34 int n = all_col_nnz[j];
35 col_vals[j].assign(all_vals.begin() + offset, all_vals.begin() + offset + n);
36 col_rind[j].assign(all_rind.begin() + offset, all_rind.begin() + offset + n);
37 offset += n;
38 }
39 }
40
41 int cur_nnz = 0;
42 for (size_t j = 0; j < total_cols; ++j) {
43 c.col_ptrs[j] = cur_nnz;
44 c.vals.insert(c.vals.end(), col_vals[j].begin(), col_vals[j].end());
45 c.row_inds.insert(c.row_inds.end(), col_rind[j].begin(), col_rind[j].end());
46 cur_nnz += static_cast<int>(col_vals[j].size());
47 }
48 c.col_ptrs[total_cols] = cur_nnz;
49 c.nnz = cur_nnz;
50 }
51
52 } // namespace
53
54 AgafonovIMatrixCCSALL::AgafonovIMatrixCCSALL(const InType &in) {
55 SetTypeOfTask(GetStaticTypeOfTask());
56 GetInput() = in;
57 }
58
59 bool AgafonovIMatrixCCSALL::ValidationImpl() {
60 const auto &left = GetInput().first;
61 const auto &right = GetInput().second;
62 return (left.cols_num == right.rows_num) && (left.col_ptrs.size() == left.cols_num + 1) &&
63 (right.col_ptrs.size() == right.cols_num + 1);
64 }
65
66 bool AgafonovIMatrixCCSALL::PreProcessingImpl() {
67 GetOutput().vals.clear();
68 GetOutput().row_inds.clear();
69 return true;
70 }
71
72 void AgafonovIMatrixCCSALL::ProcessColumn(size_t j, const InType::first_type &a, const InType::second_type &b,
73 std::vector<double> &accumulator, std::vector<size_t> &active_rows,
74 std::vector<bool> &row_mask, std::vector<double> &local_v,
75 std::vector<int> &local_r) {
76 const auto b_col_start = static_cast<size_t>(b.col_ptrs[j]);
77 const auto b_col_end = static_cast<size_t>(b.col_ptrs[j + 1]);
78 if (b_col_start == b_col_end) {
79 return;
80 }
81
82 for (size_t kb = b_col_start; kb < b_col_end; ++kb) {
83 const auto k = static_cast<size_t>(b.row_inds[kb]);
84 const double v_b = b.vals[kb];
85 const auto a_col_start = static_cast<size_t>(a.col_ptrs[k]);
86 const auto a_col_end = static_cast<size_t>(a.col_ptrs[k + 1]);
87
88 for (size_t ka = a_col_start; ka < a_col_end; ++ka) {
89 const auto i = static_cast<size_t>(a.row_inds[ka]);
90 if (!row_mask[i]) {
91 row_mask[i] = true;
92 active_rows.push_back(i);
93 }
94 accumulator[i] += a.vals[ka] * v_b;
95 }
96 }
97
98 std::ranges::sort(active_rows);
99
100 for (const auto row_idx : active_rows) {
101 if (std::abs(accumulator[row_idx]) > 1e-15) {
102 local_v.push_back(accumulator[row_idx]);
103 local_r.push_back(static_cast<int>(row_idx));
104 }
105 row_mask[row_idx] = false;
106 accumulator[row_idx] = 0.0;
107 }
108 active_rows.clear();
109 }
110
111 bool AgafonovIMatrixCCSALL::RunImpl() {
112 const auto &a = GetInput().first;
113 const auto &b = GetInput().second;
114 auto &c = GetOutput();
115
116 int rank = 0;
117 int size = 1;
118 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
119 MPI_Comm_size(MPI_COMM_WORLD, &size);
120
121 c.rows_num = a.rows_num;
122 c.cols_num = b.cols_num;
123 c.col_ptrs.assign(c.cols_num + 1, 0);
124
125 const size_t total_cols = b.cols_num;
126 const size_t chunk = total_cols / static_cast<size_t>(size);
127 const size_t rem = total_cols % static_cast<size_t>(size);
128 const size_t col_start = (static_cast<size_t>(rank) * chunk) + std::min(static_cast<size_t>(rank), rem);
129 const size_t col_end = col_start + chunk + (std::cmp_less(rank, rem) ? 1 : 0);
130
131 std::vector<std::vector<double>> local_vals(total_cols);
132 std::vector<std::vector<int>> local_rows(total_cols);
133
134 tbb::parallel_for(tbb::blocked_range<size_t>(col_start, col_end), [&](const tbb::blocked_range<size_t> &range) {
135 std::vector<double> accumulator(a.rows_num, 0.0);
136 std::vector<size_t> active_rows;
137 std::vector<bool> row_mask(a.rows_num, false);
138
139 for (size_t j = range.begin(); j < range.end(); ++j) {
140 ProcessColumn(j, a, b, accumulator, active_rows, row_mask, local_vals[j], local_rows[j]);
141 }
142 });
143
144 std::vector<int> my_col_nnz(total_cols, 0);
145 for (size_t j = col_start; j < col_end; ++j) {
146 my_col_nnz[j] = static_cast<int>(local_vals[j].size());
147 }
148
149 std::vector<int> all_col_nnz;
150 if (rank == 0) {
151 all_col_nnz.resize(total_cols, 0);
152 }
153
154 MPI_Reduce(my_col_nnz.data(), all_col_nnz.data(), static_cast<int>(total_cols), MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
155
156 std::vector<double> my_vals;
157 std::vector<int> my_rind;
158 for (size_t j = col_start; j < col_end; ++j) {
159 my_vals.insert(my_vals.end(), local_vals[j].begin(), local_vals[j].end());
160 my_rind.insert(my_rind.end(), local_rows[j].begin(), local_rows[j].end());
161 }
162
163 int my_count = static_cast<int>(my_vals.size());
164 std::vector<int> counts(size);
165 MPI_Gather(&my_count, 1, MPI_INT, counts.data(), 1, MPI_INT, 0, MPI_COMM_WORLD);
166
167 std::vector<int> displs(size, 0);
168 int total_nnz = 0;
169 if (rank == 0) {
170 for (int i = 0; i < size; ++i) {
171 displs[i] = total_nnz;
172 total_nnz += counts[i];
173 }
174 }
175
176 std::vector<double> all_vals;
177 std::vector<int> all_rind;
178 if (rank == 0) {
179 all_vals.resize(total_nnz);
180 all_rind.resize(total_nnz);
181 }
182
183 MPI_Gatherv(my_vals.data(), my_count, MPI_DOUBLE, all_vals.data(), counts.data(), displs.data(), MPI_DOUBLE, 0,
184 MPI_COMM_WORLD);
185 MPI_Gatherv(my_rind.data(), my_count, MPI_INT, all_rind.data(), counts.data(), displs.data(), MPI_INT, 0,
186 MPI_COMM_WORLD);
187
188 if (rank == 0) {
189 AssembleFinalMatrix(c, total_cols, chunk, rem, size, all_col_nnz, displs, all_vals, all_rind);
190 }
191
192 MPI_Bcast(&c.rows_num, 1, MPI_INT, 0, MPI_COMM_WORLD);
193 MPI_Bcast(&c.cols_num, 1, MPI_INT, 0, MPI_COMM_WORLD);
194 MPI_Bcast(&c.nnz, 1, MPI_INT, 0, MPI_COMM_WORLD);
195
196 if (rank != 0) {
197 c.col_ptrs.resize(c.cols_num + 1);
198 c.vals.resize(c.nnz);
199 c.row_inds.resize(c.nnz);
200 }
201
202 MPI_Bcast(c.col_ptrs.data(), static_cast<int>(c.col_ptrs.size()), MPI_INT, 0, MPI_COMM_WORLD);
203
204 if (c.nnz > 0) {
205 MPI_Bcast(c.vals.data(), static_cast<int>(c.nnz), MPI_DOUBLE, 0, MPI_COMM_WORLD);
206 MPI_Bcast(c.row_inds.data(), static_cast<int>(c.nnz), MPI_INT, 0, MPI_COMM_WORLD);
207 }
208 return true;
209 }
210
211 bool AgafonovIMatrixCCSALL::PostProcessingImpl() {
212 return true;
213 }
214
215 } // namespace agafonov_i_matrix_ccs_seq
216