| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "zavyalov_a_complex_sparse_matrix_mult/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | #include <omp.h> | ||
| 5 | |||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <cstdint> | ||
| 9 | #include <map> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "oneapi/tbb/parallel_for.h" | ||
| 14 | #include "util/include/util.hpp" | ||
| 15 | #include "zavyalov_a_complex_sparse_matrix_mult/common/include/common.hpp" | ||
| 16 | |||
| 17 | namespace zavyalov_a_compl_sparse_matr_mult { | ||
| 18 | namespace { | ||
/// Converts every element of `v` to `uint64_t` with `static_cast`.
///
/// NOTE(review): the name suggests the result is meant to be shipped as MPI_UINT64_T — confirm against callers.
///
/// @tparam T element type that can be `static_cast` to `uint64_t`.
/// @param v values to convert.
/// @return a vector of the same length holding the converted values, in the same order.
template <typename T>
std::vector<uint64_t> ToMPI(const std::vector<T> &v) {
  std::vector<uint64_t> converted;
  converted.reserve(v.size());
  for (const T &item : v) {
    converted.push_back(static_cast<uint64_t>(item));
  }
  return converted;
}
| 27 | |||
/// Converts every `uint64_t` in `v` to `T` with `static_cast`.
///
/// @tparam T destination element type; must be default-constructible and reachable from `uint64_t` via `static_cast`.
/// @param v values to convert.
/// @return a vector of the same length holding the converted values, in the same order.
template <typename T>
std::vector<T> FromMPI(const std::vector<uint64_t> &v) {
  std::vector<T> converted;
  converted.reserve(v.size());
  for (const uint64_t raw : v) {
    converted.push_back(static_cast<T>(raw));
  }
  return converted;
}
| 36 | |||
| 37 | 20 | void BroadcastMatrix(SparseMatrix &m) { | |
| 38 | 20 | int rank = 0; | |
| 39 | 20 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 40 | |||
| 41 | 20 | std::array<int, 3> meta{}; | |
| 42 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (rank == 0) { |
| 43 | 10 | meta[0] = static_cast<int>(m.height); | |
| 44 | 10 | meta[1] = static_cast<int>(m.width); | |
| 45 | 10 | meta[2] = static_cast<int>(m.val.size()); | |
| 46 | } | ||
| 47 | |||
| 48 | 20 | MPI_Bcast(meta.data(), 3, MPI_INT, 0, MPI_COMM_WORLD); | |
| 49 | |||
| 50 | 20 | m.height = static_cast<size_t>(meta[0]); | |
| 51 | 20 | m.width = static_cast<size_t>(meta[1]); | |
| 52 | 20 | auto count = static_cast<size_t>(meta[2]); | |
| 53 | |||
| 54 | 20 | m.row_ind.resize(count); | |
| 55 | 20 | m.col_ind.resize(count); | |
| 56 | 20 | m.val.resize(count); | |
| 57 | |||
| 58 | 20 | std::vector<int> tmp_rows(count); | |
| 59 |
1/4✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
20 | std::vector<int> tmp_cols(count); |
| 60 | |||
| 61 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (rank == 0) { |
| 62 |
2/2✓ Branch 0 taken 143 times.
✓ Branch 1 taken 10 times.
|
153 | for (size_t i = 0; i < count; ++i) { |
| 63 | 143 | tmp_rows[i] = static_cast<int>(m.row_ind[i]); | |
| 64 | 143 | tmp_cols[i] = static_cast<int>(m.col_ind[i]); | |
| 65 | } | ||
| 66 | } | ||
| 67 | |||
| 68 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | MPI_Bcast(tmp_rows.data(), static_cast<int>(count), MPI_INT, 0, MPI_COMM_WORLD); |
| 69 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | MPI_Bcast(tmp_cols.data(), static_cast<int>(count), MPI_INT, 0, MPI_COMM_WORLD); |
| 70 | |||
| 71 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (rank != 0) { |
| 72 |
2/2✓ Branch 0 taken 143 times.
✓ Branch 1 taken 10 times.
|
153 | for (size_t i = 0; i < count; ++i) { |
| 73 | 143 | m.row_ind[i] = static_cast<size_t>(tmp_rows[i]); | |
| 74 | 143 | m.col_ind[i] = static_cast<size_t>(tmp_cols[i]); | |
| 75 | } | ||
| 76 | } | ||
| 77 | |||
| 78 |
1/4✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
20 | std::vector<double> re(count); |
| 79 |
1/4✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
20 | std::vector<double> im(count); |
| 80 | |||
| 81 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (rank == 0) { |
| 82 |
2/2✓ Branch 0 taken 143 times.
✓ Branch 1 taken 10 times.
|
153 | for (size_t i = 0; i < count; ++i) { |
| 83 | 143 | re[i] = m.val[i].re; | |
| 84 | 143 | im[i] = m.val[i].im; | |
| 85 | } | ||
| 86 | } | ||
| 87 | |||
| 88 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | MPI_Bcast(re.data(), static_cast<int>(count), MPI_DOUBLE, 0, MPI_COMM_WORLD); |
| 89 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | MPI_Bcast(im.data(), static_cast<int>(count), MPI_DOUBLE, 0, MPI_COMM_WORLD); |
| 90 | |||
| 91 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (rank != 0) { |
| 92 |
2/2✓ Branch 0 taken 143 times.
✓ Branch 1 taken 10 times.
|
153 | for (size_t i = 0; i < count; ++i) { |
| 93 | 143 | m.val[i] = Complex(re[i], im[i]); | |
| 94 | } | ||
| 95 | } | ||
| 96 | 20 | } | |
| 97 | |||
| 98 | 20 | void ScatterMatrixA(int rank, int world_size, size_t total, const SparseMatrix &ma, std::vector<int> &sendcounts, | |
| 99 | std::vector<int> &displs, std::vector<size_t> &local_rows, std::vector<size_t> &local_cols, | ||
| 100 | std::vector<double> &local_re, std::vector<double> &local_im) { | ||
| 101 | 20 | int blocksize = static_cast<int>(total) / world_size; | |
| 102 | 20 | int leftover = static_cast<int>(total) % world_size; | |
| 103 | |||
| 104 | int offset = 0; | ||
| 105 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 20 times.
|
60 | for (int proc = 0; proc < world_size; ++proc) { |
| 106 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 10 times.
|
70 | sendcounts[proc] = blocksize + (proc < leftover ? 1 : 0); |
| 107 | 40 | displs[proc] = offset; | |
| 108 | 40 | offset += sendcounts[proc]; | |
| 109 | } | ||
| 110 | |||
| 111 | 20 | int local_count = sendcounts[rank]; | |
| 112 | |||
| 113 | 20 | local_rows.resize(local_count); | |
| 114 | 20 | local_cols.resize(local_count); | |
| 115 | 20 | local_re.resize(local_count); | |
| 116 | 20 | local_im.resize(local_count); | |
| 117 | |||
| 118 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (rank == 0) { |
| 119 |
2/2✓ Branch 0 taken 85 times.
✓ Branch 1 taken 10 times.
|
95 | for (int i = 0; i < local_count; ++i) { |
| 120 | 85 | local_rows[i] = ma.row_ind[i]; | |
| 121 | 85 | local_cols[i] = ma.col_ind[i]; | |
| 122 | 85 | local_re[i] = ma.val[i].re; | |
| 123 | 85 | local_im[i] = ma.val[i].im; | |
| 124 | } | ||
| 125 | |||
| 126 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | for (int proc = 1; proc < world_size; ++proc) { |
| 127 | 10 | int cnt = sendcounts[proc]; | |
| 128 | 10 | int dsp = displs[proc]; | |
| 129 | |||
| 130 | 10 | std::vector<int> rows_send(cnt); | |
| 131 |
1/4✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
10 | std::vector<int> cols_send(cnt); |
| 132 |
1/4✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
10 | std::vector<double> re_buf(cnt); |
| 133 |
1/4✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
10 | std::vector<double> im_buf(cnt); |
| 134 | |||
| 135 |
2/2✓ Branch 0 taken 80 times.
✓ Branch 1 taken 10 times.
|
90 | for (int i = 0; i < cnt; ++i) { |
| 136 | 80 | rows_send[i] = static_cast<int>(ma.row_ind[dsp + i]); | |
| 137 | 80 | cols_send[i] = static_cast<int>(ma.col_ind[dsp + i]); | |
| 138 | 80 | re_buf[i] = ma.val[dsp + i].re; | |
| 139 | 80 | im_buf[i] = ma.val[dsp + i].im; | |
| 140 | } | ||
| 141 | |||
| 142 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Send(rows_send.data(), cnt, MPI_INT, proc, 0, MPI_COMM_WORLD); |
| 143 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Send(cols_send.data(), cnt, MPI_INT, proc, 1, MPI_COMM_WORLD); |
| 144 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Send(re_buf.data(), cnt, MPI_DOUBLE, proc, 2, MPI_COMM_WORLD); |
| 145 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Send(im_buf.data(), cnt, MPI_DOUBLE, proc, 3, MPI_COMM_WORLD); |
| 146 | } | ||
| 147 | |||
| 148 | } else { | ||
| 149 | 10 | std::vector<int> rows_recv(local_count); | |
| 150 |
2/6✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 10 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
10 | std::vector<int> cols_recv(local_count); |
| 151 | |||
| 152 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Recv(rows_recv.data(), local_count, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); |
| 153 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Recv(cols_recv.data(), local_count, MPI_INT, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); |
| 154 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Recv(local_re.data(), local_count, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE); |
| 155 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Recv(local_im.data(), local_count, MPI_DOUBLE, 0, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE); |
| 156 | |||
| 157 |
2/2✓ Branch 0 taken 80 times.
✓ Branch 1 taken 10 times.
|
90 | for (int i = 0; i < local_count; ++i) { |
| 158 | 80 | local_rows[i] = static_cast<size_t>(rows_recv[i]); | |
| 159 | 80 | local_cols[i] = static_cast<size_t>(cols_recv[i]); | |
| 160 | } | ||
| 161 | } | ||
| 162 | 20 | } | |
| 163 | 20 | void GatherResult(int rank, int world_size, const std::vector<size_t> &rows, const std::vector<size_t> &cols, | |
| 164 | const std::vector<double> &re_vals, const std::vector<double> &im_vals, size_t a_height, | ||
| 165 | size_t b_width, SparseMatrix &output) { | ||
| 166 | 20 | int local_count = static_cast<int>(rows.size()); | |
| 167 | |||
| 168 |
1/2✓ Branch 2 taken 20 times.
✗ Branch 3 not taken.
|
20 | std::vector<int> all_counts(world_size); |
| 169 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | MPI_Gather(&local_count, 1, MPI_INT, all_counts.data(), 1, MPI_INT, 0, MPI_COMM_WORLD); |
| 170 | |||
| 171 |
1/4✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
20 | std::vector<int> displs(world_size, 0); |
| 172 | int total_count = 0; | ||
| 173 | |||
| 174 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (rank == 0) { |
| 175 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 10 times.
|
30 | for (int i = 0; i < world_size; ++i) { |
| 176 | 20 | displs[i] = total_count; | |
| 177 | 20 | total_count += all_counts[i]; | |
| 178 | } | ||
| 179 | } | ||
| 180 | |||
| 181 |
1/4✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
20 | std::vector<int> all_rows(total_count); |
| 182 |
1/4✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
20 | std::vector<int> all_cols(total_count); |
| 183 |
1/4✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
20 | std::vector<double> all_re(total_count); |
| 184 |
1/4✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
20 | std::vector<double> all_im(total_count); |
| 185 | |||
| 186 |
1/4✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
20 | std::vector<int> rows_int(local_count); |
| 187 |
1/4✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
20 | std::vector<int> cols_int(local_count); |
| 188 | |||
| 189 |
2/2✓ Branch 0 taken 293 times.
✓ Branch 1 taken 20 times.
|
313 | for (int i = 0; i < local_count; ++i) { |
| 190 | 293 | rows_int[i] = static_cast<int>(rows[i]); | |
| 191 | 293 | cols_int[i] = static_cast<int>(cols[i]); | |
| 192 | } | ||
| 193 | |||
| 194 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | MPI_Gatherv(rows_int.data(), local_count, MPI_INT, all_rows.data(), all_counts.data(), displs.data(), MPI_INT, 0, |
| 195 | MPI_COMM_WORLD); | ||
| 196 | |||
| 197 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | MPI_Gatherv(cols_int.data(), local_count, MPI_INT, all_cols.data(), all_counts.data(), displs.data(), MPI_INT, 0, |
| 198 | MPI_COMM_WORLD); | ||
| 199 | |||
| 200 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | MPI_Gatherv(re_vals.data(), local_count, MPI_DOUBLE, all_re.data(), all_counts.data(), displs.data(), MPI_DOUBLE, 0, |
| 201 | MPI_COMM_WORLD); | ||
| 202 | |||
| 203 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | MPI_Gatherv(im_vals.data(), local_count, MPI_DOUBLE, all_im.data(), all_counts.data(), displs.data(), MPI_DOUBLE, 0, |
| 204 | MPI_COMM_WORLD); | ||
| 205 | |||
| 206 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (rank == 0) { |
| 207 | std::map<std::pair<size_t, size_t>, Complex> mp; | ||
| 208 | |||
| 209 |
2/2✓ Branch 0 taken 293 times.
✓ Branch 1 taken 10 times.
|
303 | for (int i = 0; i < total_count; ++i) { |
| 210 |
1/2✓ Branch 1 taken 293 times.
✗ Branch 2 not taken.
|
293 | mp[{static_cast<size_t>(all_rows[i]), static_cast<size_t>(all_cols[i])}] += Complex(all_re[i], all_im[i]); |
| 211 | } | ||
| 212 | |||
| 213 | 10 | output.height = a_height; | |
| 214 | 10 | output.width = b_width; | |
| 215 | |||
| 216 |
2/2✓ Branch 0 taken 147 times.
✓ Branch 1 taken 10 times.
|
157 | for (auto &[k, v] : mp) { |
| 217 |
2/2✓ Branch 0 taken 100 times.
✓ Branch 1 taken 47 times.
|
147 | output.row_ind.push_back(k.first); |
| 218 |
2/2✓ Branch 0 taken 100 times.
✓ Branch 1 taken 47 times.
|
147 | output.col_ind.push_back(k.second); |
| 219 |
2/2✓ Branch 0 taken 100 times.
✓ Branch 1 taken 47 times.
|
147 | output.val.push_back(v); |
| 220 | } | ||
| 221 | } | ||
| 222 | 20 | } | |
| 223 | } // namespace | ||
| 224 | |||
| 225 |
1/2✓ Branch 2 taken 20 times.
✗ Branch 3 not taken.
|
20 | ZavyalovAComplSparseMatrMultALL::ZavyalovAComplSparseMatrMultALL(const InType &in) { |
| 226 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 227 | 20 | int rank = 0; | |
| 228 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); |
| 229 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (rank == 0) { |
| 230 | GetInput() = in; | ||
| 231 | } | ||
| 232 | 20 | } | |
| 233 | |||
| 234 | 20 | bool ZavyalovAComplSparseMatrMultALL::ValidationImpl() { | |
| 235 | 20 | int rank = 0; | |
| 236 | 20 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 237 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (rank != 0) { |
| 238 | return true; | ||
| 239 | } | ||
| 240 | const auto &matr_a = std::get<0>(GetInput()); | ||
| 241 | const auto &matr_b = std::get<1>(GetInput()); | ||
| 242 | 10 | return matr_a.width == matr_b.height; | |
| 243 | } | ||
| 244 | |||
| 245 | 20 | bool ZavyalovAComplSparseMatrMultALL::PreProcessingImpl() { | |
| 246 | 20 | return true; | |
| 247 | } | ||
| 248 | |||
| 249 | 20 | std::map<std::pair<size_t, size_t>, Complex> ZavyalovAComplSparseMatrMultALL::ComputeLocalChunk( | |
| 250 | const SparseMatrix &matr_a, const SparseMatrix &matr_b, size_t start, size_t end) { | ||
| 251 | 20 | int num_threads = ppc::util::GetNumThreads(); | |
| 252 | 20 | std::vector<std::map<std::pair<size_t, size_t>, Complex>> local_maps(num_threads); | |
| 253 | |||
| 254 | 20 | #pragma omp parallel for num_threads(num_threads) schedule(static) default(none) \ | |
| 255 | shared(matr_a, matr_b, local_maps, start, end) | ||
| 256 | for (size_t i = start; i < end; ++i) { | ||
| 257 | int tid = omp_get_thread_num(); | ||
| 258 | size_t row_a = matr_a.row_ind[i]; | ||
| 259 | size_t col_a = matr_a.col_ind[i]; | ||
| 260 | Complex val_a = matr_a.val[i]; | ||
| 261 | |||
| 262 | for (size_t j = 0; j < matr_b.Count(); ++j) { | ||
| 263 | if (col_a == matr_b.row_ind[j]) { | ||
| 264 | local_maps[tid][{row_a, matr_b.col_ind[j]}] += val_a * matr_b.val[j]; | ||
| 265 | } | ||
| 266 | } | ||
| 267 | } | ||
| 268 | |||
| 269 | std::map<std::pair<size_t, size_t>, Complex> result; | ||
| 270 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 20 times.
|
60 | for (auto &lm : local_maps) { |
| 271 |
2/2✓ Branch 0 taken 510 times.
✓ Branch 1 taken 40 times.
|
550 | for (auto &[key, value] : lm) { |
| 272 |
1/2✓ Branch 1 taken 510 times.
✗ Branch 2 not taken.
|
510 | result[key] += value; |
| 273 | } | ||
| 274 | } | ||
| 275 | 20 | return result; | |
| 276 | 20 | } | |
| 277 | 20 | bool ZavyalovAComplSparseMatrMultALL::RunImpl() { | |
| 278 | 20 | int rank = 0; | |
| 279 | 20 | int world_size = 1; | |
| 280 | 20 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 281 | 20 | MPI_Comm_size(MPI_COMM_WORLD, &world_size); | |
| 282 | |||
| 283 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | SparseMatrix local_b = (rank == 0) ? std::get<1>(GetInput()) : SparseMatrix{}; |
| 284 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | BroadcastMatrix(local_b); |
| 285 | |||
| 286 | 20 | uint64_t total_mpi = 0; | |
| 287 | 20 | uint64_t a_height_mpi = 0; | |
| 288 | 20 | uint64_t a_width_mpi = 0; | |
| 289 | size_t total = 0; | ||
| 290 | size_t a_height = 0; | ||
| 291 | size_t a_width = 0; | ||
| 292 | |||
| 293 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (rank == 0) { |
| 294 | const auto &ma = std::get<0>(GetInput()); | ||
| 295 | total = ma.Count(); | ||
| 296 | 10 | a_height = ma.height; | |
| 297 | 10 | a_width = ma.width; | |
| 298 | |||
| 299 | 10 | total_mpi = static_cast<uint64_t>(total); | |
| 300 | 10 | a_height_mpi = static_cast<uint64_t>(a_height); | |
| 301 | 10 | a_width_mpi = static_cast<uint64_t>(a_width); | |
| 302 | } | ||
| 303 | |||
| 304 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | MPI_Bcast(&total_mpi, 1, MPI_UINT64_T, 0, MPI_COMM_WORLD); |
| 305 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | MPI_Bcast(&a_height_mpi, 1, MPI_UINT64_T, 0, MPI_COMM_WORLD); |
| 306 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | MPI_Bcast(&a_width_mpi, 1, MPI_UINT64_T, 0, MPI_COMM_WORLD); |
| 307 | |||
| 308 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (rank != 0) { |
| 309 | 10 | total = static_cast<size_t>(total_mpi); | |
| 310 | 10 | a_height = static_cast<size_t>(a_height_mpi); | |
| 311 | 10 | a_width = static_cast<size_t>(a_width_mpi); | |
| 312 | } | ||
| 313 | |||
| 314 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | std::vector<int> sendcounts(world_size); |
| 315 |
1/4✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
20 | std::vector<int> displs(world_size); |
| 316 | 20 | std::vector<size_t> local_rows; | |
| 317 | 20 | std::vector<size_t> local_cols; | |
| 318 | 20 | std::vector<double> local_re; | |
| 319 | 20 | std::vector<double> local_im; | |
| 320 | |||
| 321 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (rank == 0) { |
| 322 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | ScatterMatrixA(rank, world_size, total, std::get<0>(GetInput()), sendcounts, displs, local_rows, local_cols, |
| 323 | local_re, local_im); | ||
| 324 | } else { | ||
| 325 | 10 | SparseMatrix dummy; | |
| 326 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | ScatterMatrixA(rank, world_size, total, dummy, sendcounts, displs, local_rows, local_cols, local_re, local_im); |
| 327 | 10 | } | |
| 328 | |||
| 329 | 20 | SparseMatrix local_a; | |
| 330 | 20 | local_a.height = a_height; | |
| 331 | 20 | local_a.width = a_width; | |
| 332 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | local_a.row_ind = local_rows; |
| 333 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | local_a.col_ind = local_cols; |
| 334 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | local_a.val.resize(local_rows.size()); |
| 335 | |||
| 336 |
2/2✓ Branch 0 taken 165 times.
✓ Branch 1 taken 20 times.
|
185 | for (size_t i = 0; i < local_rows.size(); ++i) { |
| 337 | 165 | local_a.val[i] = Complex(local_re[i], local_im[i]); | |
| 338 | } | ||
| 339 | |||
| 340 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | auto local_mp = ComputeLocalChunk(local_a, local_b, 0, local_rows.size()); |
| 341 | |||
| 342 | 20 | std::vector<size_t> rows; | |
| 343 | 20 | std::vector<size_t> cols; | |
| 344 | |||
| 345 | 20 | std::vector<double> re_vals; | |
| 346 | 20 | std::vector<double> im_vals; | |
| 347 | |||
| 348 |
2/2✓ Branch 0 taken 293 times.
✓ Branch 1 taken 20 times.
|
313 | for (const auto &[key, val] : local_mp) { |
| 349 |
2/2✓ Branch 0 taken 200 times.
✓ Branch 1 taken 93 times.
|
293 | rows.push_back(key.first); |
| 350 |
2/2✓ Branch 0 taken 200 times.
✓ Branch 1 taken 93 times.
|
293 | cols.push_back(key.second); |
| 351 |
2/2✓ Branch 0 taken 200 times.
✓ Branch 1 taken 93 times.
|
293 | re_vals.push_back(val.re); |
| 352 |
2/2✓ Branch 0 taken 200 times.
✓ Branch 1 taken 93 times.
|
293 | im_vals.push_back(val.im); |
| 353 | } | ||
| 354 | |||
| 355 | 20 | SparseMatrix result; | |
| 356 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | GatherResult(rank, world_size, rows, cols, re_vals, im_vals, a_height, local_b.width, result); |
| 357 | |||
| 358 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (rank == 0) { |
| 359 | 10 | GetOutput() = std::move(result); | |
| 360 | } | ||
| 361 | |||
| 362 | 20 | return true; | |
| 363 | 60 | } | |
| 364 | |||
| 365 | 20 | bool ZavyalovAComplSparseMatrMultALL::PostProcessingImpl() { | |
| 366 | 20 | return true; | |
| 367 | } | ||
| 368 | |||
| 369 | } // namespace zavyalov_a_compl_sparse_matr_mult | ||
| 370 |