| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "kulik_a_mat_mul_double_ccs/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | #include <omp.h> | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <limits> | ||
| 9 | #include <stdexcept> | ||
| 10 | #include <tuple> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "kulik_a_mat_mul_double_ccs/common/include/common.hpp" | ||
| 14 | #include "util/include/util.hpp" | ||
| 15 | |||
| 16 | namespace kulik_a_mat_mul_double_ccs { | ||
| 17 | |||
| 18 | namespace { | ||
| 19 | |||
| 20 | constexpr int kTagMatrix = 1001; | ||
| 21 | |||
| 22 | 16 | inline int ToIntCount(size_t value) { | |
| 23 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
|
16 | if (value > static_cast<size_t>(std::numeric_limits<int>::max())) { |
| 24 | ✗ | throw std::runtime_error("MPI count overflow"); | |
| 25 | } | ||
| 26 | 16 | return static_cast<int>(value); | |
| 27 | } | ||
| 28 | |||
| 29 | 6 | inline size_t EstimateColumnCost(const CCS &a, const CCS &b, size_t j) { | |
| 30 | size_t cost = 0; | ||
| 31 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 6 times.
|
15 | for (size_t k = b.col_ind[j]; k < b.col_ind[j + 1]; ++k) { |
| 32 | 9 | const size_t a_col = b.row[k]; | |
| 33 | 9 | cost += a.col_ind[a_col + 1] - a.col_ind[a_col]; | |
| 34 | } | ||
| 35 | 6 | return cost; | |
| 36 | } | ||
| 37 | |||
| 38 | 1 | inline std::vector<int> BuildBalancedStarts(const std::vector<size_t> &weights, int parts) { | |
| 39 | 1 | if (parts <= 0) { | |
| 40 | parts = 1; | ||
| 41 | } | ||
| 42 | 1 | const int n = static_cast<int>(weights.size()); | |
| 43 | 1 | std::vector<int> starts(static_cast<size_t>(parts) + 1, 0); | |
| 44 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (n == 0) { |
| 45 | return starts; | ||
| 46 | } | ||
| 47 |
1/4✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
1 | std::vector<size_t> prefix(weights.size() + 1, 0); |
| 48 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 1 times.
|
7 | for (size_t i = 0; i < weights.size(); ++i) { |
| 49 | 6 | prefix[i + 1] = prefix[i] + weights[i]; | |
| 50 | } | ||
| 51 | 1 | const size_t total = prefix.back(); | |
| 52 | int cur = 0; | ||
| 53 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | for (int part = 1; part < parts; ++part) { |
| 54 | 1 | const size_t target = total * static_cast<size_t>(part) / static_cast<size_t>(parts); | |
| 55 |
3/4✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 1 times.
|
5 | while (cur < n && prefix[static_cast<size_t>(cur)] < target) { |
| 56 | 4 | ++cur; | |
| 57 | } | ||
| 58 | 1 | starts[static_cast<size_t>(part)] = cur; | |
| 59 | } | ||
| 60 | 1 | starts[static_cast<size_t>(parts)] = n; | |
| 61 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | for (int part = 1; part <= parts; ++part) { |
| 62 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | starts[part] = std::max(starts[part], starts[part - 1]); |
| 63 | } | ||
| 64 | return starts; | ||
| 65 | } | ||
| 66 | |||
| 67 | 6 | inline void MatMultPhase1(size_t j, const CCS &a, const CCS &b, std::vector<size_t> &was, | |
| 68 | std::vector<size_t> &col_nnz) { | ||
| 69 | size_t count = 0; | ||
| 70 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 6 times.
|
15 | for (size_t k = b.col_ind[j]; k < b.col_ind[j + 1]; ++k) { |
| 71 | 9 | const size_t b_row = b.row[k]; | |
| 72 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 9 times.
|
23 | for (size_t zc = a.col_ind[b_row]; zc < a.col_ind[b_row + 1]; ++zc) { |
| 73 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 2 times.
|
14 | const size_t a_row = a.row[zc]; |
| 74 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 2 times.
|
14 | if (was[a_row] != j) { |
| 75 | 12 | was[a_row] = j; | |
| 76 | 12 | ++count; | |
| 77 | } | ||
| 78 | } | ||
| 79 | } | ||
| 80 | 6 | col_nnz[j] = count; | |
| 81 | 6 | } | |
| 82 | |||
| 83 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 4 times.
|
6 | inline void MatMultPhase2(size_t j, const CCS &a, const CCS &b, CCS &c, size_t stamp, std::vector<size_t> &was, |
| 84 | std::vector<double> &accum, std::vector<size_t> &rows) { | ||
| 85 | rows.clear(); | ||
| 86 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 6 times.
|
15 | for (size_t k = b.col_ind[j]; k < b.col_ind[j + 1]; ++k) { |
| 87 | 9 | const double b_val = b.value[k]; | |
| 88 | 9 | const size_t b_row = b.row[k]; | |
| 89 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 9 times.
|
23 | for (size_t zc = a.col_ind[b_row]; zc < a.col_ind[b_row + 1]; ++zc) { |
| 90 | 14 | const size_t a_row = a.row[zc]; | |
| 91 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 2 times.
|
14 | accum[a_row] += a.value[zc] * b_val; |
| 92 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 2 times.
|
14 | if (was[a_row] != stamp) { |
| 93 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 9 times.
|
12 | was[a_row] = stamp; |
| 94 | rows.push_back(a_row); | ||
| 95 | } | ||
| 96 | } | ||
| 97 | } | ||
| 98 | std::ranges::sort(rows); | ||
| 99 | 6 | size_t write = c.col_ind[j]; | |
| 100 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 6 times.
|
18 | for (const size_t i : rows) { |
| 101 | 12 | c.row[write] = i; | |
| 102 | 12 | c.value[write] = accum[i]; | |
| 103 | 12 | accum[i] = 0.0; | |
| 104 | 12 | ++write; | |
| 105 | } | ||
| 106 | 6 | } | |
| 107 | |||
| 108 | 2 | void BcastCCS(CCS &m, int root_rank = 0) { | |
| 109 | 2 | int world_rank = 0; | |
| 110 | 2 | MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); | |
| 111 | |||
| 112 | 2 | MPI_Bcast(&m.m, 1, MPI_INT, root_rank, MPI_COMM_WORLD); | |
| 113 | 2 | MPI_Bcast(&m.n, 1, MPI_INT, root_rank, MPI_COMM_WORLD); | |
| 114 | |||
| 115 | 2 | int nz = 0; | |
| 116 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | if (world_rank == root_rank) { |
| 117 | 1 | nz = static_cast<int>(m.value.size()); | |
| 118 | } | ||
| 119 | 2 | MPI_Bcast(&nz, 1, MPI_INT, root_rank, MPI_COMM_WORLD); | |
| 120 | |||
| 121 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | if (world_rank != root_rank) { |
| 122 | 1 | m.col_ind.resize(static_cast<size_t>(m.m) + 1); | |
| 123 | 1 | m.row.resize(static_cast<size_t>(nz)); | |
| 124 | 1 | m.value.resize(static_cast<size_t>(nz)); | |
| 125 | } | ||
| 126 | |||
| 127 | 2 | MPI_Bcast(m.col_ind.data(), ToIntCount((static_cast<size_t>(m.m) + 1) * sizeof(size_t)), MPI_BYTE, root_rank, | |
| 128 | MPI_COMM_WORLD); | ||
| 129 | 2 | MPI_Bcast(m.row.data(), ToIntCount(static_cast<size_t>(nz) * sizeof(size_t)), MPI_BYTE, root_rank, MPI_COMM_WORLD); | |
| 130 | 2 | MPI_Bcast(m.value.data(), nz, MPI_DOUBLE, root_rank, MPI_COMM_WORLD); | |
| 131 | 2 | } | |
| 132 | |||
| 133 | 2 | void SendCCS(const CCS &m, int dest) { | |
| 134 | 2 | MPI_Send(&m.m, 1, MPI_INT, dest, kTagMatrix, MPI_COMM_WORLD); | |
| 135 | 2 | MPI_Send(&m.n, 1, MPI_INT, dest, kTagMatrix, MPI_COMM_WORLD); | |
| 136 | 2 | const int nz = static_cast<int>(m.value.size()); | |
| 137 | 2 | MPI_Send(&nz, 1, MPI_INT, dest, kTagMatrix, MPI_COMM_WORLD); | |
| 138 | 2 | MPI_Send(m.col_ind.data(), ToIntCount(m.col_ind.size() * sizeof(size_t)), MPI_BYTE, dest, kTagMatrix, MPI_COMM_WORLD); | |
| 139 | 2 | MPI_Send(m.row.data(), ToIntCount(m.row.size() * sizeof(size_t)), MPI_BYTE, dest, kTagMatrix, MPI_COMM_WORLD); | |
| 140 | 2 | MPI_Send(m.value.data(), nz, MPI_DOUBLE, dest, kTagMatrix, MPI_COMM_WORLD); | |
| 141 | 2 | } | |
| 142 | |||
| 143 | 2 | void RecvCCS(CCS &m, int src) { | |
| 144 | MPI_Status st; | ||
| 145 | 2 | MPI_Recv(&m.m, 1, MPI_INT, src, kTagMatrix, MPI_COMM_WORLD, &st); | |
| 146 | 2 | MPI_Recv(&m.n, 1, MPI_INT, src, kTagMatrix, MPI_COMM_WORLD, &st); | |
| 147 | 2 | int nz = 0; | |
| 148 | 2 | MPI_Recv(&nz, 1, MPI_INT, src, kTagMatrix, MPI_COMM_WORLD, &st); | |
| 149 | 2 | m.col_ind.resize(static_cast<size_t>(m.m) + 1); | |
| 150 | 2 | m.row.resize(static_cast<size_t>(nz)); | |
| 151 | 2 | m.value.resize(static_cast<size_t>(nz)); | |
| 152 | 2 | MPI_Recv(m.col_ind.data(), ToIntCount(m.col_ind.size() * sizeof(size_t)), MPI_BYTE, src, kTagMatrix, MPI_COMM_WORLD, | |
| 153 | &st); | ||
| 154 | 2 | MPI_Recv(m.row.data(), ToIntCount(m.row.size() * sizeof(size_t)), MPI_BYTE, src, kTagMatrix, MPI_COMM_WORLD, &st); | |
| 155 | 2 | MPI_Recv(m.value.data(), nz, MPI_DOUBLE, src, kTagMatrix, MPI_COMM_WORLD, &st); | |
| 156 | 2 | } | |
| 157 | |||
| 158 | 2 | void ScatterB(const CCS &b, CCS &b_local, const std::vector<int> &col_starts, int rank, int size) { | |
| 159 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | if (rank == 0) { |
| 160 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | for (int proc = 0; proc < size; ++proc) { |
| 161 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | const int jstart = col_starts[static_cast<size_t>(proc)]; |
| 162 | 2 | const int jend = col_starts[static_cast<size_t>(proc) + 1]; | |
| 163 | 2 | const int local_cols = jend - jstart; | |
| 164 | |||
| 165 | 2 | CCS tmp; | |
| 166 | 2 | tmp.m = local_cols; | |
| 167 | 2 | tmp.n = b.n; | |
| 168 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | tmp.col_ind.resize(static_cast<size_t>(local_cols) + 1); |
| 169 | |||
| 170 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | const size_t nnz_start = b.col_ind[static_cast<size_t>(jstart)]; |
| 171 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | const size_t nnz_end = b.col_ind[static_cast<size_t>(jend)]; |
| 172 | |||
| 173 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | tmp.row.assign(b.row.begin() + static_cast<std::ptrdiff_t>(nnz_start), |
| 174 | b.row.begin() + static_cast<std::ptrdiff_t>(nnz_end)); | ||
| 175 | 2 | tmp.value.assign(b.value.begin() + static_cast<std::ptrdiff_t>(nnz_start), | |
| 176 | b.value.begin() + static_cast<std::ptrdiff_t>(nnz_end)); | ||
| 177 | |||
| 178 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
|
10 | for (int j = 0; j <= local_cols; ++j) { |
| 179 | 8 | tmp.col_ind[static_cast<size_t>(j)] = | |
| 180 | 8 | b.col_ind[static_cast<size_t>(jstart) + static_cast<size_t>(j)] - nnz_start; | |
| 181 | } | ||
| 182 | |||
| 183 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | if (proc == 0) { |
| 184 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | b_local = tmp; |
| 185 | } else { | ||
| 186 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | SendCCS(tmp, proc); |
| 187 | } | ||
| 188 | 2 | } | |
| 189 | } else { | ||
| 190 | 1 | RecvCCS(b_local, 0); | |
| 191 | } | ||
| 192 | 2 | } | |
| 193 | |||
| 194 | 2 | void GatherC(CCS &c, const CCS &c_local, const std::vector<int> &col_starts, int rank, int size) { | |
| 195 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | if (rank != 0) { |
| 196 | 1 | SendCCS(c_local, 0); | |
| 197 | 1 | return; | |
| 198 | } | ||
| 199 | |||
| 200 |
1/2✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
|
1 | std::vector<CCS> parts(static_cast<size_t>(size)); |
| 201 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | parts[0] = c_local; |
| 202 | |||
| 203 | size_t total_nnz = c_local.value.size(); | ||
| 204 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | for (int proc = 1; proc < size; ++proc) { |
| 205 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | RecvCCS(parts[static_cast<size_t>(proc)], proc); |
| 206 | 1 | total_nnz += parts[static_cast<size_t>(proc)].value.size(); | |
| 207 | } | ||
| 208 | |||
| 209 | 1 | c.m = col_starts.back(); | |
| 210 | 1 | c.n = c_local.n; | |
| 211 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | c.col_ind.assign(static_cast<size_t>(c.m) + 1, 0); |
| 212 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | c.row.resize(total_nnz); |
| 213 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | c.value.resize(total_nnz); |
| 214 | |||
| 215 | size_t nnz_offset = 0; | ||
| 216 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | for (int proc = 0; proc < size; ++proc) { |
| 217 | 2 | const CCS &src = parts[static_cast<size_t>(proc)]; | |
| 218 | 2 | const int start = col_starts[static_cast<size_t>(proc)]; | |
| 219 | 2 | const size_t cols = src.m; | |
| 220 | |||
| 221 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
|
10 | for (size_t j = 0; j <= cols; ++j) { |
| 222 | 8 | c.col_ind[static_cast<size_t>(start) + j] = nnz_offset + src.col_ind[j]; | |
| 223 | } | ||
| 224 | |||
| 225 | std::ranges::copy(src.row, c.row.begin() + static_cast<std::ptrdiff_t>(nnz_offset)); | ||
| 226 | std::ranges::copy(src.value, c.value.begin() + static_cast<std::ptrdiff_t>(nnz_offset)); | ||
| 227 | 2 | nnz_offset += src.value.size(); | |
| 228 | } | ||
| 229 | |||
| 230 | 1 | c.col_ind[static_cast<size_t>(c.m)] = total_nnz; | |
| 231 | 1 | } | |
| 232 | |||
| 233 | } // namespace | ||
| 234 | |||
| 235 |
1/2✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
|
2 | KulikAMatMulDoubleCcsALL::KulikAMatMulDoubleCcsALL(const InType &in) { |
| 236 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 237 | |||
| 238 | 2 | int world_rank = 0; | |
| 239 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); |
| 240 | |||
| 241 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | if (world_rank == 0) { |
| 242 | GetInput() = in; | ||
| 243 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | GetOutput() = CCS(); |
| 244 | } else { | ||
| 245 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
2 | GetInput() = std::make_tuple(CCS(), CCS()); |
| 246 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | GetOutput() = CCS(); |
| 247 | } | ||
| 248 | 2 | } | |
| 249 | |||
| 250 | 2 | bool KulikAMatMulDoubleCcsALL::ValidationImpl() { | |
| 251 | 2 | int world_rank = 0; | |
| 252 | 2 | MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); | |
| 253 | |||
| 254 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | if (world_rank == 0) { |
| 255 | const auto &a = std::get<0>(GetInput()); | ||
| 256 | const auto &b = std::get<1>(GetInput()); | ||
| 257 | 1 | return (a.m == b.n); | |
| 258 | } | ||
| 259 | |||
| 260 | return true; | ||
| 261 | } | ||
| 262 | |||
| 263 | 2 | bool KulikAMatMulDoubleCcsALL::PreProcessingImpl() { | |
| 264 | 2 | return true; | |
| 265 | } | ||
| 266 | |||
| 267 | 2 | bool KulikAMatMulDoubleCcsALL::RunImpl() { | |
| 268 | 2 | int world_rank = 0; | |
| 269 | 2 | int world_size = 0; | |
| 270 | 2 | MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); | |
| 271 | 2 | MPI_Comm_size(MPI_COMM_WORLD, &world_size); | |
| 272 | |||
| 273 | auto &a = std::get<0>(GetInput()); | ||
| 274 | auto &b = std::get<1>(GetInput()); | ||
| 275 | OutType &c = GetOutput(); | ||
| 276 | |||
| 277 | 2 | std::vector<int> col_starts; | |
| 278 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | if (world_rank == 0) { |
| 279 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | std::vector<size_t> weights(static_cast<size_t>(b.m), 0); |
| 280 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 1 times.
|
7 | for (size_t j = 0; j < static_cast<size_t>(b.m); ++j) { |
| 281 | 6 | weights[j] = EstimateColumnCost(a, b, j); | |
| 282 | } | ||
| 283 |
2/6✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
|
2 | col_starts = BuildBalancedStarts(weights, world_size); |
| 284 | } else { | ||
| 285 |
1/4✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
1 | col_starts.resize(static_cast<size_t>(world_size) + 1, 0); |
| 286 | } | ||
| 287 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | MPI_Bcast(col_starts.data(), world_size + 1, MPI_INT, 0, MPI_COMM_WORLD); |
| 288 | |||
| 289 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | BcastCCS(a); |
| 290 | |||
| 291 | 2 | CCS local_b; | |
| 292 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | ScatterB(b, local_b, col_starts, world_rank, world_size); |
| 293 | |||
| 294 | 2 | const auto local_cols = static_cast<size_t>(local_b.m); | |
| 295 | |||
| 296 | 2 | CCS local_c; | |
| 297 | 2 | local_c.m = local_b.m; | |
| 298 | 2 | local_c.n = a.n; | |
| 299 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | local_c.col_ind.assign(local_cols + 1, 0); |
| 300 | |||
| 301 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | std::vector<size_t> col_nnz(local_cols, 0); |
| 302 | |||
| 303 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | #pragma omp parallel num_threads(std::max(1, ppc::util::GetNumThreads())) default(none) \ |
| 304 | shared(a, local_b, col_nnz, local_cols) | ||
| 305 | { | ||
| 306 | std::vector<size_t> was(static_cast<size_t>(a.n), std::numeric_limits<size_t>::max()); | ||
| 307 | #pragma omp for schedule(static) | ||
| 308 | for (size_t j = 0; j < local_cols; ++j) { | ||
| 309 | MatMultPhase1(j, a, local_b, was, col_nnz); | ||
| 310 | } | ||
| 311 | } | ||
| 312 | |||
| 313 | size_t total_nz = 0; | ||
| 314 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
|
8 | for (size_t j = 0; j < local_cols; ++j) { |
| 315 | 6 | local_c.col_ind[j] = total_nz; | |
| 316 | 6 | total_nz += col_nnz[j]; | |
| 317 | } | ||
| 318 | 2 | local_c.col_ind[local_cols] = total_nz; | |
| 319 | 2 | local_c.nz = total_nz; | |
| 320 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | local_c.value.resize(total_nz); |
| 321 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | local_c.row.resize(total_nz); |
| 322 | |||
| 323 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | #pragma omp parallel num_threads(std::max(1, ppc::util::GetNumThreads())) default(none) \ |
| 324 | shared(a, local_b, local_c, local_cols) | ||
| 325 | { | ||
| 326 | std::vector<size_t> was(static_cast<size_t>(a.n), std::numeric_limits<size_t>::max()); | ||
| 327 | std::vector<double> accum(static_cast<size_t>(a.n), 0.0); | ||
| 328 | std::vector<size_t> rows; | ||
| 329 | #pragma omp for schedule(static) | ||
| 330 | for (size_t j = 0; j < local_cols; ++j) { | ||
| 331 | MatMultPhase2(j, a, local_b, local_c, local_cols + j, was, accum, rows); | ||
| 332 | } | ||
| 333 | } | ||
| 334 | |||
| 335 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | GatherC(c, local_c, col_starts, world_rank, world_size); |
| 336 | |||
| 337 | 2 | return true; | |
| 338 | 2 | } | |
| 339 | |||
| 340 | 2 | bool KulikAMatMulDoubleCcsALL::PostProcessingImpl() { | |
| 341 | 2 | int world_rank = 0; | |
| 342 | 2 | MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); | |
| 343 | |||
| 344 | OutType &c = GetOutput(); | ||
| 345 | |||
| 346 | 2 | int m = 0; | |
| 347 | 2 | int n = 0; | |
| 348 | 2 | int nz = 0; | |
| 349 | |||
| 350 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | if (world_rank == 0) { |
| 351 | 1 | m = static_cast<int>(c.m); | |
| 352 | 1 | n = static_cast<int>(c.n); | |
| 353 | 1 | nz = static_cast<int>(c.value.size()); | |
| 354 | } | ||
| 355 | |||
| 356 | 2 | MPI_Bcast(&m, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 357 | 2 | MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 358 | 2 | MPI_Bcast(&nz, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 359 | |||
| 360 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | if (world_rank != 0) { |
| 361 | 1 | c.m = m; | |
| 362 | 1 | c.n = n; | |
| 363 | 1 | c.col_ind.resize(static_cast<size_t>(m) + 1); | |
| 364 | 1 | c.row.resize(static_cast<size_t>(nz)); | |
| 365 | 1 | c.value.resize(static_cast<size_t>(nz)); | |
| 366 | } | ||
| 367 | |||
| 368 | 2 | MPI_Bcast(c.col_ind.data(), ToIntCount((static_cast<size_t>(m) + 1) * sizeof(size_t)), MPI_BYTE, 0, MPI_COMM_WORLD); | |
| 369 | 2 | MPI_Bcast(c.row.data(), ToIntCount(static_cast<size_t>(nz) * sizeof(size_t)), MPI_BYTE, 0, MPI_COMM_WORLD); | |
| 370 | 2 | MPI_Bcast(c.value.data(), nz, MPI_DOUBLE, 0, MPI_COMM_WORLD); | |
| 371 | |||
| 372 | 2 | return true; | |
| 373 | } | ||
| 374 | |||
| 375 | } // namespace kulik_a_mat_mul_double_ccs | ||
| 376 |