| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "makoveeva_matmul_double/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | #include <omp.h> | ||
| 5 | |||
| 6 | #include <cmath> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "makoveeva_matmul_double/all/include/common.hpp" | ||
| 11 | |||
| 12 | namespace makoveeva_matmul_double_all { | ||
| 13 | namespace { | ||
| 14 | |||
/// Computes the dense product c = a * b of two n x n matrices stored row-major
/// in flat vectors.
///
/// Each output cell is an independent dot product, so the two outer loops are
/// collapsed and divided among the OpenMP threads.
///
/// @param n Matrix dimension.
/// @param a Left operand; elements [0, n*n) are read.
/// @param b Right operand; elements [0, n*n) are read.
/// @param c Destination; elements [0, n*n) are overwritten. It is indexed, not
///          resized, so it must already hold at least n*n elements.
void ParallelMultiplyImpl(size_t n, const std::vector<double> &a, const std::vector<double> &b,
                          std::vector<double> &c) {
#pragma omp parallel for default(none) shared(n, a, b, c) collapse(2)
  for (size_t row = 0; row < n; ++row) {
    for (size_t col = 0; col < n; ++col) {
      const size_t row_base = row * n;
      double acc = 0.0;
      for (size_t inner = 0; inner < n; ++inner) {
        acc += a[row_base + inner] * b[(inner * n) + col];
      }
      c[row_base + col] = acc;
    }
  }
}
| 28 | |||
/// Repacks a row-major n x n matrix into grid_size x grid_size square tiles of
/// side bs, each tile stored contiguously (row-major inside the tile).
///
/// Tile (tile_row, tile_col) starts at offset
/// (tile_row * grid_size + tile_col) * bs * bs in dst.
///
/// @param src       Source matrix, row-major with row stride n.
/// @param dst       Tile-ordered destination; it is indexed, not resized, so it
///                  must already hold grid_size * grid_size * bs * bs elements.
/// @param n         Row stride of src.
/// @param bs        Tile side length.
/// @param grid_size Number of tiles along each dimension.
void SplitIntoBlocksImpl(const std::vector<double> &src, std::vector<double> &dst, size_t n, size_t bs, int grid_size) {
#pragma omp parallel for default(none) shared(src, dst, n, bs, grid_size) collapse(2)
  for (int tile_row = 0; tile_row < grid_size; ++tile_row) {
    for (int tile_col = 0; tile_col < grid_size; ++tile_col) {
      const int tile_index = (tile_row * grid_size) + tile_col;
      const size_t tile_offset = static_cast<size_t>(tile_index) * (bs * bs);
      const size_t first_row = static_cast<size_t>(tile_row) * bs;
      const size_t first_col = static_cast<size_t>(tile_col) * bs;

      for (size_t r = 0; r < bs; ++r) {
        // Start of this tile row in the full matrix and in the packed tile.
        const size_t matrix_row_base = ((first_row + r) * n) + first_col;
        const size_t tile_row_base = tile_offset + (r * bs);
        for (size_t col = 0; col < bs; ++col) {
          dst[tile_row_base + col] = src[matrix_row_base + col];
        }
      }
    }
  }
}
| 45 | |||
/// Reassembles a row-major n x n matrix from grid_size x grid_size contiguous
/// square tiles of side bs (the inverse layout transform of tile packing).
///
/// Tile (tile_row, tile_col) is read from offset
/// (tile_row * grid_size + tile_col) * bs * bs in src.
///
/// @param src       Tile-ordered input, each tile row-major.
/// @param dst       Row-major destination with row stride n; it is indexed, not
///                  resized, so it must already be large enough.
/// @param n         Row stride of dst.
/// @param bs        Tile side length.
/// @param grid_size Number of tiles along each dimension.
void MergeFromBlocksImpl(const std::vector<double> &src, std::vector<double> &dst, size_t n, size_t bs, int grid_size) {
#pragma omp parallel for default(none) shared(src, dst, n, bs, grid_size) collapse(2)
  for (int tile_row = 0; tile_row < grid_size; ++tile_row) {
    for (int tile_col = 0; tile_col < grid_size; ++tile_col) {
      const int tile_index = (tile_row * grid_size) + tile_col;
      const size_t tile_offset = static_cast<size_t>(tile_index) * (bs * bs);
      const size_t first_row = static_cast<size_t>(tile_row) * bs;
      const size_t first_col = static_cast<size_t>(tile_col) * bs;

      for (size_t r = 0; r < bs; ++r) {
        // Start of this tile row in the packed tile and in the full matrix.
        const size_t tile_row_base = tile_offset + (r * bs);
        const size_t matrix_row_base = ((first_row + r) * n) + first_col;
        for (size_t col = 0; col < bs; ++col) {
          dst[matrix_row_base + col] = src[tile_row_base + col];
        }
      }
    }
  }
}
| 62 | |||
/// Accumulates the product of two bs x bs row-major tiles into block_c:
/// block_c += block_a * block_b.
///
/// The dot product of each cell is summed in a local variable first and only
/// then added to the existing value in block_c.
///
/// @param block_a Left tile; elements [0, bs*bs) are read.
/// @param block_b Right tile; elements [0, bs*bs) are read.
/// @param block_c Accumulator tile; elements [0, bs*bs) are updated in place.
/// @param bs      Tile side length.
void MultiplyBlockPairImpl(const std::vector<double> &block_a, const std::vector<double> &block_b,
                           std::vector<double> &block_c, size_t bs) {
#pragma omp parallel for default(none) shared(block_a, block_b, block_c, bs) collapse(2)
  for (size_t row = 0; row < bs; ++row) {
    for (size_t col = 0; col < bs; ++col) {
      const size_t row_base = row * bs;
      double dot = 0.0;
      for (size_t inner = 0; inner < bs; ++inner) {
        dot += block_a[row_base + inner] * block_b[(inner * bs) + col];
      }
      block_c[row_base + col] += dot;
    }
  }
}
| 76 | |||
/// Tells whether the processes can be arranged as a square grid that evenly
/// tiles an n x n matrix.
///
/// @param n         Matrix dimension.
/// @param grid_size Proposed number of processes along each grid side.
/// @param num_procs Total number of processes.
/// @return true only if grid_size is positive, grid_size squared equals
///         num_procs, and n is a multiple of grid_size.
bool IsValidConfigurationImpl(size_t n, int grid_size, int num_procs) {
  // A non-positive grid can never be valid. Rejecting it first also keeps the
  // modulo below from dividing by zero or by a negative value wrapped to size_t.
  if (grid_size <= 0) {
    return false;
  }

  // Square in a wider type so a large grid_size cannot overflow int.
  const long long grid_cells = static_cast<long long>(grid_size) * grid_size;
  if (grid_cells != static_cast<long long>(num_procs)) {
    return false;
  }

  return (n % static_cast<size_t>(grid_size)) == 0;
}
| 80 | |||
| 81 | 24 | void HandleFallbackImpl(int my_rank, size_t n, const std::vector<double> &a, const std::vector<double> &b, | |
| 82 | std::vector<double> &c) { | ||
| 83 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
|
24 | if (my_rank == 0) { |
| 84 | ParallelMultiplyImpl(n, a, b, c); | ||
| 85 | } | ||
| 86 | 24 | MPI_Bcast(c.data(), static_cast<int>(n * n), MPI_DOUBLE, 0, MPI_COMM_WORLD); | |
| 87 | 24 | } | |
| 88 | |||
| 89 | ✗ | void DistributeBlocksImpl(int my_rank, const std::vector<double> &blocks_a, const std::vector<double> &blocks_b, | |
| 90 | std::vector<double> &local_a, std::vector<double> &local_b, size_t block_sz) { | ||
| 91 | ✗ | const double *send_a = (my_rank == 0) ? blocks_a.data() : nullptr; | |
| 92 | const double *send_b = (my_rank == 0) ? blocks_b.data() : nullptr; | ||
| 93 | |||
| 94 | ✗ | MPI_Scatter(send_a, static_cast<int>(block_sz), MPI_DOUBLE, local_a.data(), static_cast<int>(block_sz), MPI_DOUBLE, 0, | |
| 95 | MPI_COMM_WORLD); | ||
| 96 | |||
| 97 | ✗ | MPI_Scatter(send_b, static_cast<int>(block_sz), MPI_DOUBLE, local_b.data(), static_cast<int>(block_sz), MPI_DOUBLE, 0, | |
| 98 | MPI_COMM_WORLD); | ||
| 99 | ✗ | } | |
| 100 | |||
| 101 | ✗ | void ExecuteFoxIterationsImpl(int grid_dim, int row_id, int col_id, size_t bs, size_t block_sz, MPI_Comm row_comm, | |
| 102 | std::vector<double> &local_a, std::vector<double> &local_b, | ||
| 103 | std::vector<double> &local_c) { | ||
| 104 | ✗ | std::vector<double> broadcast_buffer(block_sz); | |
| 105 | |||
| 106 | ✗ | for (int stage = 0; stage < grid_dim; ++stage) { | |
| 107 | ✗ | const int source = (row_id + stage) % grid_dim; | |
| 108 | |||
| 109 | ✗ | if (col_id == source) { | |
| 110 | ✗ | broadcast_buffer = local_a; | |
| 111 | } | ||
| 112 | |||
| 113 | ✗ | MPI_Bcast(broadcast_buffer.data(), static_cast<int>(block_sz), MPI_DOUBLE, source, row_comm); | |
| 114 | |||
| 115 | MultiplyBlockPairImpl(broadcast_buffer, local_b, local_c, bs); | ||
| 116 | |||
| 117 | ✗ | const int target = (((row_id - 1 + grid_dim) % grid_dim) * grid_dim) + col_id; | |
| 118 | // Исправлено: добавлены скобки для порядка операций | ||
| 119 | ✗ | const int origin = ((((row_id + 1) % grid_dim) % grid_dim) * grid_dim) + col_id; | |
| 120 | |||
| 121 | ✗ | MPI_Sendrecv_replace(local_b.data(), static_cast<int>(block_sz), MPI_DOUBLE, target, 0, origin, 0, MPI_COMM_WORLD, | |
| 122 | MPI_STATUS_IGNORE); | ||
| 123 | } | ||
| 124 | ✗ | } | |
| 125 | |||
| 126 | ✗ | void CollectResultsImpl(int my_rank, int num_procs, size_t n, size_t bs, size_t block_sz, int grid_dim, | |
| 127 | const std::vector<double> &local_c, std::vector<double> &c) { | ||
| 128 | ✗ | std::vector<double> all_blocks; | |
| 129 | |||
| 130 | ✗ | if (my_rank == 0) { | |
| 131 | ✗ | all_blocks.resize(static_cast<size_t>(num_procs) * block_sz); | |
| 132 | } | ||
| 133 | |||
| 134 | double *recv_buf = (my_rank == 0) ? all_blocks.data() : nullptr; | ||
| 135 | |||
| 136 | ✗ | MPI_Gather(local_c.data(), static_cast<int>(block_sz), MPI_DOUBLE, recv_buf, static_cast<int>(block_sz), MPI_DOUBLE, | |
| 137 | 0, MPI_COMM_WORLD); | ||
| 138 | |||
| 139 | ✗ | if (my_rank == 0) { | |
| 140 | MergeFromBlocksImpl(all_blocks, c, n, bs, grid_dim); | ||
| 141 | } | ||
| 142 | |||
| 143 | ✗ | MPI_Bcast(c.data(), static_cast<int>(n * n), MPI_DOUBLE, 0, MPI_COMM_WORLD); | |
| 144 | ✗ | } | |
| 145 | |||
| 146 | } // namespace | ||
| 147 | |||
| 148 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | MatmulDoubleAllTask::MatmulDoubleAllTask(const InType &in) { |
| 149 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 150 | GetInput() = in; | ||
| 151 | 24 | GetOutput() = std::vector<double>(); | |
| 152 | 24 | } | |
| 153 | |||
| 154 | 24 | bool MatmulDoubleAllTask::ValidationImpl() { | |
| 155 | const auto &input = GetInput(); | ||
| 156 | 24 | const size_t n = std::get<0>(input); | |
| 157 | const auto &a = std::get<1>(input); | ||
| 158 | const auto &b = std::get<2>(input); | ||
| 159 | |||
| 160 |
3/6✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 24 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 24 times.
|
24 | return n > 0 && a.size() == n * n && b.size() == n * n; |
| 161 | } | ||
| 162 | |||
| 163 | 24 | bool MatmulDoubleAllTask::PreProcessingImpl() { | |
| 164 | const auto &input = GetInput(); | ||
| 165 | 24 | matrix_size_ = std::get<0>(input); | |
| 166 | 24 | matrix_a_ = std::get<1>(input); | |
| 167 | 24 | matrix_b_ = std::get<2>(input); | |
| 168 | 24 | result_matrix_.assign(matrix_size_ * matrix_size_, 0.0); | |
| 169 | |||
| 170 | 24 | return true; | |
| 171 | } | ||
| 172 | |||
| 173 | 24 | bool MatmulDoubleAllTask::RunImpl() { | |
| 174 | 24 | int my_rank = 0; | |
| 175 | 24 | int num_procs = 1; | |
| 176 | |||
| 177 | 24 | MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); | |
| 178 | 24 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); | |
| 179 | |||
| 180 | 24 | const size_t n = matrix_size_; | |
| 181 | 24 | const auto &a = matrix_a_; | |
| 182 | 24 | const auto &b = matrix_b_; | |
| 183 | 24 | auto &c = result_matrix_; | |
| 184 | |||
| 185 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
|
24 | const int grid_dim = static_cast<int>(std::sqrt(num_procs)); |
| 186 | |||
| 187 | if (!IsValidConfigurationImpl(n, grid_dim, num_procs)) { | ||
| 188 | 24 | HandleFallbackImpl(my_rank, n, a, b, c); | |
| 189 | 24 | GetOutput() = c; | |
| 190 | 24 | return true; | |
| 191 | } | ||
| 192 | |||
| 193 | ✗ | const size_t bs = n / static_cast<size_t>(grid_dim); | |
| 194 | ✗ | const size_t block_sz = bs * bs; | |
| 195 | |||
| 196 | ✗ | const int row_idx = my_rank / grid_dim; | |
| 197 | ✗ | const int col_idx = my_rank % grid_dim; | |
| 198 | |||
| 199 | ✗ | std::vector<double> local_a_block(block_sz); | |
| 200 | ✗ | std::vector<double> local_b_block(block_sz); | |
| 201 | ✗ | std::vector<double> local_c_block(block_sz, 0.0); | |
| 202 | |||
| 203 | ✗ | std::vector<double> all_blocks_a; | |
| 204 | ✗ | std::vector<double> all_blocks_b; | |
| 205 | |||
| 206 | ✗ | if (my_rank == 0) { | |
| 207 | ✗ | all_blocks_a.resize(static_cast<size_t>(num_procs) * block_sz); | |
| 208 | ✗ | all_blocks_b.resize(static_cast<size_t>(num_procs) * block_sz); | |
| 209 | |||
| 210 | SplitIntoBlocksImpl(a, all_blocks_a, n, bs, grid_dim); | ||
| 211 | SplitIntoBlocksImpl(b, all_blocks_b, n, bs, grid_dim); | ||
| 212 | } | ||
| 213 | |||
| 214 | ✗ | DistributeBlocksImpl(my_rank, all_blocks_a, all_blocks_b, local_a_block, local_b_block, block_sz); | |
| 215 | |||
| 216 | ✗ | MPI_Comm row_comm = MPI_COMM_NULL; | |
| 217 | ✗ | MPI_Comm_split(MPI_COMM_WORLD, row_idx, col_idx, &row_comm); | |
| 218 | |||
| 219 | ✗ | ExecuteFoxIterationsImpl(grid_dim, row_idx, col_idx, bs, block_sz, row_comm, local_a_block, local_b_block, | |
| 220 | local_c_block); | ||
| 221 | |||
| 222 | ✗ | CollectResultsImpl(my_rank, num_procs, n, bs, block_sz, grid_dim, local_c_block, c); | |
| 223 | |||
| 224 | ✗ | if (row_comm != MPI_COMM_NULL) { | |
| 225 | ✗ | MPI_Comm_free(&row_comm); | |
| 226 | } | ||
| 227 | |||
| 228 | ✗ | GetOutput() = c; | |
| 229 | return true; | ||
| 230 | } | ||
| 231 | |||
| 232 | 24 | bool MatmulDoubleAllTask::PostProcessingImpl() { | |
| 233 | 24 | return true; | |
| 234 | } | ||
| 235 | |||
| 236 | } // namespace makoveeva_matmul_double_all | ||
| 237 |