| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "samoylenko_i_conj_grad_method/mpi/include/ops_mpi.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <cmath> | ||
| 6 | #include <cstddef> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "samoylenko_i_conj_grad_method/common/include/common.hpp" | ||
| 10 | |||
| 11 | namespace samoylenko_i_conj_grad_method { | ||
| 12 | |||
| 13 | 24 | SamoylenkoIConjGradMethodMPI::SamoylenkoIConjGradMethodMPI(const InType &in) { | |
| 14 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 15 | GetInput() = in; | ||
| 16 | GetOutput().clear(); | ||
| 17 | 24 | } | |
| 18 | |||
| 19 | 24 | bool SamoylenkoIConjGradMethodMPI::ValidationImpl() { | |
| 20 | 24 | int world_rank = 0; | |
| 21 | 24 | MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); | |
| 22 | |||
| 23 | 24 | int valid = 0; | |
| 24 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
|
24 | if (world_rank == 0) { |
| 25 |
4/8✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 12 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 12 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 12 times.
|
12 | valid = (GetInput().first > 0 && (GetInput().second >= 0 && GetInput().second <= 2) && GetOutput().empty()) ? 1 : 0; |
| 26 | } | ||
| 27 | |||
| 28 | 24 | MPI_Bcast(&valid, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 29 | 24 | return valid == 1; | |
| 30 | } | ||
| 31 | |||
| 32 | 24 | bool SamoylenkoIConjGradMethodMPI::PreProcessingImpl() { | |
| 33 | 24 | int world_rank = 0; | |
| 34 | 24 | MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); | |
| 35 | |||
| 36 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
|
24 | if (world_rank == 0) { |
| 37 | GetOutput().clear(); | ||
| 38 | } | ||
| 39 | |||
| 40 | 24 | return true; | |
| 41 | } | ||
| 42 | |||
| 43 | namespace { | ||
| 44 | |||
// Splits `size` matrix rows across `world_size` ranks as evenly as possible.
//
// Every rank gets size / world_size rows, and the first size % world_size
// ranks get one extra row. On return row_counts[p] holds the number of rows
// owned by rank p and row_displs[p] the index of its first row, i.e. the
// counts/displacements pair passed to the MPI "v" collectives in this file.
//
// The output vectors are grown to world_size entries when they are shorter;
// entries beyond world_size are left untouched. A non-positive world_size
// leaves both vectors unchanged (and avoids a division by zero).
void CalculateDistribution(size_t size, int world_size, std::vector<int> &row_counts, std::vector<int> &row_displs) {
  if (world_size <= 0) {
    return;
  }

  const auto ranks = static_cast<size_t>(world_size);
  if (row_counts.size() < ranks) {
    row_counts.resize(ranks);
  }
  if (row_displs.size() < ranks) {
    row_displs.resize(ranks);
  }

  const int total_rows = static_cast<int>(size);
  const int base_rows = total_rows / world_size;
  const int extra_rows = total_rows % world_size;

  int offset = 0;
  for (int proc = 0; proc < world_size; ++proc) {
    const int rows = base_rows + (proc < extra_rows ? 1 : 0);
    row_counts[proc] = rows;
    row_displs[proc] = offset;
    offset += rows;
  }
}
| 56 | |||
// Builds this rank's slice of the size x size test matrix: rows
// [local_start, local_start + local_rows), stored row-major with `size`
// columns per row.
//
// `variant` selects the matrix:
//   0 - tridiagonal: 4 on the diagonal, 1 on the sub- and super-diagonal;
//   1 - diagonal: 5 on the diagonal;
//   2 - 3 on the diagonal and -1 on the anti-diagonal (where the two cross,
//       the diagonal value 3 is kept);
//   any other value - all zeros.
//
// Returns an empty vector when local_rows is not positive or size is zero.
// Rows whose global index falls outside [0, size) are left as zeros instead
// of being written outside their row.
std::vector<double> BuildLocalMatrix(size_t size, int local_rows, int local_start, int variant) {
  if (local_rows <= 0 || size == 0) {
    return {};
  }

  const auto row_count = static_cast<size_t>(local_rows);
  std::vector<double> local_matrix(row_count * size, 0.0);

  for (size_t i = 0; i < row_count; ++i) {
    // Do the index arithmetic in a wide signed type so a negative start or an
    // overshooting row is detected rather than wrapped.
    const long long signed_row = static_cast<long long>(local_start) + static_cast<long long>(i);
    if (signed_row < 0 || static_cast<unsigned long long>(signed_row) >= size) {
      continue;
    }
    const auto global_row = static_cast<size_t>(signed_row);
    double *row = local_matrix.data() + (i * size);

    switch (variant) {
      case 0: {
        row[global_row] = 4.0;
        if (global_row > 0) {
          row[global_row - 1] = 1.0;
        }
        if (global_row + 1 < size) {
          row[global_row + 1] = 1.0;
        }
        break;
      }

      case 1: {
        row[global_row] = 5.0;
        break;
      }

      case 2: {
        row[global_row] = 3.0;
        const size_t anti_col = size - 1 - global_row;
        if (anti_col != global_row) {
          row[anti_col] = -1.0;
        }
        break;
      }

      default:
        break;
    }
  }
  return local_matrix;
}
| 94 | |||
// Multiplies this rank's row block by a full-length vector.
//
// `local_matrix` holds `local_rows` rows of `size` columns in row-major order;
// `vector` supplies the `size` right-hand entries. Entry i of `local_result`
// receives the dot product of local row i with `vector`; `local_result` must
// already have room for `local_rows` entries.
void LocalMatrixVectorMult(size_t size, int local_rows, const std::vector<double> &local_matrix,
                           const std::vector<double> &vector, std::vector<double> &local_result) {
  const double *row_begin = local_matrix.data();
  for (int row = 0; row < local_rows; ++row, row_begin += size) {
    const double *rhs = vector.data();
    const double *const row_end = row_begin + size;

    // Accumulate left to right across the row.
    double acc = 0.0;
    for (const double *cell = row_begin; cell != row_end; ++cell, ++rhs) {
      acc += *cell * *rhs;
    }
    local_result[row] = acc;
  }
}
| 105 | |||
// Returns the dot product of the first `local_rows` entries of the two
// vectors, accumulated from index 0 upwards. A non-positive `local_rows`
// yields 0.
double LocalDotProduct(int local_rows, const std::vector<double> &local_first,
                       const std::vector<double> &local_second) {
  const double *lhs = local_first.data();
  const double *rhs = local_second.data();

  double acc = 0.0;
  for (int remaining = local_rows; remaining > 0; --remaining, ++lhs, ++rhs) {
    acc += *lhs * *rhs;
  }
  return acc;
}
| 114 | |||
| 115 | 24 | void ConjugateGradient(size_t size, int local_rows, const std::vector<double> &local_matrix, | |
| 116 | const std::vector<double> &local_vector, const std::vector<int> &row_counts, | ||
| 117 | const std::vector<int> &row_displs, std::vector<double> &local_x) { | ||
| 118 | const double eps = 1e-7; | ||
| 119 | const int iters = 2000; | ||
| 120 | |||
| 121 | 24 | std::vector<double> local_res(local_rows); | |
| 122 |
1/4✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
24 | std::vector<double> local_dir(local_rows); |
| 123 |
1/4✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
24 | std::vector<double> local_matdir(local_rows); |
| 124 | |||
| 125 |
1/4✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
24 | std::vector<double> x(size); |
| 126 |
2/6✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 24 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
24 | std::vector<double> dir(size); |
| 127 | |||
| 128 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | MPI_Allgatherv(local_x.data(), local_rows, MPI_DOUBLE, x.data(), row_counts.data(), row_displs.data(), MPI_DOUBLE, |
| 129 | MPI_COMM_WORLD); | ||
| 130 | 24 | LocalMatrixVectorMult(size, local_rows, local_matrix, x, local_matdir); | |
| 131 | |||
| 132 |
2/2✓ Branch 0 taken 192 times.
✓ Branch 1 taken 24 times.
|
216 | for (int i = 0; i < local_rows; ++i) { |
| 133 | 192 | local_res[i] = local_vector[i] - local_matdir[i]; | |
| 134 | 192 | local_dir[i] = local_res[i]; | |
| 135 | } | ||
| 136 | |||
| 137 | 24 | double local_res_dot = LocalDotProduct(local_rows, local_res, local_res); | |
| 138 | 24 | double res_dot = 0.0; | |
| 139 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | MPI_Allreduce(&local_res_dot, &res_dot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); |
| 140 | |||
| 141 |
1/2✓ Branch 0 taken 84 times.
✗ Branch 1 not taken.
|
84 | for (int it = 0; it < iters; ++it) { |
| 142 |
2/2✓ Branch 0 taken 60 times.
✓ Branch 1 taken 24 times.
|
84 | if (std::sqrt(res_dot) < eps) { |
| 143 | break; | ||
| 144 | } | ||
| 145 | |||
| 146 |
1/2✓ Branch 1 taken 60 times.
✗ Branch 2 not taken.
|
60 | MPI_Allgatherv(local_dir.data(), local_rows, MPI_DOUBLE, dir.data(), row_counts.data(), row_displs.data(), |
| 147 | MPI_DOUBLE, MPI_COMM_WORLD); | ||
| 148 | 60 | LocalMatrixVectorMult(size, local_rows, local_matrix, dir, local_matdir); | |
| 149 | |||
| 150 | 60 | double local_dir_dot = LocalDotProduct(local_rows, local_dir, local_matdir); | |
| 151 | 60 | double dir_dot = 0.0; | |
| 152 |
1/2✓ Branch 1 taken 60 times.
✗ Branch 2 not taken.
|
60 | MPI_Allreduce(&local_dir_dot, &dir_dot, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); |
| 153 | |||
| 154 |
1/2✓ Branch 0 taken 60 times.
✗ Branch 1 not taken.
|
60 | if (std::fabs(dir_dot) < 1e-15) { |
| 155 | break; // so we dont divide by 0 | ||
| 156 | } | ||
| 157 | |||
| 158 | 60 | double step = res_dot / dir_dot; | |
| 159 | |||
| 160 |
2/2✓ Branch 0 taken 838 times.
✓ Branch 1 taken 60 times.
|
898 | for (int i = 0; i < local_rows; ++i) { |
| 161 | 838 | local_x[i] += step * local_dir[i]; | |
| 162 | 838 | local_res[i] -= step * local_matdir[i]; | |
| 163 | } | ||
| 164 | |||
| 165 | 60 | double local_res_dot_new = LocalDotProduct(local_rows, local_res, local_res); | |
| 166 | 60 | double res_dot_new = 0.0; | |
| 167 |
1/2✓ Branch 1 taken 60 times.
✗ Branch 2 not taken.
|
60 | MPI_Allreduce(&local_res_dot_new, &res_dot_new, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); |
| 168 | |||
| 169 | 60 | double conj_coef = res_dot_new / res_dot; | |
| 170 | |||
| 171 |
2/2✓ Branch 0 taken 838 times.
✓ Branch 1 taken 60 times.
|
898 | for (int i = 0; i < local_rows; ++i) { |
| 172 | 838 | local_dir[i] = local_res[i] + (conj_coef * local_dir[i]); | |
| 173 | } | ||
| 174 | |||
| 175 | 60 | res_dot = res_dot_new; | |
| 176 | } | ||
| 177 | 24 | } | |
| 178 | |||
| 179 | } // namespace | ||
| 180 | |||
| 181 | 24 | bool SamoylenkoIConjGradMethodMPI::RunImpl() { | |
| 182 | 24 | int world_rank = 0; | |
| 183 | 24 | int world_size = 0; | |
| 184 | 24 | MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); | |
| 185 | 24 | MPI_Comm_size(MPI_COMM_WORLD, &world_size); | |
| 186 | |||
| 187 | 24 | int n = 0; | |
| 188 | 24 | int variant = 0; | |
| 189 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
|
24 | if (world_rank == 0) { |
| 190 | 12 | n = GetInput().first; | |
| 191 | 12 | variant = GetInput().second; | |
| 192 | } | ||
| 193 | |||
| 194 | 24 | MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 195 | 24 | MPI_Bcast(&variant, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 196 | |||
| 197 |
1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
|
24 | if (n <= 0) { |
| 198 | return false; | ||
| 199 | } | ||
| 200 | |||
| 201 | 24 | auto size = static_cast<size_t>(n); | |
| 202 | |||
| 203 | 24 | std::vector<int> row_counts(world_size); | |
| 204 |
1/4✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
24 | std::vector<int> row_displs(world_size); |
| 205 | 24 | CalculateDistribution(size, world_size, row_counts, row_displs); | |
| 206 | |||
| 207 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | int local_rows = row_counts[world_rank]; |
| 208 | 24 | int local_start = row_displs[world_rank]; | |
| 209 | |||
| 210 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | std::vector<double> local_matrix = BuildLocalMatrix(size, local_rows, local_start, variant); |
| 211 |
1/4✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
24 | std::vector<double> vector(size); |
| 212 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
|
24 | if (world_rank == 0) { |
| 213 |
2/2✓ Branch 0 taken 192 times.
✓ Branch 1 taken 12 times.
|
204 | for (size_t i = 0; i < size; ++i) { |
| 214 | 192 | vector[i] = 1.0; | |
| 215 | } | ||
| 216 | } | ||
| 217 | |||
| 218 |
2/6✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 24 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
24 | std::vector<double> local_vector(local_rows); |
| 219 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | MPI_Scatterv(vector.data(), row_counts.data(), row_displs.data(), MPI_DOUBLE, local_vector.data(), local_rows, |
| 220 | MPI_DOUBLE, 0, MPI_COMM_WORLD); | ||
| 221 | |||
| 222 |
1/4✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
24 | std::vector<double> local_x(local_rows, 0.0); |
| 223 | |||
| 224 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | ConjugateGradient(size, local_rows, local_matrix, local_vector, row_counts, row_displs, local_x); |
| 225 | |||
| 226 |
2/6✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 24 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
24 | std::vector<double> x(size); |
| 227 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | MPI_Gatherv(local_x.data(), local_rows, MPI_DOUBLE, x.data(), row_counts.data(), row_displs.data(), MPI_DOUBLE, 0, |
| 228 | MPI_COMM_WORLD); | ||
| 229 | |||
| 230 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
|
24 | if (world_rank == 0) { |
| 231 |
1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
|
12 | GetOutput() = x; |
| 232 | } | ||
| 233 | |||
| 234 | return true; | ||
| 235 | } | ||
| 236 | |||
| 237 | 24 | bool SamoylenkoIConjGradMethodMPI::PostProcessingImpl() { | |
| 238 | 24 | int world_rank = 0; | |
| 239 | 24 | MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); | |
| 240 | |||
| 241 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
|
24 | if (world_rank == 0) { |
| 242 | 12 | return !GetOutput().empty(); | |
| 243 | } | ||
| 244 | |||
| 245 | return true; | ||
| 246 | } | ||
| 247 | |||
| 248 | } // namespace samoylenko_i_conj_grad_method | ||
| 249 |