| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "terekhov_d_gauss_vert/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | #include <omp.h> | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <cmath> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "terekhov_d_gauss_vert/common/include/common.hpp" | ||
| 12 | |||
| 13 | namespace terekhov_d_gauss_vert { | ||
| 14 | |||
| 15 | namespace { | ||
| 16 | |||
/// Folds an arbitrary coordinate onto the valid index range [0, max_val) using
/// symmetric (edge-duplicating) reflection: -1 -> 0, -2 -> 1, max_val -> max_val - 1,
/// max_val + 1 -> max_val - 2, and so on.
///
/// The reflection is periodic with period 2 * max_val, so coordinates that lie
/// more than one image size outside the range are still folded back to a valid
/// index instead of producing an out-of-range result.
///
/// @param coord    Coordinate to fold; may be negative or >= max_val.
/// @param max_val  Size of the axis; expected to be positive.
/// @return An index in [0, max_val). If max_val is not positive there is no
///         valid index, and 0 is returned to avoid dividing by zero.
inline int Mirror(int coord, int max_val) {
  // Fast path: the coordinate is already a valid index.
  if ((coord >= 0) && (coord < max_val)) {
    return coord;
  }
  if (max_val <= 0) {
    return 0;
  }

  const int period = 2 * max_val;
  int folded = coord % period;
  if (folded < 0) {
    // In C++ the remainder takes the sign of the dividend; shift it into [0, period).
    folded += period;
  }
  // The first half of the period maps directly, the second half is reversed.
  return (folded < max_val) ? folded : (period - 1 - folded);
}
| 26 | |||
| 27 | 6 | inline void FillLocalPadded(std::vector<int> &local_padded, const InType &in, int padded_width, int width, int height, | |
| 28 | int start_row, int local_padded_height) { | ||
| 29 |
2/2✓ Branch 0 taken 64 times.
✓ Branch 1 taken 6 times.
|
70 | for (int row = 0; row < local_padded_height; ++row) { |
| 30 | 64 | const int global_row = start_row + row - 1; | |
| 31 |
2/2✓ Branch 0 taken 1632 times.
✓ Branch 1 taken 64 times.
|
1696 | for (int col = 0; col < padded_width; ++col) { |
| 32 |
2/2✓ Branch 0 taken 1568 times.
✓ Branch 1 taken 64 times.
|
1632 | const int src_x = Mirror(col - 1, width); |
| 33 | const int src_y = Mirror(global_row, height); | ||
| 34 | 1632 | const size_t src_idx = (static_cast<size_t>(src_y) * static_cast<size_t>(width)) + static_cast<size_t>(src_x); | |
| 35 | 1632 | const size_t padded_idx = | |
| 36 | 1632 | (static_cast<size_t>(row) * static_cast<size_t>(padded_width)) + static_cast<size_t>(col); | |
| 37 | 1632 | local_padded[padded_idx] = in.data[src_idx]; | |
| 38 | } | ||
| 39 | } | ||
| 40 | 6 | } | |
| 41 | |||
| 42 | inline void ProcessLocalRows(std::vector<int> &local_result, const std::vector<int> &local_padded, int padded_width, | ||
| 43 | int width, int local_height) { | ||
| 44 | 6 | #pragma omp parallel for schedule(static) default(none) \ | |
| 45 | shared(local_result, local_padded, padded_width, width, local_height) shared(kGaussKernel) | ||
| 46 | for (int row = 0; row < local_height; ++row) { | ||
| 47 | for (int col = 0; col < width; ++col) { | ||
| 48 | const size_t idx = (static_cast<size_t>(row) * static_cast<size_t>(width)) + static_cast<size_t>(col); | ||
| 49 | float sum = 0.0F; | ||
| 50 | |||
| 51 | for (int ky = -1; ky <= 1; ++ky) { | ||
| 52 | for (int kx = -1; kx <= 1; ++kx) { | ||
| 53 | const int px = col + kx + 1; | ||
| 54 | const int py = row + ky + 1; | ||
| 55 | const int kernel_idx = ((ky + 1) * 3) + (kx + 1); | ||
| 56 | const size_t padded_idx = | ||
| 57 | (static_cast<size_t>(py) * static_cast<size_t>(padded_width)) + static_cast<size_t>(px); | ||
| 58 | sum += static_cast<float>(local_padded[padded_idx]) * kGaussKernel[static_cast<size_t>(kernel_idx)]; | ||
| 59 | } | ||
| 60 | } | ||
| 61 | |||
| 62 | local_result[idx] = static_cast<int>(std::lround(sum)); | ||
| 63 | } | ||
| 64 | } | ||
| 65 | } | ||
| 66 | |||
| 67 | 6 | inline void GatherResults(std::vector<int> &output_data, const std::vector<int> &local_result, int size) { | |
| 68 | 6 | std::vector<int> recv_counts(static_cast<size_t>(size)); | |
| 69 |
2/6✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 6 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
6 | std::vector<int> displs(static_cast<size_t>(size)); |
| 70 | |||
| 71 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | int local_count = static_cast<int>(local_result.size()); |
| 72 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | MPI_Allgather(&local_count, 1, MPI_INT, recv_counts.data(), 1, MPI_INT, MPI_COMM_WORLD); |
| 73 | |||
| 74 | 6 | displs[0] = 0; | |
| 75 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
|
12 | for (int idx = 1; idx < size; ++idx) { |
| 76 | 6 | displs[idx] = displs[idx - 1] + recv_counts[idx - 1]; | |
| 77 | } | ||
| 78 | |||
| 79 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | MPI_Allgatherv(local_result.data(), local_count, MPI_INT, output_data.data(), recv_counts.data(), displs.data(), |
| 80 | MPI_INT, MPI_COMM_WORLD); | ||
| 81 | 6 | } | |
| 82 | |||
| 83 | 6 | inline OutType SolveALL(const InType &in) { | |
| 84 | 6 | int rank = 0; | |
| 85 | 6 | int size = 1; | |
| 86 | 6 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 87 | 6 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 88 | |||
| 89 | 6 | const int width = in.width; | |
| 90 | 6 | const int height = in.height; | |
| 91 | |||
| 92 | 6 | OutType output; | |
| 93 | 6 | output.width = width; | |
| 94 | 6 | output.height = height; | |
| 95 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | output.data.resize(static_cast<size_t>(width) * static_cast<size_t>(height)); |
| 96 | |||
| 97 | 6 | int rows_per_proc = height / size; | |
| 98 | 6 | int remainder = height % size; | |
| 99 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | int start_row = (rank * rows_per_proc) + std::min(rank, remainder); |
| 100 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | int end_row = start_row + rows_per_proc + ((rank < remainder) ? 1 : 0); |
| 101 | |||
| 102 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | if (start_row >= end_row) { |
| 103 | return output; | ||
| 104 | } | ||
| 105 | |||
| 106 | 6 | const int padded_width = width + 2; | |
| 107 | 6 | const int local_height = end_row - start_row; | |
| 108 | 6 | const int local_padded_height = local_height + 2; | |
| 109 | |||
| 110 |
1/4✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
6 | std::vector<int> local_padded(static_cast<size_t>(padded_width) * static_cast<size_t>(local_padded_height), 0); |
| 111 | 6 | FillLocalPadded(local_padded, in, padded_width, width, height, start_row, local_padded_height); | |
| 112 | |||
| 113 |
1/4✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
6 | std::vector<int> local_result(static_cast<size_t>(local_height) * static_cast<size_t>(width), 0); |
| 114 | ProcessLocalRows(local_result, local_padded, padded_width, width, local_height); | ||
| 115 | |||
| 116 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | GatherResults(output.data, local_result, size); |
| 117 | |||
| 118 | return output; | ||
| 119 | } | ||
| 120 | |||
| 121 | } // namespace | ||
| 122 | |||
| 123 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | TerekhovDGaussVertALL::TerekhovDGaussVertALL(const InType &in) { |
| 124 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 125 | GetInput() = in; | ||
| 126 | 6 | local_out_.width = 0; | |
| 127 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
|
6 | local_out_.height = 0; |
| 128 | local_out_.data.clear(); | ||
| 129 | 6 | } | |
| 130 | |||
| 131 | 12 | bool TerekhovDGaussVertALL::ValidationImpl() { | |
| 132 | const auto &in = GetInput(); | ||
| 133 |
2/4✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.
|
12 | if ((in.width <= 0) || (in.height <= 0)) { |
| 134 | return false; | ||
| 135 | } | ||
| 136 | 12 | const std::size_t need = static_cast<std::size_t>(in.width) * static_cast<std::size_t>(in.height); | |
| 137 | 12 | return in.data.size() == need; | |
| 138 | } | ||
| 139 | |||
| 140 | 6 | bool TerekhovDGaussVertALL::PreProcessingImpl() { | |
| 141 | 6 | local_out_.width = 0; | |
| 142 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
|
6 | local_out_.height = 0; |
| 143 | local_out_.data.clear(); | ||
| 144 | 6 | return true; | |
| 145 | } | ||
| 146 | |||
| 147 | 6 | bool TerekhovDGaussVertALL::RunImpl() { | |
| 148 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | if (!ValidationImpl()) { |
| 149 | return false; | ||
| 150 | } | ||
| 151 | 6 | local_out_ = SolveALL(GetInput()); | |
| 152 | 6 | return true; | |
| 153 | } | ||
| 154 | |||
| 155 | 6 | bool TerekhovDGaussVertALL::PostProcessingImpl() { | |
| 156 | GetOutput() = local_out_; | ||
| 157 | const auto &out = GetOutput(); | ||
| 158 | 6 | return out.data.size() == (static_cast<size_t>(out.width) * static_cast<size_t>(out.height)); | |
| 159 | } | ||
| 160 | |||
| 161 | } // namespace terekhov_d_gauss_vert | ||
| 162 |