| Line | Branch | Exec | Source |
|---|---|---|---|
#include "iskhakov_d_vertical_gauss_filter/all/include/ops_all.hpp"

#include <mpi.h>
#include <omp.h>

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <utility>
#include <vector>

#include "iskhakov_d_vertical_gauss_filter/common/include/common.hpp"
#include "util/include/util.hpp"
| 15 | |||
| 16 | namespace iskhakov_d_vertical_gauss_filter { | ||
| 17 | |||
| 18 | namespace { | ||
// Divisor applied to every weighted sum; equals the total of the kGaussKernel weights.
constexpr int kDivConst = 16;
// 3x3 integer weights, indexed as [kernel row][kernel column].
constexpr std::array<std::array<int, 3>, 3> kGaussKernel = {{{1, 2, 1}, {2, 4, 2}, {1, 2, 1}}};
| 21 | |||
/// @brief Reads one pixel from a row-major image, reflecting out-of-range coordinates.
///
/// A coordinate below zero or at/above the dimension is mirrored about the
/// nearest edge with the edge pixel repeated (-1 maps to 0, `size` maps to
/// `size - 1`). Only one reflection is applied, so each coordinate must lie in
/// `[-size, 2 * size)`; the 3x3 neighbourhood used in this file satisfies that
/// for any positive dimension.
///
/// @param src    Image pixels, `width` values per row.
/// @param col    Column to read; may be out of range by the amount stated above.
/// @param row    Row to read; may be out of range by the amount stated above.
/// @param width  Number of columns in the image.
/// @param height Number of rows in the image.
/// @return The pixel at the (possibly reflected) coordinate.
uint8_t IskhakovDGetPixelMirrorAll(const std::vector<uint8_t> &src, int col, int row, int width, int height) {
  if (col < 0) {
    col = -col - 1;
  } else if (col >= width) {
    col = (2 * width) - col - 1;
  }
  if (row < 0) {
    row = -row - 1;
  } else if (row >= height) {
    row = (2 * height) - row - 1;
  }
  // Widen before multiplying: row * width evaluated in int can overflow on large images.
  const std::size_t offset =
      (static_cast<std::size_t>(row) * static_cast<std::size_t>(width)) + static_cast<std::size_t>(col);
  return src[offset];
}
| 35 | |||
| 36 | void ProcessLocalBlock(const std::vector<uint8_t> &matrix, std::vector<uint8_t> &local_result, int width, int height, | ||
| 37 | int start_col, int end_col, int local_cols) { | ||
| 38 | 10 | #pragma omp parallel for default(none) \ | |
| 39 | shared(matrix, local_result, width, height, start_col, end_col, local_cols, kGaussKernel, kDivConst) | ||
| 40 | for (int horizontal_band = start_col; horizontal_band < end_col; ++horizontal_band) { | ||
| 41 | const int local_col_idx = horizontal_band - start_col; | ||
| 42 | for (int vertical_band = 0; vertical_band < height; ++vertical_band) { | ||
| 43 | int sum = 0; | ||
| 44 | |||
| 45 | sum += kGaussKernel[0][0] * | ||
| 46 | IskhakovDGetPixelMirrorAll(matrix, horizontal_band - 1, vertical_band - 1, width, height); | ||
| 47 | sum += kGaussKernel[0][1] * IskhakovDGetPixelMirrorAll(matrix, horizontal_band, vertical_band - 1, width, height); | ||
| 48 | sum += kGaussKernel[0][2] * | ||
| 49 | IskhakovDGetPixelMirrorAll(matrix, horizontal_band + 1, vertical_band - 1, width, height); | ||
| 50 | |||
| 51 | sum += kGaussKernel[1][0] * IskhakovDGetPixelMirrorAll(matrix, horizontal_band - 1, vertical_band, width, height); | ||
| 52 | sum += kGaussKernel[1][1] * IskhakovDGetPixelMirrorAll(matrix, horizontal_band, vertical_band, width, height); | ||
| 53 | sum += kGaussKernel[1][2] * IskhakovDGetPixelMirrorAll(matrix, horizontal_band + 1, vertical_band, width, height); | ||
| 54 | |||
| 55 | sum += kGaussKernel[2][0] * | ||
| 56 | IskhakovDGetPixelMirrorAll(matrix, horizontal_band - 1, vertical_band + 1, width, height); | ||
| 57 | sum += kGaussKernel[2][1] * IskhakovDGetPixelMirrorAll(matrix, horizontal_band, vertical_band + 1, width, height); | ||
| 58 | sum += kGaussKernel[2][2] * | ||
| 59 | IskhakovDGetPixelMirrorAll(matrix, horizontal_band + 1, vertical_band + 1, width, height); | ||
| 60 | |||
| 61 | local_result[(vertical_band * local_cols) + local_col_idx] = static_cast<uint8_t>(sum / kDivConst); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | } | ||
| 65 | |||
/// @brief Copies a column band from a compact buffer into its place in the full image.
///
/// Row `r` of `local_result` (stride `local_cols`) is copied to columns
/// `[start_col, end_col)` of row `r` in `global_result` (stride `width`).
/// Nothing is copied when the column range is empty.
///
/// @param local_result  Band pixels, `local_cols` values per row.
/// @param global_result Destination image, `width` values per row; modified in place.
/// @param width         Row stride of `global_result`.
/// @param height        Number of rows to copy.
/// @param start_col     First destination column.
/// @param end_col       One past the last destination column.
/// @param local_cols    Row stride of `local_result`.
void CopyOwnBand(const std::vector<uint8_t> &local_result, std::vector<uint8_t> &global_result, int width, int height,
                 int start_col, int end_col, int local_cols) {
  const int band_width = end_col - start_col;
  if (band_width <= 0) {
    return;
  }

  // Offsets are computed in ptrdiff_t: row * stride evaluated in int can overflow on large images.
  const auto image_stride = static_cast<std::ptrdiff_t>(width);
  const auto band_stride = static_cast<std::ptrdiff_t>(local_cols);
  for (std::ptrdiff_t row = 0; row < height; ++row) {
    std::copy_n(local_result.begin() + (row * band_stride), band_width,
                global_result.begin() + (row * image_stride) + start_col);
  }
}
| 76 | |||
| 77 | 5 | void ReceiveOtherBands(std::vector<uint8_t> &global_result, int width, int height, int size, int cols_per_proc, | |
| 78 | int remainder) { | ||
| 79 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | for (int sender_rank = 1; sender_rank < size; ++sender_rank) { |
| 80 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
5 | const int sender_start_col = (sender_rank * cols_per_proc) + std::min(sender_rank, remainder); |
| 81 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
5 | const int sender_cols = cols_per_proc + (sender_rank < remainder ? 1 : 0); |
| 82 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
|
5 | if (sender_cols == 0) { |
| 83 | 1 | continue; | |
| 84 | } | ||
| 85 |
1/2✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.
|
4 | std::vector<uint8_t> recv_buf(static_cast<size_t>(sender_cols) * height); |
| 86 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | MPI_Recv(recv_buf.data(), static_cast<int>(recv_buf.size()), MPI_UNSIGNED_CHAR, sender_rank, 0, MPI_COMM_WORLD, |
| 87 | MPI_STATUS_IGNORE); | ||
| 88 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 4 times.
|
16 | for (int vertical_band = 0; vertical_band < height; ++vertical_band) { |
| 89 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 12 times.
|
28 | for (int col = 0; col < sender_cols; ++col) { |
| 90 | 16 | global_result[(vertical_band * width) + sender_start_col + col] = recv_buf[(vertical_band * sender_cols) + col]; | |
| 91 | } | ||
| 92 | } | ||
| 93 | } | ||
| 94 | 5 | } | |
| 95 | |||
| 96 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 1 times.
|
5 | void SendOwnBand(const std::vector<uint8_t> &local_result) { |
| 97 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 1 times.
|
5 | if (!local_result.empty()) { |
| 98 | 4 | MPI_Send(local_result.data(), static_cast<int>(local_result.size()), MPI_UNSIGNED_CHAR, 0, 0, MPI_COMM_WORLD); | |
| 99 | } | ||
| 100 | 5 | } | |
| 101 | |||
| 102 | 10 | void GatherAndBroadcast(const std::vector<uint8_t> &local_result, std::vector<uint8_t> &global_result, int width, | |
| 103 | int height, int rank, int size, int cols_per_proc, int remainder, int start_col, int end_col, | ||
| 104 | int local_cols) { | ||
| 105 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (rank == 0) { |
| 106 | CopyOwnBand(local_result, global_result, width, height, start_col, end_col, local_cols); | ||
| 107 | 5 | ReceiveOtherBands(global_result, width, height, size, cols_per_proc, remainder); | |
| 108 | } else { | ||
| 109 | 5 | SendOwnBand(local_result); | |
| 110 | } | ||
| 111 | 10 | MPI_Bcast(global_result.data(), static_cast<int>(global_result.size()), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD); | |
| 112 | 10 | } | |
| 113 | |||
| 114 | } // namespace | ||
| 115 | |||
| 116 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | IskhakovDVerticalGaussFilterALL::IskhakovDVerticalGaussFilterALL(const InType &in) { |
| 117 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 118 | GetInput() = in; | ||
| 119 | 10 | GetOutput() = OutType{}; | |
| 120 | 10 | } | |
| 121 | |||
| 122 | 10 | bool IskhakovDVerticalGaussFilterALL::ValidationImpl() { | |
| 123 | const auto &in = GetInput(); | ||
| 124 |
2/4✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✗ Branch 3 not taken.
|
10 | if (in.width <= 0 || in.height <= 0) { |
| 125 | return false; | ||
| 126 | } | ||
| 127 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
10 | if (in.data.size() != static_cast<size_t>(in.width) * static_cast<size_t>(in.height)) { |
| 128 | ✗ | return false; | |
| 129 | } | ||
| 130 | return true; | ||
| 131 | } | ||
| 132 | |||
| 133 | 10 | bool IskhakovDVerticalGaussFilterALL::PreProcessingImpl() { | |
| 134 | 10 | return true; | |
| 135 | } | ||
| 136 | |||
| 137 | 10 | bool IskhakovDVerticalGaussFilterALL::RunImpl() { | |
| 138 | const auto &in = GetInput(); | ||
| 139 | |||
| 140 | 10 | const int width = in.width; | |
| 141 | 10 | const int height = in.height; | |
| 142 | 10 | const std::vector<uint8_t> &matrix = in.data; | |
| 143 | |||
| 144 | 10 | int rank = 0; | |
| 145 | 10 | int size = 1; | |
| 146 | 10 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 147 | 10 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 148 | |||
| 149 | 10 | const int cols_per_proc = width / size; | |
| 150 | 10 | const int remainder = width % size; | |
| 151 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 3 times.
|
10 | const int start_col = (rank * cols_per_proc) + std::min(rank, remainder); |
| 152 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 3 times.
|
10 | const int end_col = start_col + cols_per_proc + (rank < remainder ? 1 : 0); |
| 153 | 10 | const int local_cols = end_col - start_col; | |
| 154 | |||
| 155 | 10 | std::vector<uint8_t> local_result(static_cast<size_t>(local_cols) * height); | |
| 156 | |||
| 157 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | omp_set_num_threads(ppc::util::GetNumThreads()); |
| 158 | |||
| 159 | ProcessLocalBlock(matrix, local_result, width, height, start_col, end_col, local_cols); | ||
| 160 | |||
| 161 |
1/4✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
10 | std::vector<uint8_t> global_result(static_cast<size_t>(width) * height); |
| 162 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | GatherAndBroadcast(local_result, global_result, width, height, rank, size, cols_per_proc, remainder, start_col, |
| 163 | end_col, local_cols); | ||
| 164 | |||
| 165 | 10 | GetOutput().width = width; | |
| 166 | 10 | GetOutput().height = height; | |
| 167 | 10 | GetOutput().data = std::move(global_result); | |
| 168 | |||
| 169 | 10 | return true; | |
| 170 | } | ||
| 171 | |||
| 172 | 10 | bool IskhakovDVerticalGaussFilterALL::PostProcessingImpl() { | |
| 173 | 10 | return true; | |
| 174 | } | ||
| 175 | |||
| 176 | } // namespace iskhakov_d_vertical_gauss_filter | ||
| 177 |