| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "batkov_f_image_smoothing/mpi/include/ops_mpi.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cmath> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <cstdint> | ||
| 9 | #include <utility> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "batkov_f_image_smoothing/common/include/common.hpp" | ||
| 13 | |||
| 14 | namespace batkov_f_image_smoothing { | ||
| 15 | |||
| 16 | namespace { | ||
| 17 | |||
| 18 | 8 | void CopyLocalImageData(const std::vector<uint8_t> &img_data, size_t width, size_t channels, size_t local_start, | |
| 19 | size_t local_height, std::vector<uint8_t> &local_data) { | ||
| 20 |
2/2✓ Branch 0 taken 526 times.
✓ Branch 1 taken 8 times.
|
534 | for (size_t y_pos = 0; y_pos < local_height; y_pos++) { |
| 21 | 526 | size_t global_y = local_start + y_pos; | |
| 22 |
2/2✓ Branch 0 taken 117140 times.
✓ Branch 1 taken 526 times.
|
117666 | for (size_t x_pos = 0; x_pos < width; x_pos++) { |
| 23 |
2/2✓ Branch 0 taken 351420 times.
✓ Branch 1 taken 117140 times.
|
468560 | for (size_t ch = 0; ch < channels; ch++) { |
| 24 | 351420 | local_data[((y_pos * width + x_pos) * channels) + ch] = img_data[((global_y * width + x_pos) * channels) + ch]; | |
| 25 | } | ||
| 26 | } | ||
| 27 | } | ||
| 28 | 8 | } | |
| 29 | |||
| 30 | 8 | void ProcessLocalImage(const std::vector<uint8_t> &local_data, const std::vector<std::vector<float>> &kernel, | |
| 31 | size_t width, size_t channels, size_t start_row, size_t end_row, size_t local_start, | ||
| 32 | size_t local_height, size_t kernel_size, size_t half, std::vector<uint8_t> &local_result) { | ||
| 33 |
2/2✓ Branch 0 taken 510 times.
✓ Branch 1 taken 8 times.
|
518 | for (size_t y_local = 0; y_local < (end_row - start_row); y_local++) { |
| 34 | 510 | size_t y_in_local_data = y_local + (start_row - local_start); | |
| 35 |
2/2✓ Branch 0 taken 115100 times.
✓ Branch 1 taken 510 times.
|
115610 | for (size_t x_px = 0; x_px < width; x_px++) { |
| 36 |
2/2✓ Branch 0 taken 345300 times.
✓ Branch 1 taken 115100 times.
|
460400 | for (size_t ch = 0; ch < channels; ch++) { |
| 37 | float value = 0.0F; | ||
| 38 |
2/2✓ Branch 0 taken 1726500 times.
✓ Branch 1 taken 345300 times.
|
2071800 | for (size_t ky = 0; ky < kernel_size; ky++) { |
| 39 |
2/2✓ Branch 0 taken 8632500 times.
✓ Branch 1 taken 1726500 times.
|
10359000 | for (size_t kx = 0; kx < kernel_size; kx++) { |
| 40 | 8632500 | size_t px = x_px + kx - half; | |
| 41 | 8632500 | size_t py = y_in_local_data + ky - half; | |
| 42 | |||
| 43 |
4/4✓ Branch 0 taken 45900 times.
✓ Branch 1 taken 8586600 times.
✓ Branch 2 taken 8609550 times.
✓ Branch 3 taken 22950 times.
|
8678400 | px = std::max<size_t>(0, std::min(px, width - 1)); |
| 44 |
4/4✓ Branch 0 taken 45900 times.
✓ Branch 1 taken 8586600 times.
✓ Branch 2 taken 8601900 times.
✓ Branch 3 taken 30600 times.
|
8678400 | py = std::max<size_t>(0, std::min(py, local_height - 1)); |
| 45 | |||
| 46 | 8632500 | uint8_t pixel_value = local_data[((py * width + px) * channels) + ch]; | |
| 47 | 8632500 | value += static_cast<float>(pixel_value) * kernel[ky][kx]; | |
| 48 | } | ||
| 49 | } | ||
| 50 | 345300 | local_result[((y_local * width + x_px) * channels) + ch] = static_cast<uint8_t>(value); | |
| 51 | } | ||
| 52 | } | ||
| 53 | } | ||
| 54 | 8 | } | |
| 55 | |||
| 56 | 4 | void CopyProcess0Result(const std::vector<uint8_t> &local_result, size_t width, size_t channels, size_t start_row, | |
| 57 | size_t end_row, std::vector<uint8_t> &result) { | ||
| 58 | 4 | size_t process0_rows = end_row - start_row; | |
| 59 |
2/2✓ Branch 0 taken 255 times.
✓ Branch 1 taken 4 times.
|
259 | for (size_t y_pos = 0; y_pos < process0_rows; y_pos++) { |
| 60 |
2/2✓ Branch 0 taken 57550 times.
✓ Branch 1 taken 255 times.
|
57805 | for (size_t x_pos = 0; x_pos < width; x_pos++) { |
| 61 |
2/2✓ Branch 0 taken 172650 times.
✓ Branch 1 taken 57550 times.
|
230200 | for (size_t ch = 0; ch < channels; ch++) { |
| 62 | 172650 | result[(((start_row + y_pos) * width + x_pos) * channels) + ch] = | |
| 63 | 172650 | local_result[((y_pos * width + x_pos) * channels) + ch]; | |
| 64 | } | ||
| 65 | } | ||
| 66 | } | ||
| 67 | 4 | } | |
| 68 | |||
| 69 | 4 | void GatherResultsFromProcesses(size_t size, size_t width, size_t channels, size_t rows_per_process, size_t remainder, | |
| 70 | size_t start_row, size_t end_row, const std::vector<uint8_t> &local_result, | ||
| 71 | std::vector<uint8_t> &result) { | ||
| 72 | 4 | CopyProcess0Result(local_result, width, channels, start_row, end_row, result); | |
| 73 | |||
| 74 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | for (size_t proc = 1; proc < size; proc++) { |
| 75 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | size_t p_start = (proc * rows_per_process) + std::min<size_t>(proc, remainder); |
| 76 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | size_t p_end = p_start + rows_per_process + (proc < remainder ? 1 : 0); |
| 77 | 4 | size_t p_rows = p_end - p_start; | |
| 78 | 4 | size_t p_data_size = width * p_rows * channels; | |
| 79 | |||
| 80 | 4 | std::vector<uint8_t> recv_buffer(p_data_size); | |
| 81 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | MPI_Recv(recv_buffer.data(), static_cast<int>(p_data_size), MPI_UNSIGNED_CHAR, static_cast<int>(proc), 0, |
| 82 | MPI_COMM_WORLD, MPI_STATUS_IGNORE); | ||
| 83 | |||
| 84 |
2/2✓ Branch 0 taken 255 times.
✓ Branch 1 taken 4 times.
|
259 | for (size_t y_pos = 0; y_pos < p_rows; y_pos++) { |
| 85 |
2/2✓ Branch 0 taken 57550 times.
✓ Branch 1 taken 255 times.
|
57805 | for (size_t x_pos = 0; x_pos < width; x_pos++) { |
| 86 |
2/2✓ Branch 0 taken 172650 times.
✓ Branch 1 taken 57550 times.
|
230200 | for (size_t ch = 0; ch < channels; ch++) { |
| 87 | 172650 | result[(((p_start + y_pos) * width + x_pos) * channels) + ch] = | |
| 88 | 172650 | recv_buffer[((y_pos * width + x_pos) * channels) + ch]; | |
| 89 | } | ||
| 90 | } | ||
| 91 | } | ||
| 92 | } | ||
| 93 | 4 | } | |
| 94 | |||
| 95 | } // namespace | ||
| 96 | |||
| 97 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | BatkovFImageSmoothingMPI::BatkovFImageSmoothingMPI(const InType &in) { |
| 98 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 99 | GetInput() = in; | ||
| 100 | 8 | GetOutput() = Image(); | |
| 101 | 8 | } | |
| 102 | |||
| 103 |
1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
|
8 | bool BatkovFImageSmoothingMPI::ValidationImpl() { |
| 104 |
3/6✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 8 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 8 times.
|
8 | return (!GetInput().data.empty()) && (GetInput().width > 0) && (GetInput().height > 0); |
| 105 | } | ||
| 106 | |||
| 107 | 8 | bool BatkovFImageSmoothingMPI::PreProcessingImpl() { | |
| 108 | size_t size = 5; | ||
| 109 | float sigma = 1.0F; | ||
| 110 | |||
| 111 | 8 | gaussian_kernel_.resize(size); | |
| 112 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 8 times.
|
48 | for (auto &v : gaussian_kernel_) { |
| 113 | 40 | v.resize(size); | |
| 114 | } | ||
| 115 | |||
| 116 | float sum = 0.0F; | ||
| 117 | size_t half = size / 2; | ||
| 118 | |||
| 119 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 8 times.
|
48 | for (size_t i = 0; i < size; i++) { |
| 120 |
2/2✓ Branch 0 taken 200 times.
✓ Branch 1 taken 40 times.
|
240 | for (size_t j = 0; j < size; j++) { |
| 121 | 200 | size_t x = i - half; | |
| 122 | 200 | size_t y = j - half; | |
| 123 | 200 | float value = std::exp((-static_cast<float>((x * x) + (y * y)) / (2 * sigma * sigma))); | |
| 124 | 200 | gaussian_kernel_[i][j] = value; | |
| 125 | 200 | sum += value; | |
| 126 | } | ||
| 127 | } | ||
| 128 | |||
| 129 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 8 times.
|
48 | for (size_t i = 0; i < size; i++) { |
| 130 |
2/2✓ Branch 0 taken 200 times.
✓ Branch 1 taken 40 times.
|
240 | for (size_t j = 0; j < size; j++) { |
| 131 | 200 | gaussian_kernel_[i][j] /= sum; | |
| 132 | } | ||
| 133 | } | ||
| 134 | |||
| 135 | 8 | return true; | |
| 136 | } | ||
| 137 | |||
| 138 | 8 | bool BatkovFImageSmoothingMPI::RunImpl() { | |
| 139 | 8 | int int_rank = 0; | |
| 140 | 8 | int int_size = 0; | |
| 141 | 8 | MPI_Comm_rank(MPI_COMM_WORLD, &int_rank); | |
| 142 | 8 | MPI_Comm_size(MPI_COMM_WORLD, &int_size); | |
| 143 | |||
| 144 | auto &img = GetInput(); | ||
| 145 | 8 | size_t width = img.width; | |
| 146 | 8 | size_t height = img.height; | |
| 147 | 8 | size_t channels = img.channels; | |
| 148 | 8 | const auto &img_data = img.data; | |
| 149 | |||
| 150 | 8 | const auto rank = static_cast<size_t>(int_rank); | |
| 151 | 8 | const auto size = static_cast<size_t>(int_size); | |
| 152 | const size_t kernel_size = 5; | ||
| 153 | const size_t half = kernel_size / 2; | ||
| 154 | |||
| 155 | 8 | size_t rows_per_process = height / size; | |
| 156 | 8 | size_t remainder = height % size; | |
| 157 |
1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
|
8 | size_t start_row = (rank * rows_per_process) + std::min<size_t>(rank, remainder); |
| 158 |
1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
|
8 | size_t end_row = start_row + rows_per_process + (rank < remainder ? 1 : 0); |
| 159 | |||
| 160 | 8 | size_t local_start = (start_row > half) ? start_row - half : 0; | |
| 161 | 8 | size_t local_end = (end_row + half < height) ? end_row + half : height; | |
| 162 | 8 | size_t local_height = local_end - local_start; | |
| 163 | |||
| 164 | 8 | std::vector<uint8_t> local_data(width * local_height * channels); | |
| 165 | 8 | CopyLocalImageData(img_data, width, channels, local_start, local_height, local_data); | |
| 166 | |||
| 167 |
1/4✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
8 | std::vector<uint8_t> local_result(width * (end_row - start_row) * channels); |
| 168 | 8 | ProcessLocalImage(local_data, gaussian_kernel_, width, channels, start_row, end_row, local_start, local_height, | |
| 169 | kernel_size, half, local_result); | ||
| 170 | |||
| 171 |
1/4✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
8 | std::vector<uint8_t> result(width * height * channels); |
| 172 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (rank == 0) { |
| 173 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | GatherResultsFromProcesses(size, width, channels, rows_per_process, remainder, start_row, end_row, local_result, |
| 174 | result); | ||
| 175 | } else { | ||
| 176 | size_t data_size = width * (end_row - start_row) * channels; | ||
| 177 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | MPI_Send(local_result.data(), static_cast<int>(data_size), MPI_UNSIGNED_CHAR, 0, 0, MPI_COMM_WORLD); |
| 178 | } | ||
| 179 | |||
| 180 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | MPI_Barrier(MPI_COMM_WORLD); |
| 181 | |||
| 182 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | MPI_Bcast(result.data(), static_cast<int>(width * height * channels), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD); |
| 183 | |||
| 184 | 8 | GetOutput().data = std::move(result); | |
| 185 | 8 | GetOutput().width = width; | |
| 186 | 8 | GetOutput().height = height; | |
| 187 | 8 | GetOutput().channels = channels; | |
| 188 | |||
| 189 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | MPI_Barrier(MPI_COMM_WORLD); |
| 190 | 8 | return true; | |
| 191 | } | ||
| 192 | |||
| 193 | 8 | bool BatkovFImageSmoothingMPI::PostProcessingImpl() { | |
| 194 | 8 | return true; | |
| 195 | } | ||
| 196 | |||
| 197 | } // namespace batkov_f_image_smoothing | ||
| 198 |