| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "shilin_n_gauss_filter_vertical_split/mpi/include/ops_mpi.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <cstdint> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "shilin_n_gauss_filter_vertical_split/common/include/common.hpp" | ||
| 12 | |||
| 13 | namespace shilin_n_gauss_filter_vertical_split { | ||
| 14 | |||
| 15 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | ShilinNGaussFilterVerticalSplitMPI::ShilinNGaussFilterVerticalSplitMPI(const InType &in) { |
| 16 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 17 | 14 | int rank = 0; | |
| 18 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); |
| 19 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
|
14 | if (rank == 0) { |
| 20 | GetInput() = in; | ||
| 21 | } | ||
| 22 | 14 | GetOutput() = std::vector<uint8_t>(); | |
| 23 | 14 | } | |
| 24 | |||
| 25 | 14 | bool ShilinNGaussFilterVerticalSplitMPI::ValidationImpl() { | |
| 26 | 14 | int rank = 0; | |
| 27 | 14 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 28 | |||
| 29 | 14 | int validation_result = 1; | |
| 30 | |||
| 31 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
|
14 | if (rank == 0) { |
| 32 | const InType &input = GetInput(); | ||
| 33 |
3/6✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 7 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 7 times.
|
7 | if (input.width <= 0 || input.height <= 0 || input.channels <= 0) { |
| 34 | ✗ | validation_result = 0; | |
| 35 | } else { | ||
| 36 | 7 | size_t expected_size = | |
| 37 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 7 times.
|
7 | static_cast<size_t>(input.width) * static_cast<size_t>(input.height) * static_cast<size_t>(input.channels); |
| 38 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 7 times.
|
7 | if (input.pixels.size() != expected_size) { |
| 39 | ✗ | validation_result = 0; | |
| 40 | } | ||
| 41 | } | ||
| 42 | } | ||
| 43 | |||
| 44 | 14 | MPI_Bcast(&validation_result, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 45 | |||
| 46 | 14 | return validation_result != 0; | |
| 47 | } | ||
| 48 | |||
| 49 | 14 | bool ShilinNGaussFilterVerticalSplitMPI::PreProcessingImpl() { | |
| 50 | 14 | GetOutput() = std::vector<uint8_t>(); | |
| 51 | 14 | return true; | |
| 52 | } | ||
| 53 | |||
| 54 | 14 | bool ShilinNGaussFilterVerticalSplitMPI::RunImpl() { | |
| 55 | 14 | int rank = 0; | |
| 56 | 14 | int size = 0; | |
| 57 | 14 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 58 | 14 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 59 | |||
| 60 | 14 | int width = 0; | |
| 61 | 14 | int height = 0; | |
| 62 | 14 | int channels = 0; | |
| 63 | |||
| 64 | 14 | std::vector<uint8_t> input_pixels; | |
| 65 | |||
| 66 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
|
14 | if (rank == 0) { |
| 67 | const InType &input = GetInput(); | ||
| 68 | 7 | width = input.width; | |
| 69 | 7 | height = input.height; | |
| 70 | 7 | channels = input.channels; | |
| 71 |
1/2✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
|
7 | input_pixels = input.pixels; |
| 72 | } | ||
| 73 | |||
| 74 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | MPI_Bcast(&width, 1, MPI_INT, 0, MPI_COMM_WORLD); |
| 75 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | MPI_Bcast(&height, 1, MPI_INT, 0, MPI_COMM_WORLD); |
| 76 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | MPI_Bcast(&channels, 1, MPI_INT, 0, MPI_COMM_WORLD); |
| 77 | |||
| 78 |
3/6✓ Branch 0 taken 14 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 14 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 14 times.
✗ Branch 5 not taken.
|
14 | if (width <= 0 || height <= 0 || channels <= 0) { |
| 79 | return false; | ||
| 80 | } | ||
| 81 | |||
| 82 | 14 | int local_width = 0; | |
| 83 | 14 | int local_start_col = 0; | |
| 84 | |||
| 85 | 14 | std::vector<uint8_t> local_input; | |
| 86 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | DistributeVerticalStripes(input_pixels, local_input, width, height, channels, rank, size, local_width, |
| 87 | local_start_col); | ||
| 88 | |||
| 89 | 14 | std::vector<uint8_t> local_output(static_cast<size_t>(local_width) * static_cast<size_t>(height) * | |
| 90 |
1/4✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
14 | static_cast<size_t>(channels)); |
| 91 | 14 | ApplyGaussianKernelMPI(local_input, local_output, local_width, local_start_col, width, height, channels); | |
| 92 | |||
| 93 | 14 | std::vector<uint8_t> output_pixels; | |
| 94 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
|
14 | if (rank == 0) { |
| 95 | output_pixels = | ||
| 96 |
1/4✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
14 | std::vector<uint8_t>(static_cast<size_t>(width) * static_cast<size_t>(height) * static_cast<size_t>(channels)); |
| 97 | } | ||
| 98 | |||
| 99 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | GatherVerticalStripes(local_output, output_pixels, width, height, channels, rank, size, local_width, local_start_col); |
| 100 | |||
| 101 | 14 | const auto output_size = static_cast<size_t>(width) * static_cast<size_t>(height) * static_cast<size_t>(channels); | |
| 102 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
|
14 | if (rank != 0) { |
| 103 |
1/2✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
|
7 | output_pixels.resize(output_size); |
| 104 | } | ||
| 105 | |||
| 106 | // синхронизируем итоговое изображение между всеми процессами, чтобы тесты не падали на worker ranks | ||
| 107 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | MPI_Bcast(output_pixels.data(), static_cast<int>(output_size), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD); |
| 108 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | GetOutput() = output_pixels; |
| 109 | |||
| 110 | return true; | ||
| 111 | } | ||
| 112 | |||
| 113 | 14 | bool ShilinNGaussFilterVerticalSplitMPI::PostProcessingImpl() { | |
| 114 | 14 | int rank = 0; | |
| 115 | 14 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 116 |
3/4✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
✓ Branch 2 taken 7 times.
✗ Branch 3 not taken.
|
14 | return (rank == 0) ? !GetOutput().empty() : true; |
| 117 | } | ||
| 118 | |||
| 119 | 14 | void ShilinNGaussFilterVerticalSplitMPI::DistributeVerticalStripes(const std::vector<uint8_t> &source_image, | |
| 120 | std::vector<uint8_t> &destination_stripe, int width, | ||
| 121 | int height, int channels, int rank, int size, | ||
| 122 | int &local_width, int &local_start_col) { | ||
| 123 | // вертикальное разбиение: каждый процесс получает несколько столбцов | ||
| 124 | 14 | int base_cols_per_proc = width / size; | |
| 125 | 14 | int remainder = width % size; | |
| 126 | |||
| 127 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 1 times.
|
14 | local_start_col = (rank * base_cols_per_proc) + std::min(rank, remainder); |
| 128 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 1 times.
|
14 | local_width = base_cols_per_proc + (rank < remainder ? 1 : 0); |
| 129 | |||
| 130 | // для фильтра 3x3 нужны граничные пиксели слева и справа | ||
| 131 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
|
14 | int left_padding = (local_start_col > 0) ? 1 : 0; |
| 132 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
|
14 | int right_padding = (local_start_col + local_width < width) ? 1 : 0; |
| 133 | 14 | int extended_width = local_width + left_padding + right_padding; | |
| 134 | |||
| 135 | 14 | size_t local_size = static_cast<size_t>(extended_width) * static_cast<size_t>(height) * static_cast<size_t>(channels); | |
| 136 | 14 | destination_stripe = std::vector<uint8_t>(local_size); | |
| 137 | |||
| 138 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
|
14 | if (rank == 0) { |
| 139 | // процесс 0 отправляет данные остальным процессам | ||
| 140 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
|
14 | for (int dest = 1; dest < size; ++dest) { |
| 141 | 7 | SendDataToProcess(source_image, dest, width, height, channels, base_cols_per_proc, remainder); | |
| 142 | } | ||
| 143 | |||
| 144 | // процесс 0 обрабатывает свои данные | ||
| 145 | 7 | CopyLocalData(source_image, destination_stripe, local_start_col, local_width, width, height, channels); | |
| 146 | } else { | ||
| 147 | // остальные процессы получают данные | ||
| 148 | 7 | MPI_Recv(destination_stripe.data(), static_cast<int>(destination_stripe.size()), MPI_UNSIGNED_CHAR, 0, 0, | |
| 149 | MPI_COMM_WORLD, MPI_STATUS_IGNORE); | ||
| 150 | } | ||
| 151 | 14 | } | |
| 152 | |||
| 153 | 7 | void ShilinNGaussFilterVerticalSplitMPI::SendDataToProcess(const std::vector<uint8_t> &input, int dest, int width, | |
| 154 | int height, int channels, int base_cols_per_proc, | ||
| 155 | int remainder) { | ||
| 156 |
1/2✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
|
7 | int dest_start = (dest * base_cols_per_proc) + std::min(dest, remainder); |
| 157 |
1/2✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
|
7 | int dest_width = base_cols_per_proc + (dest < remainder ? 1 : 0); |
| 158 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 7 times.
|
7 | int dest_left_padding = (dest_start > 0) ? 1 : 0; |
| 159 |
1/2✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
|
7 | int dest_right_padding = (dest_start + dest_width < width) ? 1 : 0; |
| 160 | 7 | int dest_extended_width = dest_width + dest_left_padding + dest_right_padding; | |
| 161 | |||
| 162 | 7 | int send_start_col = dest_start - dest_left_padding; | |
| 163 | int send_width = dest_extended_width; | ||
| 164 | |||
| 165 | 7 | std::vector<uint8_t> send_data(static_cast<size_t>(send_width) * static_cast<size_t>(height) * | |
| 166 | 7 | static_cast<size_t>(channels)); | |
| 167 |
2/2✓ Branch 0 taken 217 times.
✓ Branch 1 taken 7 times.
|
224 | for (int row = 0; row < height; ++row) { |
| 168 |
2/2✓ Branch 0 taken 7179 times.
✓ Branch 1 taken 217 times.
|
7396 | for (int col = 0; col < send_width; ++col) { |
| 169 | 7179 | int src_x = send_start_col + col; | |
| 170 |
1/2✓ Branch 0 taken 7179 times.
✗ Branch 1 not taken.
|
7179 | if (src_x >= 0 && src_x < width) { |
| 171 |
2/2✓ Branch 0 taken 21537 times.
✓ Branch 1 taken 7179 times.
|
28716 | for (int ch = 0; ch < channels; ++ch) { |
| 172 | 21537 | size_t src_idx = (static_cast<size_t>(row) * static_cast<size_t>(width) * static_cast<size_t>(channels)) + | |
| 173 | 21537 | (static_cast<size_t>(src_x) * static_cast<size_t>(channels)) + static_cast<size_t>(ch); | |
| 174 | 21537 | size_t dst_idx = | |
| 175 | 21537 | (static_cast<size_t>(row) * static_cast<size_t>(send_width) * static_cast<size_t>(channels)) + | |
| 176 | 21537 | (static_cast<size_t>(col) * static_cast<size_t>(channels)) + static_cast<size_t>(ch); | |
| 177 | 21537 | send_data[dst_idx] = input[src_idx]; | |
| 178 | } | ||
| 179 | } | ||
| 180 | } | ||
| 181 | } | ||
| 182 | |||
| 183 |
1/2✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
|
7 | MPI_Send(send_data.data(), static_cast<int>(send_data.size()), MPI_UNSIGNED_CHAR, dest, 0, MPI_COMM_WORLD); |
| 184 | 7 | } | |
| 185 | |||
| 186 | 7 | void ShilinNGaussFilterVerticalSplitMPI::CopyLocalData(const std::vector<uint8_t> &input, | |
| 187 | std::vector<uint8_t> &local_data, int local_start_col, | ||
| 188 | int local_width, int width, int height, int channels) { | ||
| 189 |
1/2✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
|
7 | int left_padding = (local_start_col > 0) ? 1 : 0; |
| 190 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 7 times.
|
7 | int right_padding = (local_start_col + local_width < width) ? 1 : 0; |
| 191 | 7 | int extended_width = local_width + left_padding + right_padding; | |
| 192 | |||
| 193 |
2/2✓ Branch 0 taken 217 times.
✓ Branch 1 taken 7 times.
|
224 | for (int row = 0; row < height; ++row) { |
| 194 |
2/2✓ Branch 0 taken 7184 times.
✓ Branch 1 taken 217 times.
|
7401 | for (int col = -left_padding; col < local_width + right_padding; ++col) { |
| 195 | 7184 | int src_x = local_start_col + col; | |
| 196 |
1/2✓ Branch 0 taken 7184 times.
✗ Branch 1 not taken.
|
7184 | if (src_x >= 0 && src_x < width) { |
| 197 |
2/2✓ Branch 0 taken 21552 times.
✓ Branch 1 taken 7184 times.
|
28736 | for (int ch = 0; ch < channels; ++ch) { |
| 198 | 21552 | size_t src_idx = (static_cast<size_t>(row) * static_cast<size_t>(width) * static_cast<size_t>(channels)) + | |
| 199 | 21552 | (static_cast<size_t>(src_x) * static_cast<size_t>(channels)) + static_cast<size_t>(ch); | |
| 200 | 21552 | size_t dst_idx = | |
| 201 | 21552 | (static_cast<size_t>(row) * static_cast<size_t>(extended_width) * static_cast<size_t>(channels)) + | |
| 202 | 21552 | (static_cast<size_t>(col + left_padding) * static_cast<size_t>(channels)) + static_cast<size_t>(ch); | |
| 203 | 21552 | local_data[dst_idx] = input[src_idx]; | |
| 204 | } | ||
| 205 | } | ||
| 206 | } | ||
| 207 | } | ||
| 208 | 7 | } | |
| 209 | |||
| 210 | 14 | void ShilinNGaussFilterVerticalSplitMPI::ApplyGaussianKernelMPI(const std::vector<uint8_t> &local_input, | |
| 211 | std::vector<uint8_t> &local_output, int local_width, | ||
| 212 | int local_start_col, int width, int height, | ||
| 213 | int channels) { | ||
| 214 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
|
14 | int left_padding = (local_start_col > 0) ? 1 : 0; |
| 215 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
|
14 | int right_padding = (local_start_col + local_width < width) ? 1 : 0; |
| 216 | 14 | int extended_width = local_width + left_padding + right_padding; | |
| 217 | |||
| 218 |
2/2✓ Branch 0 taken 434 times.
✓ Branch 1 taken 14 times.
|
448 | for (int row = 0; row < height; ++row) { |
| 219 |
2/2✓ Branch 0 taken 13929 times.
✓ Branch 1 taken 434 times.
|
14363 | for (int local_col = 0; local_col < local_width; ++local_col) { |
| 220 | 13929 | ProcessPixelWithKernel(local_input, local_output, row, local_col, local_width, left_padding, extended_width, | |
| 221 | height, channels); | ||
| 222 | } | ||
| 223 | } | ||
| 224 | 14 | } | |
| 225 | |||
| 226 | 13929 | void ShilinNGaussFilterVerticalSplitMPI::ProcessPixelWithKernel(const std::vector<uint8_t> &local_input, | |
| 227 | std::vector<uint8_t> &local_output, int row, | ||
| 228 | int local_col, int local_width, int left_padding, | ||
| 229 | int extended_width, int height, int channels) { | ||
| 230 | // ядро гаусса 3x3 | ||
| 231 | 13929 | constexpr std::array<std::array<double, 3>, 3> kKernel = {{{{1.0 / 16.0, 2.0 / 16.0, 1.0 / 16.0}}, | |
| 232 | {{2.0 / 16.0, 4.0 / 16.0, 2.0 / 16.0}}, | ||
| 233 | {{1.0 / 16.0, 2.0 / 16.0, 1.0 / 16.0}}}}; | ||
| 234 | |||
| 235 | 13929 | int col_in_extended = local_col + left_padding; | |
| 236 | |||
| 237 |
2/2✓ Branch 0 taken 41787 times.
✓ Branch 1 taken 13929 times.
|
55716 | for (int ch = 0; ch < channels; ++ch) { |
| 238 | 41787 | double sum = 0.0; | |
| 239 | |||
| 240 |
2/2✓ Branch 0 taken 125361 times.
✓ Branch 1 taken 41787 times.
|
167148 | for (int ky = -1; ky <= 1; ++ky) { |
| 241 |
2/2✓ Branch 0 taken 376083 times.
✓ Branch 1 taken 125361 times.
|
501444 | for (int kx = -1; kx <= 1; ++kx) { |
| 242 | 376083 | int px = col_in_extended + kx; | |
| 243 | 376083 | int py = row + ky; | |
| 244 | |||
| 245 | double pixel_val = 0.0; | ||
| 246 |
4/4✓ Branch 0 taken 372177 times.
✓ Branch 1 taken 3906 times.
✓ Branch 2 taken 368355 times.
✓ Branch 3 taken 3822 times.
|
376083 | if (px >= 0 && px < extended_width && py >= 0 && py < height) { |
| 247 | 368355 | size_t idx = (static_cast<size_t>(py) * static_cast<size_t>(extended_width) * static_cast<size_t>(channels)) + | |
| 248 | 368355 | (static_cast<size_t>(px) * static_cast<size_t>(channels)) + static_cast<size_t>(ch); | |
| 249 | 368355 | pixel_val = static_cast<double>(local_input[idx]); | |
| 250 | } | ||
| 251 | 376083 | const int kernel_y_idx = ky + 1; | |
| 252 | 376083 | const int kernel_x_idx = kx + 1; | |
| 253 | 376083 | const auto kernel_y = static_cast<size_t>(kernel_y_idx); | |
| 254 | 376083 | const auto kernel_x = static_cast<size_t>(kernel_x_idx); | |
| 255 | 376083 | sum += pixel_val * kKernel.at(kernel_y).at(kernel_x); | |
| 256 | } | ||
| 257 | } | ||
| 258 | |||
| 259 | 41787 | size_t out_idx = (static_cast<size_t>(row) * static_cast<size_t>(local_width) * static_cast<size_t>(channels)) + | |
| 260 | 41787 | (static_cast<size_t>(local_col) * static_cast<size_t>(channels)) + static_cast<size_t>(ch); | |
| 261 | 41787 | local_output[out_idx] = static_cast<uint8_t>(std::clamp(sum, 0.0, 255.0)); | |
| 262 | } | ||
| 263 | 13929 | } | |
| 264 | |||
| 265 | 14 | void ShilinNGaussFilterVerticalSplitMPI::GatherVerticalStripes(const std::vector<uint8_t> &local_stripe, | |
| 266 | std::vector<uint8_t> &final_image, int width, int height, | ||
| 267 | int channels, int rank, int size, int local_width, | ||
| 268 | int local_start_col) { | ||
| 269 | 14 | int base_cols_per_proc = width / size; | |
| 270 | 14 | int remainder = width % size; | |
| 271 | |||
| 272 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
|
14 | if (rank == 0) { |
| 273 | int src_start = (0 * base_cols_per_proc) + std::min(0, remainder); | ||
| 274 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 1 times.
|
7 | int src_width = base_cols_per_proc + (0 < remainder ? 1 : 0); |
| 275 | GatherFromRank0(local_stripe, final_image, width, height, channels, size, local_width, src_start, src_width); | ||
| 276 | 7 | GatherFromOtherRanks(final_image, width, height, channels, size, base_cols_per_proc, remainder); | |
| 277 | } else { | ||
| 278 | SendUnpaddedData(local_stripe, local_width, local_start_col, width, height, channels); | ||
| 279 | } | ||
| 280 | 14 | } | |
| 281 | |||
| 282 | ✗ | void ShilinNGaussFilterVerticalSplitMPI::GatherFromRank0(const std::vector<uint8_t> &local_stripe, | |
| 283 | std::vector<uint8_t> &final_image, int width, int height, | ||
| 284 | int channels, int /* size */, int local_width, int src_start, | ||
| 285 | int src_width) { | ||
| 286 |
2/4✗ Branch 0 not taken.
✗ Branch 1 not taken.
✓ Branch 2 taken 217 times.
✓ Branch 3 taken 7 times.
|
224 | for (int row = 0; row < height; ++row) { |
| 287 |
2/4✗ Branch 0 not taken.
✗ Branch 1 not taken.
✓ Branch 2 taken 6967 times.
✓ Branch 3 taken 217 times.
|
7184 | for (int col = 0; col < src_width; ++col) { |
| 288 |
2/4✗ Branch 0 not taken.
✗ Branch 1 not taken.
✓ Branch 2 taken 20901 times.
✓ Branch 3 taken 6967 times.
|
27868 | for (int ch = 0; ch < channels; ++ch) { |
| 289 | 20901 | size_t local_idx = | |
| 290 | 20901 | (static_cast<size_t>(row) * static_cast<size_t>(local_width) * static_cast<size_t>(channels)) + | |
| 291 | 20901 | (static_cast<size_t>(col) * static_cast<size_t>(channels)) + static_cast<size_t>(ch); | |
| 292 | 20901 | size_t global_idx = (static_cast<size_t>(row) * static_cast<size_t>(width) * static_cast<size_t>(channels)) + | |
| 293 | 20901 | (static_cast<size_t>(src_start + col) * static_cast<size_t>(channels)) + | |
| 294 | static_cast<size_t>(ch); | ||
| 295 | 20901 | final_image[global_idx] = local_stripe[local_idx]; | |
| 296 | } | ||
| 297 | } | ||
| 298 | } | ||
| 299 | ✗ | } | |
| 300 | |||
| 301 | 7 | void ShilinNGaussFilterVerticalSplitMPI::GatherFromOtherRanks(std::vector<uint8_t> &final_image, int width, int height, | |
| 302 | int channels, int size, int base_cols_per_proc, | ||
| 303 | int remainder) { | ||
| 304 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
|
14 | for (int src = 1; src < size; ++src) { |
| 305 |
1/2✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
|
7 | int src_start = (src * base_cols_per_proc) + std::min(src, remainder); |
| 306 |
1/2✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
|
7 | int src_width = base_cols_per_proc + (src < remainder ? 1 : 0); |
| 307 | |||
| 308 | 7 | std::vector<uint8_t> recv_data(static_cast<size_t>(src_width) * static_cast<size_t>(height) * | |
| 309 |
1/2✓ Branch 2 taken 7 times.
✗ Branch 3 not taken.
|
7 | static_cast<size_t>(channels)); |
| 310 | |||
| 311 |
1/2✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
|
7 | MPI_Recv(recv_data.data(), static_cast<int>(recv_data.size()), MPI_UNSIGNED_CHAR, src, 0, MPI_COMM_WORLD, |
| 312 | MPI_STATUS_IGNORE); | ||
| 313 | |||
| 314 |
2/2✓ Branch 0 taken 217 times.
✓ Branch 1 taken 7 times.
|
224 | for (int row = 0; row < height; ++row) { |
| 315 |
2/2✓ Branch 0 taken 6962 times.
✓ Branch 1 taken 217 times.
|
7179 | for (int col = 0; col < src_width; ++col) { |
| 316 |
2/2✓ Branch 0 taken 20886 times.
✓ Branch 1 taken 6962 times.
|
27848 | for (int ch = 0; ch < channels; ++ch) { |
| 317 | 20886 | size_t recv_idx = | |
| 318 | 20886 | (static_cast<size_t>(row) * static_cast<size_t>(src_width) * static_cast<size_t>(channels)) + | |
| 319 | 20886 | (static_cast<size_t>(col) * static_cast<size_t>(channels)) + static_cast<size_t>(ch); | |
| 320 | 20886 | size_t global_idx = (static_cast<size_t>(row) * static_cast<size_t>(width) * static_cast<size_t>(channels)) + | |
| 321 | 20886 | (static_cast<size_t>(src_start + col) * static_cast<size_t>(channels)) + | |
| 322 | static_cast<size_t>(ch); | ||
| 323 | 20886 | final_image[global_idx] = recv_data[recv_idx]; | |
| 324 | } | ||
| 325 | } | ||
| 326 | } | ||
| 327 | } | ||
| 328 | 7 | } | |
| 329 | |||
| 330 | ✗ | void ShilinNGaussFilterVerticalSplitMPI::SendUnpaddedData(const std::vector<uint8_t> &local_stripe, int local_width, | |
| 331 | int /*local_start_col*/, int /*width*/, int height, | ||
| 332 | int channels) { | ||
| 333 | // local_stripe уже содержит unpadded данные (local_output после ApplyGaussianKernelMPI) | ||
| 334 | // размер: local_width * height * channels | ||
| 335 | 7 | size_t data_size = static_cast<size_t>(local_width) * static_cast<size_t>(height) * static_cast<size_t>(channels); | |
| 336 | 7 | MPI_Send(local_stripe.data(), static_cast<int>(data_size), MPI_UNSIGNED_CHAR, 0, 0, MPI_COMM_WORLD); | |
| 337 | 7 | } | |
| 338 | |||
| 339 | } // namespace shilin_n_gauss_filter_vertical_split | ||
| 340 |