| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "marin_l_linear_filter_vertical/mpi/include/ops_mpi.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <cstdint> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "marin_l_linear_filter_vertical/common/include/common.hpp" | ||
| 12 | |||
| 13 | namespace marin_l_linear_filter_vertical { | ||
| 14 | |||
| 15 | namespace { | ||
// 3x3 Gaussian blur weights, addressed as [vertical offset + 1][horizontal offset + 1].
constexpr std::array<std::array<int, 3>, 3> kGaussKernel = {{{1, 2, 1}, {2, 4, 2}, {1, 2, 1}}};
// Sum of all kGaussKernel weights; divides the weighted sum back into pixel range.
constexpr int kKernelSum = 16;

// Computes one blurred pixel from a strip of the image.
//
// local_input   row-major strip, `ext_col_count` columns by `height` rows
// ext_col_count number of columns stored per row of `local_input`
// height        number of rows in the strip
// row           row of the pixel being produced
// lx            column of the pixel, counted without the left padding column
// left_pad      number of padding columns stored before column 0 of the strip
//
// Neighbours that fall outside the strip (above, below, left or right of the
// stored data) contribute zero. The weighted sum is divided by kKernelSum with
// integer (truncating) division and clamped to [0, 255].
uint8_t ApplyKernelLocal(const std::vector<uint8_t> &local_input, int ext_col_count, int height, int row, int lx,
                         int left_pad) {
  const int centre_col = lx + left_pad;
  int weighted_total = 0;

  for (int dy = 0; dy < 3; ++dy) {
    const int src_row = row + dy - 1;
    if (src_row < 0 || src_row >= height) {
      continue;  // whole kernel row lies outside the strip: adds nothing
    }
    for (int dx = 0; dx < 3; ++dx) {
      const int src_col = centre_col + dx - 1;
      const bool col_inside = (src_col >= 0) && (src_col < ext_col_count);
      const int sample = col_inside ? local_input[(src_row * ext_col_count) + src_col] : 0;
      weighted_total += sample * kGaussKernel.at(dy).at(dx);
    }
  }

  const int averaged = weighted_total / kKernelSum;
  return static_cast<uint8_t>(std::clamp(averaged, 0, 255));
}
| 35 | |||
// Splits `width` image columns into contiguous strips, one per process.
//
// Every process receives `width / size` columns and the first `width % size`
// processes receive one more, so strip widths differ by at most one. When
// there are fewer columns than processes the trailing processes get a
// zero-width strip.
//
// width       total number of image columns
// size        number of processes sharing the columns
// col_counts  output: strip width for each process rank
// col_starts  output: first column of each strip
//
// Output vectors shorter than `size` are grown to `size` entries; longer
// vectors keep their extra entries untouched. Nothing is written when `size`
// is not positive.
void ComputeColDistribution(int width, int size, std::vector<int> &col_counts, std::vector<int> &col_starts) {
  if (size <= 0) {
    return;  // nothing to distribute; also avoids dividing by zero below
  }

  // Callers normally pre-size both vectors; grow them here so a short vector
  // cannot turn the writes below into out-of-bounds accesses.
  const auto proc_total = static_cast<std::size_t>(size);
  if (col_counts.size() < proc_total) {
    col_counts.resize(proc_total);
  }
  if (col_starts.size() < proc_total) {
    col_starts.resize(proc_total);
  }

  const int cols_per_proc = width / size;
  const int extra_cols = width % size;
  int current_col = 0;

  for (int proc = 0; proc < size; ++proc) {
    const int count = cols_per_proc + ((proc < extra_cols) ? 1 : 0);
    col_starts[proc] = current_col;
    col_counts[proc] = count;
    current_col += count;
  }
}
| 46 | |||
// Copies a vertical strip out of a row-major image into a tightly packed buffer.
//
// input_pixels   source image, `width` columns per row
// local_input    destination, `ext_col_count` columns per row; must already
//                hold at least `ext_col_count * height` elements
// width          number of columns in the source image
// height         number of rows to copy
// ext_start      first source column of the strip
// ext_col_count  number of columns in the strip
void ExtractLocalData(const std::vector<uint8_t> &input_pixels, std::vector<uint8_t> &local_input, int width,
                      int height, int ext_start, int ext_col_count) {
  if (ext_col_count <= 0) {
    return;  // empty strip: nothing to copy
  }

  for (int row = 0; row < height; ++row) {
    const auto src_row_begin = input_pixels.begin() + ((row * width) + ext_start);
    const auto dst_row_begin = local_input.begin() + (row * ext_col_count);
    std::copy_n(src_row_begin, ext_col_count, dst_row_begin);
  }
}
| 55 | |||
// Writes a tightly packed strip back into its columns of a row-major image.
//
// output_pixels  destination image, `width` columns per row
// local_output   source strip, `col_count` columns per row
// width          number of columns in the destination image
// height         number of rows to copy
// col_start      first destination column of the strip
// col_count      number of columns in the strip
void CopyLocalToOutput(std::vector<uint8_t> &output_pixels, const std::vector<uint8_t> &local_output, int width,
                       int height, int col_start, int col_count) {
  if (col_count <= 0) {
    return;  // empty strip: nothing to write
  }

  for (int row = 0; row < height; ++row) {
    const auto src_row_begin = local_output.begin() + (row * col_count);
    const auto dst_row_begin = output_pixels.begin() + ((row * width) + col_start);
    std::copy_n(src_row_begin, col_count, dst_row_begin);
  }
}
| 64 | |||
// Reports whether a strip needs one extra neighbouring column on a given side.
//
// col_start  first image column of the strip
// col_count  number of columns in the strip
// width      total number of image columns
// is_left    true to query the left side, false to query the right side
//
// Returns 1 when the image has a column on that side of the strip, 0 when the
// strip touches that image edge.
int ComputePadding(int col_start, int col_count, int width, bool is_left) {
  const bool has_neighbour = is_left ? (col_start > 0) : ((col_start + col_count) < width);
  return has_neighbour ? 1 : 0;
}
| 71 | |||
| 72 | 12 | void SendDataToWorkers(const std::vector<uint8_t> &input_pixels, const std::vector<int> &col_counts, | |
| 73 | const std::vector<int> &col_starts, int width, int height, int size) { | ||
| 74 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
|
24 | for (int proc = 1; proc < size; ++proc) { |
| 75 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 10 times.
|
12 | if (col_counts[proc] == 0) { |
| 76 | 2 | continue; | |
| 77 | } | ||
| 78 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
10 | int p_col_start = col_starts[proc]; |
| 79 | int p_col_count = col_counts[proc]; | ||
| 80 | int p_left_pad = ComputePadding(p_col_start, p_col_count, width, true); | ||
| 81 | int p_right_pad = ComputePadding(p_col_start, p_col_count, width, false); | ||
| 82 | 10 | int p_ext_col_count = p_col_count + p_left_pad + p_right_pad; | |
| 83 | 10 | int p_ext_start = p_col_start - p_left_pad; | |
| 84 | |||
| 85 | 10 | std::vector<uint8_t> send_buf(static_cast<size_t>(p_ext_col_count) * static_cast<size_t>(height)); | |
| 86 | ExtractLocalData(input_pixels, send_buf, width, height, p_ext_start, p_ext_col_count); | ||
| 87 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Send(send_buf.data(), static_cast<int>(send_buf.size()), MPI_UNSIGNED_CHAR, proc, 0, MPI_COMM_WORLD); |
| 88 | } | ||
| 89 | 12 | } | |
| 90 | |||
| 91 | 12 | void ReceiveResultsFromWorkers(std::vector<uint8_t> &output_pixels, const std::vector<int> &col_counts, | |
| 92 | const std::vector<int> &col_starts, int width, int height, int size) { | ||
| 93 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
|
24 | for (int proc = 1; proc < size; ++proc) { |
| 94 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 10 times.
|
12 | if (col_counts[proc] == 0) { |
| 95 | 2 | continue; | |
| 96 | } | ||
| 97 | 10 | int p_col_start = col_starts[proc]; | |
| 98 | int p_col_count = col_counts[proc]; | ||
| 99 |
1/2✓ Branch 2 taken 10 times.
✗ Branch 3 not taken.
|
10 | std::vector<uint8_t> recv_buf(static_cast<size_t>(p_col_count) * static_cast<size_t>(height)); |
| 100 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Recv(recv_buf.data(), static_cast<int>(recv_buf.size()), MPI_UNSIGNED_CHAR, proc, 1, MPI_COMM_WORLD, |
| 101 | MPI_STATUS_IGNORE); | ||
| 102 | CopyLocalToOutput(output_pixels, recv_buf, width, height, p_col_start, p_col_count); | ||
| 103 | } | ||
| 104 | 12 | } | |
| 105 | |||
| 106 | 22 | void ApplyFilterToLocalData(const std::vector<uint8_t> &local_input, std::vector<uint8_t> &local_output, | |
| 107 | int ext_col_count, int local_col_count, int height, int left_pad) { | ||
| 108 |
2/2✓ Branch 0 taken 498 times.
✓ Branch 1 taken 22 times.
|
520 | for (int row = 0; row < height; ++row) { |
| 109 |
2/2✓ Branch 0 taken 14014 times.
✓ Branch 1 taken 498 times.
|
14512 | for (int lx = 0; lx < local_col_count; ++lx) { |
| 110 | 14014 | local_output[(row * local_col_count) + lx] = | |
| 111 | 14014 | ApplyKernelLocal(local_input, ext_col_count, height, row, lx, left_pad); | |
| 112 | } | ||
| 113 | } | ||
| 114 | 22 | } | |
| 115 | } // namespace | ||
| 116 | |||
| 117 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | MarinLLinearFilterVerticalMPI::MarinLLinearFilterVerticalMPI(const InType &in) { |
| 118 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 119 | GetInput() = in; | ||
| 120 | GetOutput() = {}; | ||
| 121 | 24 | } | |
| 122 | |||
| 123 | 24 | bool MarinLLinearFilterVerticalMPI::ValidationImpl() { | |
| 124 | 24 | int rank = 0; | |
| 125 | 24 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 126 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
|
24 | if (rank == 0) { |
| 127 | const auto &input = GetInput(); | ||
| 128 |
2/4✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.
|
12 | if (input.width <= 0 || input.height <= 0) { |
| 129 | return false; | ||
| 130 | } | ||
| 131 | 12 | auto expected_size = static_cast<size_t>(input.width) * static_cast<size_t>(input.height); | |
| 132 | 12 | return input.pixels.size() == expected_size; | |
| 133 | } | ||
| 134 | return true; | ||
| 135 | } | ||
| 136 | |||
| 137 | 24 | bool MarinLLinearFilterVerticalMPI::PreProcessingImpl() { | |
| 138 | 24 | int rank = 0; | |
| 139 | 24 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 140 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
|
24 | if (rank == 0) { |
| 141 | const auto &input = GetInput(); | ||
| 142 | 12 | width_ = input.width; | |
| 143 | 12 | height_ = input.height; | |
| 144 | 12 | input_pixels_ = input.pixels; | |
| 145 | } | ||
| 146 | 24 | return true; | |
| 147 | } | ||
| 148 | |||
| 149 | 24 | bool MarinLLinearFilterVerticalMPI::RunImpl() { | |
| 150 | 24 | int rank = 0; | |
| 151 | 24 | int size = 0; | |
| 152 | 24 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 153 | 24 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 154 | |||
| 155 | 24 | MPI_Bcast(&width_, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 156 | 24 | MPI_Bcast(&height_, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 157 | |||
| 158 | 24 | output_pixels_.resize(static_cast<size_t>(width_) * static_cast<size_t>(height_)); | |
| 159 | |||
| 160 | 24 | std::vector<int> col_counts(size); | |
| 161 |
1/4✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
24 | std::vector<int> col_starts(size); |
| 162 | 24 | ComputeColDistribution(width_, size, col_counts, col_starts); | |
| 163 | |||
| 164 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 22 times.
|
24 | int local_col_start = col_starts[rank]; |
| 165 | 24 | int local_col_count = col_counts[rank]; | |
| 166 | |||
| 167 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 22 times.
|
24 | if (local_col_count == 0) { |
| 168 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | MPI_Bcast(output_pixels_.data(), static_cast<int>(output_pixels_.size()), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD); |
| 169 | return true; | ||
| 170 | } | ||
| 171 | |||
| 172 | int left_pad = ComputePadding(local_col_start, local_col_count, width_, true); | ||
| 173 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 10 times.
|
22 | int right_pad = ComputePadding(local_col_start, local_col_count, width_, false); |
| 174 | 22 | int extended_col_count = local_col_count + left_pad + right_pad; | |
| 175 | |||
| 176 |
1/4✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
22 | std::vector<uint8_t> local_input(static_cast<size_t>(extended_col_count) * static_cast<size_t>(height_)); |
| 177 | |||
| 178 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 10 times.
|
22 | if (rank == 0) { |
| 179 | 12 | int ext_start = local_col_start - left_pad; | |
| 180 | 12 | ExtractLocalData(input_pixels_, local_input, width_, height_, ext_start, extended_col_count); | |
| 181 |
1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
|
12 | SendDataToWorkers(input_pixels_, col_counts, col_starts, width_, height_, size); |
| 182 | } else { | ||
| 183 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Recv(local_input.data(), static_cast<int>(local_input.size()), MPI_UNSIGNED_CHAR, 0, 0, MPI_COMM_WORLD, |
| 184 | MPI_STATUS_IGNORE); | ||
| 185 | } | ||
| 186 | |||
| 187 |
1/4✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
22 | std::vector<uint8_t> local_output(static_cast<size_t>(local_col_count) * static_cast<size_t>(height_)); |
| 188 | 22 | ApplyFilterToLocalData(local_input, local_output, extended_col_count, local_col_count, height_, left_pad); | |
| 189 | |||
| 190 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 10 times.
|
22 | if (rank == 0) { |
| 191 | 12 | CopyLocalToOutput(output_pixels_, local_output, width_, height_, local_col_start, local_col_count); | |
| 192 |
1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
|
12 | ReceiveResultsFromWorkers(output_pixels_, col_counts, col_starts, width_, height_, size); |
| 193 | } else { | ||
| 194 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Send(local_output.data(), static_cast<int>(local_output.size()), MPI_UNSIGNED_CHAR, 0, 1, MPI_COMM_WORLD); |
| 195 | } | ||
| 196 | |||
| 197 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | MPI_Bcast(output_pixels_.data(), static_cast<int>(output_pixels_.size()), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD); |
| 198 | |||
| 199 | return true; | ||
| 200 | } | ||
| 201 | |||
| 202 | 24 | bool MarinLLinearFilterVerticalMPI::PostProcessingImpl() { | |
| 203 | 24 | GetOutput() = output_pixels_; | |
| 204 | 24 | return true; | |
| 205 | } | ||
| 206 | |||
| 207 | } // namespace marin_l_linear_filter_vertical | ||
| 208 |