| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "rychkova_gauss/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | #include <omp.h> | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <cstdint> | ||
| 9 | #include <utility> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "rychkova_gauss/common/include/common.hpp" | ||
| 13 | #include "util/include/util.hpp" | ||
| 14 | |||
| 15 | namespace rychkova_gauss { | ||
| 16 | |||
| 17 | namespace { | ||
| 18 | int Mirror(int x, int xmin, int xmax) { | ||
| 19 | 14212368 | if (x < xmin) { | |
| 20 | return 1; | ||
| 21 | } | ||
| 22 |
4/4✓ Branch 0 taken 7764 times.
✓ Branch 1 taken 7090656 times.
✓ Branch 2 taken 7281 times.
✓ Branch 3 taken 7091622 times.
|
14197323 | if (x >= xmax) { |
| 23 | 15045 | return xmax - 1; | |
| 24 | } | ||
| 25 | return x; | ||
| 26 | }; | ||
| 27 | |||
| 28 | 789576 | Pixel ComputePixel(const Image &image, std::size_t x, std::size_t y, std::size_t width, std::size_t height) { | |
| 29 | Pixel result = {.R = 0, .G = 0, .B = 0}; | ||
| 30 |
2/2✓ Branch 0 taken 2368728 times.
✓ Branch 1 taken 789576 times.
|
3158304 | for (int shift_x = -1; shift_x < 2; shift_x++) { |
| 31 |
2/2✓ Branch 0 taken 7106184 times.
✓ Branch 1 taken 2368728 times.
|
9474912 | for (int shift_y = -1; shift_y < 2; shift_y++) { |
| 32 |
2/2✓ Branch 0 taken 7098420 times.
✓ Branch 1 taken 7764 times.
|
7106184 | int xn = Mirror(static_cast<int>(x) + shift_x, 0, static_cast<int>(width)); |
| 33 |
2/2✓ Branch 0 taken 7098903 times.
✓ Branch 1 taken 7281 times.
|
7106184 | int yn = Mirror(static_cast<int>(y) + shift_y, 0, static_cast<int>(height)); |
| 34 | 7106184 | auto current = image[yn][xn]; | |
| 35 | 7106184 | result.R += static_cast<uint8_t>(static_cast<double>(current.R) * kKernel[shift_x + 1][shift_y + 1]); | |
| 36 | 7106184 | result.G += static_cast<uint8_t>(static_cast<double>(current.G) * kKernel[shift_x + 1][shift_y + 1]); | |
| 37 | 7106184 | result.B += static_cast<uint8_t>(static_cast<double>(current.B) * kKernel[shift_x + 1][shift_y + 1]); | |
| 38 | } | ||
| 39 | } | ||
| 40 | 789576 | return result; | |
| 41 | } | ||
| 42 | } // namespace | ||
| 43 | |||
| 44 |
3/8✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 16 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 16 times.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
|
16 | RychkovaGaussALL::RychkovaGaussALL(const InType &in) : loutput_({}), goutput_({}) { |
| 45 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 46 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | GetInput() = in; |
| 47 | GetOutput() = {}; | ||
| 48 | 16 | } | |
| 49 | |||
| 50 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | bool RychkovaGaussALL::ValidationImpl() { |
| 51 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | if (GetInput().empty()) { |
| 52 | return false; | ||
| 53 | } | ||
| 54 | const auto len = GetInput()[0].size(); | ||
| 55 | return std::ranges::all_of(GetInput(), [len](const auto &row) { return row.size() == len; }); | ||
| 56 | } | ||
| 57 | |||
| 58 | 16 | bool RychkovaGaussALL::PreProcessingImpl() { | |
| 59 | const auto &image = GetInput(); | ||
| 60 | const auto width = image[0].size(); | ||
| 61 | const auto height = image.size(); | ||
| 62 | 16 | loutput_.resize(width * height * 3, 0); | |
| 63 | 16 | goutput_.resize(width * height * 3, 0); | |
| 64 | 16 | return true; | |
| 65 | } | ||
| 66 | |||
| 67 | 16 | bool RychkovaGaussALL::RunImpl() { | |
| 68 | const auto &image = GetInput(); | ||
| 69 | const auto width = image[0].size(); | ||
| 70 | const auto height = image.size(); | ||
| 71 |
1/2✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.
|
16 | GetOutput() = Image(height, std::vector<Pixel>(width, Pixel(0, 0, 0))); |
| 72 | |||
| 73 | 16 | int n = 0; | |
| 74 | 16 | int idx = 0; | |
| 75 | |||
| 76 | 16 | MPI_Comm_size(MPI_COMM_WORLD, &n); | |
| 77 | 16 | MPI_Comm_rank(MPI_COMM_WORLD, &idx); | |
| 78 | |||
| 79 | 16 | size_t row_per_proc = height / n; | |
| 80 | 16 | size_t remainder_rows = height % n; | |
| 81 | 16 | size_t start = (idx * row_per_proc) + std::min(static_cast<size_t>(idx), remainder_rows); | |
| 82 | 16 | size_t end = ((idx + 1) * row_per_proc) + std::min(static_cast<size_t>(idx + 1), remainder_rows); | |
| 83 | |||
| 84 | 16 | size_t local_rows = end - start; | |
| 85 | |||
| 86 | 16 | std::vector<uint8_t> local_output(local_rows * width * 3); | |
| 87 | |||
| 88 |
1/4✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
16 | std::vector<int> counts(n); |
| 89 |
1/4✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
16 | std::vector<int> displs(n); |
| 90 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 16 times.
|
48 | for (int i = 0; i < n; i++) { |
| 91 | 32 | size_t rows_i = row_per_proc + (std::cmp_less(i, remainder_rows) ? 1 : 0); | |
| 92 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 16 times.
|
32 | counts[i] = static_cast<int>(rows_i * width * 3); |
| 93 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 16 times.
|
32 | displs[i] = (i == 0) ? 0 : displs[i - 1] + counts[i - 1]; |
| 94 | } | ||
| 95 | |||
| 96 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | #pragma omp parallel for shared(local_output, image, width, height, start, local_rows) default(none) \ |
| 97 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | num_threads(ppc::util::GetNumThreads()) |
| 98 | for (size_t local_j = 0; local_j < local_rows; local_j++) { | ||
| 99 | size_t global_j = start + local_j; | ||
| 100 | for (size_t i = 0; i < width; i++) { | ||
| 101 | auto px = ComputePixel(image, i, global_j, width, height); | ||
| 102 | size_t flat_idx = ((local_j * width) + i) * 3; | ||
| 103 | local_output[flat_idx] = px.R; | ||
| 104 | local_output[flat_idx + 1] = px.G; | ||
| 105 | local_output[flat_idx + 2] = px.B; | ||
| 106 | } | ||
| 107 | } | ||
| 108 | |||
| 109 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | MPI_Gatherv(local_output.data(), static_cast<int>(local_output.size()), MPI_UINT8_T, goutput_.data(), counts.data(), |
| 110 | displs.data(), MPI_UINT8_T, 0, MPI_COMM_WORLD); | ||
| 111 | |||
| 112 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | MPI_Bcast(goutput_.data(), static_cast<int>(goutput_.size()), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD); |
| 113 | |||
| 114 | auto &output = GetOutput(); | ||
| 115 | |||
| 116 |
2/4✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 16 times.
✗ Branch 4 not taken.
|
16 | #pragma omp parallel for collapse(2) shared(height, width, output) default(none) num_threads(ppc::util::GetNumThreads()) |
| 117 | for (size_t j = 0; j < height; j++) { | ||
| 118 | for (size_t i = 0; i < width; i++) { | ||
| 119 | size_t flat_idx = ((j * width) + i) * 3; | ||
| 120 | output[j][i] = Pixel(goutput_[flat_idx], goutput_[flat_idx + 1], goutput_[flat_idx + 2]); | ||
| 121 | } | ||
| 122 | } | ||
| 123 | 16 | return true; | |
| 124 | } | ||
| 125 | |||
| 126 | 16 | bool RychkovaGaussALL::PostProcessingImpl() { | |
| 127 | 16 | return true; | |
| 128 | } | ||
| 129 | |||
| 130 | } // namespace rychkova_gauss | ||
| 131 |