| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "zhurin_i_gaus_kernel/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | #include <omp.h> | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <utility> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "zhurin_i_gaus_kernel/common/include/common.hpp" | ||
| 12 | |||
| 13 | namespace zhurin_i_gaus_kernel { | ||
| 14 | |||
| 15 | namespace { | ||
| 16 | |||
/// Zero-padded pixel lookup.
///
/// Returns img[row][col] when (row, col) lies inside the width x height
/// rectangle, and 0 for every coordinate outside of it.
int GetPixelZero(const std::vector<std::vector<int>> &img, int row, int col, int width, int height) {
  const bool row_inside = (row >= 0) && (row < height);
  const bool col_inside = (col >= 0) && (col < width);
  return (row_inside && col_inside) ? img[row][col] : 0;
}
| 23 | |||
| 24 | } // namespace | ||
| 25 | |||
| 26 |
1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
|
12 | ZhurinIGausKernelALL::ZhurinIGausKernelALL(const InType &in) { |
| 27 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 28 | GetInput() = in; | ||
| 29 | 12 | GetOutput() = OutType{}; | |
| 30 | 12 | } | |
| 31 | |||
| 32 | 12 | bool ZhurinIGausKernelALL::ValidationImpl() { | |
| 33 | const auto &in = GetInput(); | ||
| 34 | 12 | int w = std::get<0>(in); | |
| 35 | 12 | int h = std::get<1>(in); | |
| 36 | 12 | int parts = std::get<2>(in); | |
| 37 | const auto &img = std::get<3>(in); | ||
| 38 | |||
| 39 |
2/4✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 12 times.
|
12 | if (w <= 0 || h <= 0 || parts <= 0 || parts > w) { |
| 40 | return false; | ||
| 41 | } | ||
| 42 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
|
12 | if (std::cmp_not_equal(img.size(), static_cast<std::size_t>(h))) { |
| 43 | return false; | ||
| 44 | } | ||
| 45 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 12 times.
|
44 | for (int i = 0; i < h; ++i) { |
| 46 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 32 times.
|
32 | if (std::cmp_not_equal(img[i].size(), static_cast<std::size_t>(w))) { |
| 47 | return false; | ||
| 48 | } | ||
| 49 | } | ||
| 50 | |||
| 51 | 12 | int initialized = 0; | |
| 52 | 12 | MPI_Initialized(&initialized); | |
| 53 | 12 | return initialized != 0; | |
| 54 | } | ||
| 55 | |||
| 56 | 12 | bool ZhurinIGausKernelALL::PreProcessingImpl() { | |
| 57 | const auto &in = GetInput(); | ||
| 58 | 12 | width_ = std::get<0>(in); | |
| 59 | 12 | height_ = std::get<1>(in); | |
| 60 | 12 | num_parts_ = std::get<2>(in); | |
| 61 | 12 | image_ = std::get<3>(in); | |
| 62 | result_.clear(); | ||
| 63 | 12 | result_.reserve(height_); | |
| 64 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 12 times.
|
44 | for (int i = 0; i < height_; ++i) { |
| 65 | 32 | result_.emplace_back(width_, 0); | |
| 66 | } | ||
| 67 | 12 | return true; | |
| 68 | } | ||
| 69 | |||
| 70 | 12 | bool ZhurinIGausKernelALL::RunImpl() { | |
| 71 | 12 | int rank = 0; | |
| 72 | 12 | int size = 1; | |
| 73 | 12 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 74 | 12 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 75 | |||
| 76 | 12 | const int w = width_; | |
| 77 | 12 | const int h = height_; | |
| 78 | 12 | const auto &img = image_; | |
| 79 | |||
| 80 | 12 | const int base = h / size; | |
| 81 | 12 | const int rem = h % size; | |
| 82 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 4 times.
|
12 | const int start = (rank * base) + std::min(rank, rem); |
| 83 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 4 times.
|
12 | const int end = start + base + ((rank < rem) ? 1 : 0); |
| 84 | 12 | const int local_rows = end - start; | |
| 85 | |||
| 86 | 12 | std::vector<int> flat_result(static_cast<std::size_t>(h) * static_cast<std::size_t>(w), 0); | |
| 87 |
1/4✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
12 | std::vector<int> local_flat(static_cast<std::size_t>(local_rows) * static_cast<std::size_t>(w), 0); |
| 88 | |||
| 89 | 12 | #pragma omp parallel for default(none) shared(local_flat, w, start, end, img, h) | |
| 90 | for (int i = start; i < end; ++i) { | ||
| 91 | const int row_idx = i - start; | ||
| 92 | for (int j = 0; j < w; ++j) { | ||
| 93 | const int p00 = GetPixelZero(img, i - 1, j - 1, w, h); | ||
| 94 | const int p01 = GetPixelZero(img, i - 1, j, w, h); | ||
| 95 | const int p02 = GetPixelZero(img, i - 1, j + 1, w, h); | ||
| 96 | const int p10 = GetPixelZero(img, i, j - 1, w, h); | ||
| 97 | const int p11 = GetPixelZero(img, i, j, w, h); | ||
| 98 | const int p12 = GetPixelZero(img, i, j + 1, w, h); | ||
| 99 | const int p20 = GetPixelZero(img, i + 1, j - 1, w, h); | ||
| 100 | const int p21 = GetPixelZero(img, i + 1, j, w, h); | ||
| 101 | const int p22 = GetPixelZero(img, i + 1, j + 1, w, h); | ||
| 102 | |||
| 103 | int sum = (p00 * kKernel[0][0]) + (p01 * kKernel[0][1]) + (p02 * kKernel[0][2]) + (p10 * kKernel[1][0]) + | ||
| 104 | (p11 * kKernel[1][1]) + (p12 * kKernel[1][2]) + (p20 * kKernel[2][0]) + (p21 * kKernel[2][1]) + | ||
| 105 | (p22 * kKernel[2][2]); | ||
| 106 | |||
| 107 | const std::size_t idx = | ||
| 108 | (static_cast<std::size_t>(row_idx) * static_cast<std::size_t>(w)) + static_cast<std::size_t>(j); | ||
| 109 | local_flat[idx] = sum >> kShift; | ||
| 110 | } | ||
| 111 | } | ||
| 112 | |||
| 113 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
|
12 | if (size == 1) { |
| 114 | ✗ | std::copy_n(local_flat.data(), static_cast<std::ptrdiff_t>(local_flat.size()), flat_result.data()); | |
| 115 | } else { | ||
| 116 |
2/6✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 12 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
12 | std::vector<int> recv_counts_rows(static_cast<std::size_t>(size), 0); |
| 117 |
1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
|
12 | MPI_Allgather(&local_rows, 1, MPI_INT, recv_counts_rows.data(), 1, MPI_INT, MPI_COMM_WORLD); |
| 118 | |||
| 119 |
1/4✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
12 | std::vector<int> recv_counts(static_cast<std::size_t>(size), 0); |
| 120 |
1/4✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
12 | std::vector<int> displs(static_cast<std::size_t>(size), 0); |
| 121 | int total = 0; | ||
| 122 |
2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 12 times.
|
36 | for (int i = 0; i < size; ++i) { |
| 123 | 24 | recv_counts[i] = recv_counts_rows[i] * w; | |
| 124 | 24 | displs[i] = total; | |
| 125 | 24 | total += recv_counts[i]; | |
| 126 | } | ||
| 127 |
1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
|
12 | MPI_Allgatherv(local_flat.data(), local_rows * w, MPI_INT, flat_result.data(), recv_counts.data(), displs.data(), |
| 128 | MPI_INT, MPI_COMM_WORLD); | ||
| 129 | } | ||
| 130 | |||
| 131 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 12 times.
|
44 | for (int i = 0; i < h; ++i) { |
| 132 | 32 | const std::ptrdiff_t offset = static_cast<std::ptrdiff_t>(i) * static_cast<std::ptrdiff_t>(w); | |
| 133 | 32 | std::copy_n(flat_result.data() + offset, static_cast<std::ptrdiff_t>(w), result_[i].begin()); | |
| 134 | } | ||
| 135 | |||
| 136 | 12 | return true; | |
| 137 | } | ||
| 138 | |||
| 139 | 12 | bool ZhurinIGausKernelALL::PostProcessingImpl() { | |
| 140 | 12 | GetOutput() = std::move(result_); | |
| 141 | 12 | return true; | |
| 142 | } | ||
| 143 | |||
| 144 | } // namespace zhurin_i_gaus_kernel | ||
| 145 |