| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "redkina_a_integral_simpson/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cmath> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <functional> | ||
| 9 | #include <future> | ||
| 10 | #include <thread> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "redkina_a_integral_simpson/common/include/common.hpp" | ||
| 14 | |||
| 15 | namespace redkina_a_integral_simpson { | ||
| 16 | namespace { | ||
| 17 | |||
| 18 | 40 | std::vector<std::vector<double>> PrecomputeWeights(const std::vector<int> &n) { | |
| 19 | const size_t dim = n.size(); | ||
| 20 | 40 | std::vector<std::vector<double>> weights(dim); | |
| 21 |
2/2✓ Branch 0 taken 76 times.
✓ Branch 1 taken 40 times.
|
116 | for (size_t i = 0; i < dim; ++i) { |
| 22 |
1/2✓ Branch 1 taken 76 times.
✗ Branch 2 not taken.
|
76 | const int ni = n[i]; |
| 23 |
1/2✓ Branch 1 taken 76 times.
✗ Branch 2 not taken.
|
76 | weights[i].resize(ni + 1); |
| 24 |
2/2✓ Branch 0 taken 4812 times.
✓ Branch 1 taken 76 times.
|
4888 | for (int idx = 0; idx <= ni; ++idx) { |
| 25 |
2/2✓ Branch 0 taken 152 times.
✓ Branch 1 taken 4660 times.
|
4812 | if (idx == 0 || idx == ni) { |
| 26 | 152 | weights[i][idx] = 1.0; | |
| 27 |
2/2✓ Branch 0 taken 2368 times.
✓ Branch 1 taken 2292 times.
|
4660 | } else if (idx % 2 == 1) { |
| 28 | 2368 | weights[i][idx] = 4.0; | |
| 29 | } else { | ||
| 30 | 2292 | weights[i][idx] = 2.0; | |
| 31 | } | ||
| 32 | } | ||
| 33 | } | ||
| 34 | 40 | return weights; | |
| 35 | ✗ | } | |
| 36 | |||
| 37 | 40 | std::vector<size_t> ComputeStrides(const std::vector<int> &n) { | |
| 38 | const size_t dim = n.size(); | ||
| 39 | 40 | std::vector<size_t> strides(dim); | |
| 40 |
1/2✓ Branch 0 taken 40 times.
✗ Branch 1 not taken.
|
40 | if (dim == 0) { |
| 41 | return strides; | ||
| 42 | } | ||
| 43 | 40 | strides[dim - 1] = 1; | |
| 44 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 40 times.
|
76 | for (size_t i = dim - 1; i > 0; --i) { |
| 45 | 36 | strides[i - 1] = strides[i] * static_cast<size_t>(n[i] + 1); | |
| 46 | } | ||
| 47 | return strides; | ||
| 48 | } | ||
| 49 | |||
| 50 | 140 | double ComputeRangeSum(size_t start, size_t end, const std::vector<double> &a, const std::vector<double> &h, | |
| 51 | const std::vector<std::vector<double>> &weights, const std::vector<size_t> &strides, | ||
| 52 | const std::function<double(const std::vector<double> &)> &func, size_t dim) { | ||
| 53 | double sum = 0.0; | ||
| 54 | 140 | std::vector<int> indices(dim); | |
| 55 |
1/4✓ Branch 1 taken 140 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
140 | std::vector<double> point(dim); |
| 56 |
2/2✓ Branch 0 taken 231284 times.
✓ Branch 1 taken 140 times.
|
231424 | for (size_t idx = start; idx < end; ++idx) { |
| 57 | size_t remainder = idx; | ||
| 58 |
2/2✓ Branch 0 taken 530982 times.
✓ Branch 1 taken 231284 times.
|
762266 | for (size_t dim_idx = 0; dim_idx < dim; ++dim_idx) { |
| 59 | 530982 | indices[dim_idx] = static_cast<int>(remainder / strides[dim_idx]); | |
| 60 | 530982 | remainder %= strides[dim_idx]; | |
| 61 | } | ||
| 62 | double w_prod = 1.0; | ||
| 63 |
2/2✓ Branch 0 taken 530982 times.
✓ Branch 1 taken 231284 times.
|
762266 | for (size_t dim_idx = 0; dim_idx < dim; ++dim_idx) { |
| 64 | 530982 | const int i_idx = indices[dim_idx]; | |
| 65 | 530982 | point[dim_idx] = a[dim_idx] + (static_cast<double>(i_idx) * h[dim_idx]); | |
| 66 | 530982 | w_prod *= weights[dim_idx][i_idx]; | |
| 67 | } | ||
| 68 | 231284 | sum += w_prod * func(point); | |
| 69 | } | ||
| 70 | 140 | return sum; | |
| 71 | } | ||
| 72 | |||
| 73 | 40 | double ComputeLocalSumMPI(size_t local_start, size_t local_end, const std::vector<double> &a, | |
| 74 | const std::vector<double> &h, const std::vector<std::vector<double>> &weights, | ||
| 75 | const std::vector<size_t> &strides, | ||
| 76 | const std::function<double(const std::vector<double> &)> &func, size_t dim) { | ||
| 77 | 40 | const size_t local_size = local_end - local_start; | |
| 78 |
1/2✓ Branch 0 taken 40 times.
✗ Branch 1 not taken.
|
40 | if (local_size == 0) { |
| 79 | return 0.0; | ||
| 80 | } | ||
| 81 | |||
| 82 | 40 | unsigned int hardware_threads = std::thread::hardware_concurrency(); | |
| 83 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 40 times.
|
40 | if (hardware_threads == 0) { |
| 84 | ✗ | hardware_threads = 2; | |
| 85 | } | ||
| 86 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 36 times.
|
40 | unsigned int num_threads = std::min(hardware_threads, static_cast<unsigned int>(local_size)); |
| 87 | |||
| 88 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 36 times.
|
40 | if (num_threads == 1) { |
| 89 | 4 | return ComputeRangeSum(local_start, local_end, a, h, weights, strides, func, dim); | |
| 90 | } | ||
| 91 | |||
| 92 | 36 | std::vector<std::future<double>> futures; | |
| 93 | 36 | const size_t block_size = local_size / num_threads; | |
| 94 | 36 | const size_t rem_blocks = local_size % num_threads; | |
| 95 | size_t current_start = local_start; | ||
| 96 | |||
| 97 |
2/2✓ Branch 0 taken 136 times.
✓ Branch 1 taken 36 times.
|
172 | for (unsigned int thread_idx = 0; thread_idx < num_threads; ++thread_idx) { |
| 98 |
2/2✓ Branch 0 taken 112 times.
✓ Branch 1 taken 24 times.
|
136 | const size_t block_end = current_start + block_size + (thread_idx < rem_blocks ? 1 : 0); |
| 99 | futures.push_back( | ||
| 100 |
1/2✓ Branch 1 taken 136 times.
✗ Branch 2 not taken.
|
272 | std::async(std::launch::async, [&a, &h, &weights, &strides, &func, dim, current_start, block_end]() { |
| 101 | 136 | return ComputeRangeSum(current_start, block_end, a, h, weights, strides, func, dim); | |
| 102 | })); | ||
| 103 | current_start = block_end; | ||
| 104 | } | ||
| 105 | |||
| 106 | double total = 0.0; | ||
| 107 |
2/2✓ Branch 0 taken 136 times.
✓ Branch 1 taken 36 times.
|
172 | for (auto &f : futures) { |
| 108 |
1/2✓ Branch 1 taken 136 times.
✗ Branch 2 not taken.
|
136 | total += f.get(); |
| 109 | } | ||
| 110 | return total; | ||
| 111 | 36 | } | |
| 112 | |||
| 113 | } // namespace | ||
| 114 | |||
| 115 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | RedkinaAIntegralSimpsonALL::RedkinaAIntegralSimpsonALL(const InType &in) { |
| 116 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 117 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | GetInput() = in; |
| 118 | 40 | } | |
| 119 | |||
| 120 |
1/2✓ Branch 0 taken 40 times.
✗ Branch 1 not taken.
|
40 | bool RedkinaAIntegralSimpsonALL::ValidationImpl() { |
| 121 | const auto &in = GetInput(); | ||
| 122 | const size_t dim = in.a.size(); | ||
| 123 | |||
| 124 |
3/6✓ Branch 0 taken 40 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 40 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 40 times.
|
40 | if (dim == 0 || in.b.size() != dim || in.n.size() != dim) { |
| 125 | return false; | ||
| 126 | } | ||
| 127 |
2/2✓ Branch 0 taken 76 times.
✓ Branch 1 taken 40 times.
|
116 | for (size_t i = 0; i < dim; ++i) { |
| 128 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 76 times.
|
76 | if (in.a[i] >= in.b[i]) { |
| 129 | return false; | ||
| 130 | } | ||
| 131 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 76 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 76 times.
|
76 | if (in.n[i] <= 0 || in.n[i] % 2 != 0) { |
| 132 | return false; | ||
| 133 | } | ||
| 134 | } | ||
| 135 | 40 | return static_cast<bool>(in.func); | |
| 136 | } | ||
| 137 | |||
| 138 | 40 | bool RedkinaAIntegralSimpsonALL::PreProcessingImpl() { | |
| 139 | const auto &in = GetInput(); | ||
| 140 | 40 | func_ = in.func; | |
| 141 | 40 | a_ = in.a; | |
| 142 | 40 | b_ = in.b; | |
| 143 | 40 | n_ = in.n; | |
| 144 | 40 | result_ = 0.0; | |
| 145 | 40 | return true; | |
| 146 | } | ||
| 147 | |||
| 148 |
1/2✓ Branch 0 taken 40 times.
✗ Branch 1 not taken.
|
40 | bool RedkinaAIntegralSimpsonALL::RunImpl() { |
| 149 |
1/2✓ Branch 0 taken 40 times.
✗ Branch 1 not taken.
|
40 | if (!func_) { |
| 150 | return false; | ||
| 151 | } | ||
| 152 | const size_t dim = a_.size(); | ||
| 153 |
1/2✓ Branch 0 taken 40 times.
✗ Branch 1 not taken.
|
40 | if (dim == 0) { |
| 154 | return false; | ||
| 155 | } | ||
| 156 | |||
| 157 | 40 | std::vector<double> h(dim); | |
| 158 | double h_prod = 1.0; | ||
| 159 |
2/2✓ Branch 0 taken 76 times.
✓ Branch 1 taken 40 times.
|
116 | for (size_t i = 0; i < dim; ++i) { |
| 160 | 76 | h[i] = (b_[i] - a_[i]) / static_cast<double>(n_[i]); | |
| 161 | 76 | h_prod *= h[i]; | |
| 162 | } | ||
| 163 | |||
| 164 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | const auto weights = PrecomputeWeights(n_); |
| 165 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | const auto strides = ComputeStrides(n_); |
| 166 |
1/2✓ Branch 0 taken 40 times.
✗ Branch 1 not taken.
|
40 | if (strides.empty()) { |
| 167 | return false; | ||
| 168 | } | ||
| 169 | |||
| 170 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | const size_t total_points = strides[0] * static_cast<size_t>(n_[0] + 1); |
| 171 | |||
| 172 | 40 | int rank = 0; | |
| 173 | 40 | int world_size = 1; | |
| 174 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); |
| 175 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | MPI_Comm_size(MPI_COMM_WORLD, &world_size); |
| 176 | |||
| 177 | 40 | const auto rank_u = static_cast<size_t>(rank); | |
| 178 | 40 | const auto size_u = static_cast<size_t>(world_size); | |
| 179 | 40 | const size_t base = total_points / size_u; | |
| 180 | 40 | const size_t rem = total_points % size_u; | |
| 181 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 20 times.
|
40 | const size_t local_start = (rank_u * base) + std::min(rank_u, rem); |
| 182 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 20 times.
|
40 | const size_t local_end = local_start + base + (rank_u < rem ? 1 : 0); |
| 183 | |||
| 184 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | const double local_sum = ComputeLocalSumMPI(local_start, local_end, a_, h, weights, strides, func_, dim); |
| 185 | |||
| 186 | 40 | double global_sum = 0.0; | |
| 187 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | MPI_Allreduce(&local_sum, &global_sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); |
| 188 | |||
| 189 | double denominator = 1.0; | ||
| 190 |
2/2✓ Branch 0 taken 76 times.
✓ Branch 1 taken 40 times.
|
116 | for (size_t i = 0; i < dim; ++i) { |
| 191 | 76 | denominator *= 3.0; | |
| 192 | } | ||
| 193 | 40 | result_ = (h_prod / denominator) * global_sum; | |
| 194 | 40 | return true; | |
| 195 | 40 | } | |
| 196 | |||
| 197 | 40 | bool RedkinaAIntegralSimpsonALL::PostProcessingImpl() { | |
| 198 | 40 | GetOutput() = result_; | |
| 199 | 40 | return true; | |
| 200 | } | ||
| 201 | |||
| 202 | } // namespace redkina_a_integral_simpson | ||
| 203 |