| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "../include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | #include <omp.h> | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <functional> | ||
| 9 | #include <tuple> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "../../common/include/common.hpp" | ||
| 14 | #include "util/include/util.hpp" | ||
| 15 | |||
| 16 | namespace kutergin_a_multidim_trapezoid { | ||
| 17 | |||
| 18 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | KuterginAMultidimTrapezoidALL::KuterginAMultidimTrapezoidALL(const InType &in) { |
| 19 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 20 | GetInput() = in; | ||
| 21 | 24 | GetOutput() = 0.0; | |
| 22 | 24 | } | |
| 23 | |||
| 24 | 24 | bool KuterginAMultidimTrapezoidALL::ValidationImpl() { | |
| 25 | const auto &input = GetInput(); | ||
| 26 | const auto &limits = std::get<1>(input); | ||
| 27 |
1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
|
24 | const int n_steps = std::get<2>(input); |
| 28 | |||
| 29 |
1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
|
24 | if (limits.empty()) { |
| 30 | return false; | ||
| 31 | } | ||
| 32 | 24 | return n_steps > 0; | |
| 33 | } | ||
| 34 | |||
| 35 | 24 | bool KuterginAMultidimTrapezoidALL::PreProcessingImpl() { | |
| 36 | local_input_ = GetInput(); | ||
| 37 | 24 | res_ = 0.0; | |
| 38 | 24 | return true; | |
| 39 | } | ||
| 40 | |||
| 41 | 24 | bool KuterginAMultidimTrapezoidALL::RunImpl() { | |
| 42 | 24 | int rank = 0; | |
| 43 | 24 | int size = 1; | |
| 44 | 24 | const bool is_mpi = ppc::util::IsUnderMpirun(); | |
| 45 | |||
| 46 |
1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
|
24 | if (is_mpi) { |
| 47 | 24 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 48 | 24 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 49 | } | ||
| 50 | |||
| 51 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
|
24 | size_t dims = (rank == 0) ? std::get<1>(local_input_).size() : 0; |
| 52 | |||
| 53 | 24 | DistributeData(rank, dims); | |
| 54 | |||
| 55 | const auto &func = std::get<0>(local_input_); | ||
| 56 | const auto &limits = std::get<1>(local_input_); | ||
| 57 | 24 | const int n_steps = std::get<2>(local_input_); | |
| 58 | |||
| 59 | size_t total_nodes = 1; | ||
| 60 | 24 | std::vector<double> h(dims); | |
| 61 | double cell_volume = 1.0; | ||
| 62 | |||
| 63 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 24 times.
|
64 | for (size_t i = 0; i < dims; ++i) { |
| 64 | 40 | total_nodes *= (static_cast<size_t>(n_steps) + 1); | |
| 65 | 40 | h[i] = (limits[i].second - limits[i].first) / n_steps; | |
| 66 | 40 | cell_volume *= h[i]; | |
| 67 | } | ||
| 68 | |||
| 69 | 24 | const size_t proc_chunk = total_nodes / size; | |
| 70 | 24 | const size_t proc_remainder = total_nodes % size; | |
| 71 | |||
| 72 |
1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
|
24 | const size_t my_proc_count = proc_chunk + (std::cmp_less(rank, proc_remainder) ? 1 : 0); |
| 73 |
1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
|
24 | const size_t my_proc_start = (rank * proc_chunk) + std::min(static_cast<size_t>(rank), proc_remainder); |
| 74 | |||
| 75 | 24 | double local_sum = 0.0; | |
| 76 | |||
| 77 |
1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
|
24 | if (my_proc_count > 0) { |
| 78 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | int num_threads = ppc::util::GetNumThreads(); |
| 79 | 24 | omp_set_num_threads(num_threads); | |
| 80 | |||
| 81 | 24 | #pragma omp parallel default(none) shared(h, dims, my_proc_start, my_proc_count, func, n_steps, limits) \ | |
| 82 | reduction(+ : local_sum) | ||
| 83 | { | ||
| 84 | int tid = omp_get_thread_num(); | ||
| 85 | int t_count = omp_get_num_threads(); | ||
| 86 | |||
| 87 | size_t thread_chunk = my_proc_count / t_count; | ||
| 88 | size_t thread_remainder = my_proc_count % t_count; | ||
| 89 | |||
| 90 | size_t my_thread_count = thread_chunk + (std::cmp_less(tid, thread_remainder) ? 1 : 0); | ||
| 91 | size_t my_thread_start = | ||
| 92 | my_proc_start + (tid * thread_chunk) + std::min(static_cast<size_t>(tid), thread_remainder); | ||
| 93 | |||
| 94 | local_sum += CalculateChunkSum(my_thread_start, my_thread_start + my_thread_count, h, limits, n_steps, func); | ||
| 95 | } | ||
| 96 | } | ||
| 97 | |||
| 98 |
1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
|
24 | if (is_mpi) { |
| 99 | 24 | double global_sum = 0.0; | |
| 100 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | MPI_Reduce(&local_sum, &global_sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); |
| 101 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
|
24 | if (rank == 0) { |
| 102 | 12 | res_ = global_sum * cell_volume; | |
| 103 | } | ||
| 104 | } else { | ||
| 105 | ✗ | res_ = local_sum * cell_volume; | |
| 106 | } | ||
| 107 | |||
| 108 | 24 | return true; | |
| 109 | } | ||
| 110 | |||
| 111 | 24 | bool KuterginAMultidimTrapezoidALL::PostProcessingImpl() { | |
| 112 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | if (ppc::util::IsUnderMpirun()) { |
| 113 | 24 | MPI_Bcast(&res_, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); | |
| 114 | } | ||
| 115 | |||
| 116 | 24 | GetOutput() = res_; | |
| 117 | 24 | return true; | |
| 118 | } | ||
| 119 | |||
| 120 | 24 | void KuterginAMultidimTrapezoidALL::DistributeData(int rank, size_t &dims) { | |
| 121 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 24 times.
|
24 | if (!ppc::util::IsUnderMpirun()) { |
| 122 | ✗ | return; | |
| 123 | } | ||
| 124 | |||
| 125 | 24 | int dims_io = static_cast<int>(dims); | |
| 126 | 24 | MPI_Bcast(&dims_io, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 127 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
|
24 | dims = static_cast<size_t>(dims_io); |
| 128 | |||
| 129 | auto &limits = std::get<1>(local_input_); | ||
| 130 | auto &n_steps = std::get<2>(local_input_); | ||
| 131 | |||
| 132 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
|
24 | if (rank != 0) { |
| 133 | 12 | limits.resize(dims); | |
| 134 | } | ||
| 135 | |||
| 136 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 24 times.
|
64 | for (size_t i = 0; i < dims; ++i) { |
| 137 | 40 | MPI_Bcast(&limits[i].first, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); | |
| 138 | 40 | MPI_Bcast(&limits[i].second, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); | |
| 139 | } | ||
| 140 | 24 | MPI_Bcast(&n_steps, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 141 | } | ||
| 142 | |||
| 143 | 48 | double KuterginAMultidimTrapezoidALL::CalculateChunkSum( | |
| 144 | size_t start_idx, size_t end_idx, const std::vector<double> &h, | ||
| 145 | const std::vector<std::pair<double, double>> &limits, int n_steps, | ||
| 146 | const std::function<double(const std::vector<double> &)> &func) { | ||
| 147 |
1/2✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.
|
48 | if (start_idx >= end_idx) { |
| 148 | return 0.0; | ||
| 149 | } | ||
| 150 | |||
| 151 | const size_t dims = limits.size(); | ||
| 152 | 48 | std::vector<double> coords(dims); | |
| 153 | double chunk_sum = 0.0; | ||
| 154 | |||
| 155 |
2/2✓ Branch 0 taken 359012 times.
✓ Branch 1 taken 48 times.
|
359060 | for (size_t i = start_idx; i < end_idx; ++i) { |
| 156 | double weight = 1.0; | ||
| 157 | size_t temp_idx = i; | ||
| 158 | |||
| 159 |
2/2✓ Branch 0 taken 943980 times.
✓ Branch 1 taken 359012 times.
|
1302992 | for (size_t dim = 0; dim < dims; ++dim) { |
| 160 | 943980 | const int nodes_in_dim = n_steps + 1; | |
| 161 | 943980 | const int coord_idx = static_cast<int>(temp_idx % nodes_in_dim); | |
| 162 |
2/2✓ Branch 0 taken 25760 times.
✓ Branch 1 taken 918220 times.
|
943980 | temp_idx /= nodes_in_dim; |
| 163 | |||
| 164 |
2/2✓ Branch 0 taken 25760 times.
✓ Branch 1 taken 918220 times.
|
943980 | coords[dim] = limits[dim].first + (static_cast<double>(coord_idx) * h[dim]); |
| 165 | |||
| 166 |
2/2✓ Branch 0 taken 25760 times.
✓ Branch 1 taken 918220 times.
|
943980 | if (coord_idx == 0 || coord_idx == n_steps) { |
| 167 | 25760 | weight *= 0.5; | |
| 168 | } | ||
| 169 | } | ||
| 170 | 359012 | chunk_sum += weight * func(coords); | |
| 171 | } | ||
| 172 | |||
| 173 | return chunk_sum; | ||
| 174 | } | ||
| 175 | |||
| 176 | } // namespace kutergin_a_multidim_trapezoid | ||
| 177 |