| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "nalitov_d_dijkstras_algorithm/omp/include/ops_omp.hpp" | ||
| 2 | |||
| 3 | #include <omp.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <atomic> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <cstdint> | ||
| 9 | #include <utility> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "nalitov_d_dijkstras_algorithm/common/include/common.hpp" | ||
| 13 | #include "util/include/util.hpp" | ||
| 14 | |||
| 15 | namespace nalitov_d_dijkstras_algorithm { | ||
| 16 | |||
| 17 | namespace { | ||
| 18 | |||
| 19 | using VertexChoice = std::pair<Cost, NodeId>; | ||
| 20 | |||
| 21 | 60 | VertexChoice SelectBestVertex(const std::vector<char> &visited, const std::vector<Cost> &dist, int n, int threads) { | |
| 22 | 60 | Cost best_cost = kInf; | |
| 23 | 60 | NodeId best_vtx = -1; | |
| 24 | |||
| 25 | 60 | #pragma omp parallel default(none) num_threads(threads) shared(visited, dist, n, best_cost, best_vtx) | |
| 26 | { | ||
| 27 | Cost thr_cost = kInf; | ||
| 28 | NodeId thr_vtx = -1; | ||
| 29 | |||
| 30 | #pragma omp for nowait | ||
| 31 | for (int vi = 0; vi < n; ++vi) { | ||
| 32 | const auto idx = static_cast<std::size_t>(vi); | ||
| 33 | if (visited[idx] == 0 && dist[idx] < thr_cost) { | ||
| 34 | thr_cost = dist[idx]; | ||
| 35 | thr_vtx = vi; | ||
| 36 | } | ||
| 37 | } | ||
| 38 | |||
| 39 | #pragma omp critical | ||
| 40 | { | ||
| 41 | if (thr_cost < best_cost || (thr_cost == best_cost && thr_vtx != -1 && (best_vtx == -1 || thr_vtx < best_vtx))) { | ||
| 42 | best_cost = thr_cost; | ||
| 43 | best_vtx = thr_vtx; | ||
| 44 | } | ||
| 45 | } | ||
| 46 | } | ||
| 47 | |||
| 48 | 60 | return {best_cost, best_vtx}; | |
| 49 | } | ||
| 50 | |||
| 51 | void RelaxNeighbors(const std::vector<std::pair<NodeId, Cost>> &nbrs, Cost best_cost, const std::vector<char> &visited, | ||
| 52 | std::vector<Cost> &dist, int threads) { | ||
| 53 | const std::size_t sz = nbrs.size(); | ||
| 54 | 60 | #pragma omp parallel for default(none) num_threads(threads) shared(nbrs, best_cost, visited, dist, sz) schedule(static) | |
| 55 | for (std::size_t i = 0; i < sz; ++i) { | ||
| 56 | const NodeId tgt = nbrs[i].first; | ||
| 57 | const Cost w = nbrs[i].second; | ||
| 58 | const auto tgt_idx = static_cast<std::size_t>(tgt); | ||
| 59 | |||
| 60 | if (visited[tgt_idx] == 0 && best_cost <= kInf - w) { | ||
| 61 | const Cost cand = best_cost + w; | ||
| 62 | std::atomic_ref<Cost> target(dist[tgt_idx]); | ||
| 63 | Cost old_val = target.load(std::memory_order_relaxed); | ||
| 64 | |||
| 65 | while (cand < old_val) { | ||
| 66 | if (target.compare_exchange_weak(old_val, cand, std::memory_order_relaxed)) { | ||
| 67 | break; | ||
| 68 | } | ||
| 69 | } | ||
| 70 | } | ||
| 71 | } | ||
| 72 | } | ||
| 73 | |||
| 74 | std::int64_t SumDistances(const std::vector<Cost> &dist, int n, int threads) { | ||
| 75 | std::int64_t total = 0; | ||
| 76 | |||
| 77 | 16 | #pragma omp parallel for default(none) num_threads(threads) shared(dist, n) reduction(+ : total) | |
| 78 | for (int vi = 0; vi < n; ++vi) { | ||
| 79 | const auto d = dist[static_cast<std::size_t>(vi)]; | ||
| 80 | if (d != kInf) { | ||
| 81 | total += static_cast<std::int64_t>(d); | ||
| 82 | } | ||
| 83 | } | ||
| 84 | |||
| 85 | return total; | ||
| 86 | } | ||
| 87 | |||
| 88 | } // namespace | ||
| 89 | |||
| 90 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | NalitovDDijkstrasAlgorithmOmp::NalitovDDijkstrasAlgorithmOmp(const InType &in) { |
| 91 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 92 | GetInput() = in; | ||
| 93 | 16 | GetOutput() = 0; | |
| 94 | 16 | } | |
| 95 | |||
| 96 | 16 | bool NalitovDDijkstrasAlgorithmOmp::ValidationImpl() { | |
| 97 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | if (GetOutput() != 0) { |
| 98 | return false; | ||
| 99 | } | ||
| 100 | const InType &in = GetInput(); | ||
| 101 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | if (in.n <= 0 || in.n > 10000) { |
| 102 | return false; | ||
| 103 | } | ||
| 104 |
2/4✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.
|
16 | if (in.source < 0 || in.source >= in.n) { |
| 105 | return false; | ||
| 106 | } | ||
| 107 | |||
| 108 | return std::ranges::all_of(in.arcs, [&in](const Arc &a) { | ||
| 109 |
5/10✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 48 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 48 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 48 times.
✗ Branch 9 not taken.
|
48 | return a.from >= 0 && a.to >= 0 && a.from < in.n && a.to < in.n && a.weight >= 0; |
| 110 | }); | ||
| 111 | } | ||
| 112 | |||
| 113 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
|
16 | bool NalitovDDijkstrasAlgorithmOmp::PreProcessingImpl() { |
| 114 | const InType &in = GetInput(); | ||
| 115 | 16 | const int vn = in.n; | |
| 116 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
|
16 | if (vn <= 0) { |
| 117 | return false; | ||
| 118 | } | ||
| 119 | |||
| 120 | 16 | omp_threads_ = ppc::util::GetNumThreads(); | |
| 121 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
|
16 | if (omp_threads_ <= 0) { |
| 122 | return false; | ||
| 123 | } | ||
| 124 | |||
| 125 | 16 | const auto vn_u = static_cast<std::size_t>(vn); | |
| 126 | 16 | std::vector<std::size_t> outdeg(vn_u, 0); | |
| 127 |
2/2✓ Branch 0 taken 48 times.
✓ Branch 1 taken 16 times.
|
64 | for (const Arc &a : in.arcs) { |
| 128 | 48 | ++outdeg[static_cast<std::size_t>(a.from)]; | |
| 129 | } | ||
| 130 | |||
| 131 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | graph_.assign(vn_u, {}); |
| 132 |
1/4✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
16 | std::vector<std::atomic<std::size_t>> row_next(vn_u); |
| 133 |
2/2✓ Branch 0 taken 60 times.
✓ Branch 1 taken 16 times.
|
76 | for (std::size_t i = 0; i < vn_u; ++i) { |
| 134 |
1/2✓ Branch 1 taken 60 times.
✗ Branch 2 not taken.
|
60 | graph_[i].resize(outdeg[i]); |
| 135 | row_next[i].store(0, std::memory_order_relaxed); | ||
| 136 | } | ||
| 137 | |||
| 138 | const std::size_t arc_count = in.arcs.size(); | ||
| 139 | auto &graph = graph_; | ||
| 140 | |||
| 141 | 16 | #pragma omp parallel for default(none) num_threads(omp_threads_) shared(in, row_next, graph, arc_count) schedule(guided) | |
| 142 | for (std::size_t ei = 0; ei < arc_count; ++ei) { | ||
| 143 | const Arc &a = in.arcs[ei]; | ||
| 144 | const auto u = static_cast<std::size_t>(a.from); | ||
| 145 | const std::size_t slot = row_next[u].fetch_add(1, std::memory_order_relaxed); | ||
| 146 | graph[u][slot] = {a.to, a.weight}; | ||
| 147 | } | ||
| 148 | |||
| 149 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | dist_.assign(vn_u, kInf); |
| 150 |
1/4✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
16 | visited_.assign(vn_u, 0); |
| 151 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | dist_[static_cast<std::size_t>(in.source)] = 0; |
| 152 | |||
| 153 | return true; | ||
| 154 | } | ||
| 155 | |||
| 156 | 16 | bool NalitovDDijkstrasAlgorithmOmp::RunImpl() { | |
| 157 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | const int n = GetInput().n; |
| 158 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | if (graph_.size() != static_cast<std::size_t>(n)) { |
| 159 | return false; | ||
| 160 | } | ||
| 161 | |||
| 162 | 16 | auto &dist = dist_; | |
| 163 | 16 | auto &visited = visited_; | |
| 164 | const auto &graph = graph_; | ||
| 165 | 16 | const int threads = omp_threads_; | |
| 166 | |||
| 167 |
2/2✓ Branch 0 taken 60 times.
✓ Branch 1 taken 16 times.
|
76 | for (int step = 0; step < n; ++step) { |
| 168 | 60 | const auto choice = SelectBestVertex(visited, dist, n, threads); | |
| 169 | 60 | const Cost best_cost = choice.first; | |
| 170 | 60 | const NodeId best_vtx = choice.second; | |
| 171 | |||
| 172 |
1/2✓ Branch 0 taken 60 times.
✗ Branch 1 not taken.
|
60 | if (best_vtx == -1 || best_cost == kInf) { |
| 173 | break; | ||
| 174 | } | ||
| 175 | |||
| 176 | 60 | visited[static_cast<std::size_t>(best_vtx)] = 1; | |
| 177 | RelaxNeighbors(graph[static_cast<std::size_t>(best_vtx)], best_cost, visited, dist, threads); | ||
| 178 | } | ||
| 179 | |||
| 180 | 16 | GetOutput() = static_cast<OutType>(SumDistances(dist, n, threads)); | |
| 181 | 16 | return true; | |
| 182 | } | ||
| 183 | |||
| 184 | 16 | bool NalitovDDijkstrasAlgorithmOmp::PostProcessingImpl() { | |
| 185 | 16 | return GetOutput() >= 0; | |
| 186 | } | ||
| 187 | |||
| 188 | } // namespace nalitov_d_dijkstras_algorithm | ||
| 189 |