13 template <
class Un
signedIntIterator1,
class Un
signedIntIterator2>
15 UnsignedIntIterator2 workSharingIndexesBegin, UnsignedIntIterator2 workSharingIndexesEnd) {
17 const size_t num_segments = std::distance(workSharingIndexesBegin, workSharingIndexesEnd) - 1;
18 const size_t num_buckets = std::distance(prefixSumLoadBegin, prefixSumLoadEnd) - 1;
20 std::fill(workSharingIndexesBegin, workSharingIndexesEnd - 1, 0);
21 workSharingIndexesBegin[num_segments] = num_buckets;
23 size_t w_min = prefixSumLoadBegin[num_buckets]/num_segments;
24 size_t w_max = 2*w_min + 1;
26 std::vector<size_t> separators(num_segments - 1, 0);
29 const auto check_partition = [&](
size_t wprobe) {
30 const auto print_debug = [&]() {
32 for (
int i = 0; i <= num_segments; ++i)
33 std::cerr << prefixSumLoadBegin[workSharingIndexesBegin[i]] <<
"[" << workSharingIndexesBegin[i] <<
"], ";
38 for (
int i = 0; i < num_segments; ++i) {
39 if (prefixSumLoadBegin[workSharingIndexesBegin[i + 1]] - prefixSumLoadBegin[workSharingIndexesBegin[i]] > wprobe) {
40 std::cerr <<
"CCP failed (too big of a gap) at " << i <<
"/" << num_segments <<
"\n";
44 if (workSharingIndexesBegin[i + 1] + 1 < num_buckets)
45 if (prefixSumLoadBegin[workSharingIndexesBegin[i + 1] + 1] - prefixSumLoadBegin[workSharingIndexesBegin[i]] < wprobe) {
46 std::cerr <<
"CCP failed (too small of a gap) at " << i <<
"/" << num_segments <<
"\n";
55 auto probe_load_sharing = [&](
size_t wprobe) {
56 if (wprobe == 0 || wprobe >= prefixSumLoadBegin[num_buckets])
60 size_t last_separator = 0;
61 size_t last_separator_value = 0;
64 for (
size_t separator = 1; separator < num_segments; ++separator) {
66 separators[separator - 1] = std::distance(prefixSumLoadBegin,
67 std::lower_bound(prefixSumLoadBegin + last_separator, prefixSumLoadEnd,
68 last_separator_value + wprobe)) - 1;
71 last_separator = separators[separator - 1];
72 last_separator_value = prefixSumLoadBegin[last_separator];
75 if (prefixSumLoadBegin[num_buckets] - last_separator_value > wprobe*(num_segments - separator))
80 for (
size_t i = 0; i < num_segments - 1; ++i)
81 workSharingIndexesBegin[i + 1] = separators[i];
87 if (num_segments > 1) {
89 while(w_min != 0 && probe_load_sharing(w_min)) { w_min /= 2; };
92 while(!probe_load_sharing(w_max)) { w_max *= 2; };
95 while (w_max - w_min > 1) {
96 size_t middle = (w_min + w_max) / 2;
98 if (probe_load_sharing(middle)) {
QuIDS utility function and variable namespace.
Definition: algorithm.hpp:6
void load_balancing_from_prefix_sum(const UnsignedIntIterator1 prefixSumLoadBegin, const UnsignedIntIterator1 prefixSumLoadEnd, UnsignedIntIterator2 workSharingIndexesBegin, UnsignedIntIterator2 workSharingIndexesEnd)
simple CCP load balacing implementation.
Definition: load_balancing.hpp:14