10 for (
int i = 1;; i *= 2)
23 template <
class iteratorType,
class valueType>
24 void parallel_iota(iteratorType begin, iteratorType end,
const valueType value_begin) {
25 size_t distance = std::distance(begin, end);
27 if (value_begin == 0) {
28 #pragma omp parallel for
29 for (
size_t i = 0; i < distance; ++i)
32 #pragma omp parallel for
33 for (
size_t i = 0; i < distance; ++i)
34 begin[i] = value_begin + i;
38 template <
class idIteratorType,
class countIteratorType,
class functionType>
40 countIteratorType offset, countIteratorType offset_end,
41 functionType
const partitioner) {
43 int const n_segment = std::distance(offset, offset_end) - 1;
44 long long int const id_end = std::distance(idx_in, idx_in_end);
47 std::fill(offset, offset_end - 1, 0);
48 offset[n_segment] = id_end;
55 for (
long long int i = id_end - 1; i >= 0; --i) {
56 auto key = partitioner(idx_in[i]);
60 std::partial_sum(offset, offset + n_segment, offset);
62 for (
long long int i = 0; i < id_end; ++i) {
64 auto key = partitioner(idx);
65 idx_buffer[--offset[key]] = idx;
68 std::copy(idx_buffer, idx_buffer + id_end, idx_in);
72 template <
class idIteratorType,
class countIteratorType,
class functionType>
74 countIteratorType offset, countIteratorType offset_end,
75 functionType
const partitioner) {
77 long long int iota_offset = iotaOffset;
78 int const n_segment = std::distance(offset, offset_end) - 1;
79 long long int const id_end = std::distance(idx_in, idx_in_end);
82 std::fill(offset, offset_end - 1, 0);
83 offset[n_segment] = id_end;
86 std::iota(idx_in, idx_in_end, iota_offset);
92 for (
long long int i = id_end + iota_offset - 1; i >= iota_offset; --i) {
93 auto key = partitioner(i);
97 std::partial_sum(offset, offset + n_segment, offset);
99 for (
long long int i = iota_offset; i < id_end + iota_offset; +i) {
100 auto key = partitioner(i);
101 idx_in[--offset[key]] = i;
107 template <
class idIteratorType,
class countIteratorType,
class functionType>
109 countIteratorType offset, countIteratorType offset_end,
110 functionType
const partitioner) {
112 int const n_segment = std::distance(offset, offset_end) - 1;
113 long long int const id_end = std::distance(idx_in, idx_in_end);
117 offset[n_segment] = id_end;
122 std::fill(offset, offset_end, 0);
130 num_threads = omp_get_num_threads();
132 std::vector<size_t> count(n_segment*num_threads, 0);
136 int thread_id = omp_get_thread_num();
138 long long int begin = id_end*thread_id/num_threads;
139 long long int end = id_end*(thread_id + 1)/num_threads;
140 for (
long long int i = end - 1; i >= begin; --i) {
141 auto key = partitioner(idx_in[i]);
142 ++count[key*num_threads + thread_id];
146 __gnu_parallel::partial_sum(count.begin(), count.begin() + n_segment*num_threads, count.begin());
150 int thread_id = omp_get_thread_num();
152 long long int begin = id_end*thread_id/num_threads;
153 long long int end = id_end*(thread_id + 1)/num_threads;
154 for (
long long int i = begin; i < end; ++i) {
155 auto idx = idx_in[i];
156 auto key = partitioner(idx);
157 idx_buffer[--count[key*num_threads + thread_id]] = idx;
161 #pragma omp parallel for
162 for (
int i = 1; i < n_segment; ++i)
163 offset[i] = count[i*num_threads];
165 std::copy(idx_buffer, idx_buffer + id_end, idx_in);
169 template <
class idIteratorType,
class countIteratorType,
class functionType>
171 countIteratorType offset, countIteratorType offset_end,
172 functionType
const partitioner) {
174 int const n_segment = std::distance(offset, offset_end) - 1;
175 long long int const id_end = std::distance(idx_in, idx_in_end);
179 offset[n_segment] = id_end;
181 if (n_segment == 1) {
186 std::fill(offset, offset_end, 0);
194 num_threads = omp_get_num_threads();
196 std::vector<size_t> count(n_segment*num_threads, 0);
200 int thread_id = omp_get_thread_num();
202 long long int begin = id_end*thread_id/num_threads;
203 long long int end = id_end*(thread_id + 1)/num_threads;
204 for (
long long int i = end + iotaOffset - 1; i >= begin + iotaOffset; --i) {
205 auto key = partitioner(i);
206 ++count[key*num_threads + thread_id];
210 __gnu_parallel::partial_sum(count.begin(), count.begin() + n_segment*num_threads, count.begin());
214 int thread_id = omp_get_thread_num();
216 long long int begin = id_end*thread_id/num_threads;
217 long long int end = id_end*(thread_id + 1)/num_threads;
218 for (
long long int i = begin + iotaOffset; i < end + iotaOffset; ++i) {
219 auto key = partitioner(i);
220 idx_in[--count[key*num_threads + thread_id]] = i;
224 #pragma omp parallel for
225 for (
int i = 1; i < n_segment; ++i)
226 offset[i] = count[i*num_threads];
QuIDS utility function and variable namespace.
Definition: algorithm.hpp:6
void generalized_partition(idIteratorType idx_in, idIteratorType idx_in_end, idIteratorType idx_buffer, countIteratorType offset, countIteratorType offset_end, functionType const partitioner)
linear partitioning algorithm into n partitions
Definition: algorithm.hpp:39
int log_2_upper_bound(int n)
returns the upperbound bound of the log2
Definition: algorithm.hpp:16
int nearest_power_of_two(int n)
returns the upper bound as a power of two
Definition: algorithm.hpp:9
void parallel_generalized_partition(idIteratorType idx_in, idIteratorType idx_in_end, idIteratorType idx_buffer, countIteratorType offset, countIteratorType offset_end, functionType const partitioner)
linear partitioning algorithm into n partitions in parallel
Definition: algorithm.hpp:108
void generalized_partition_from_iota(idIteratorType idx_in, idIteratorType idx_in_end, long long int const iotaOffset, countIteratorType offset, countIteratorType offset_end, functionType const partitioner)
linear partitioning algorithm into n partitions without an initialized index list
Definition: algorithm.hpp:73
void parallel_generalized_partition_from_iota(idIteratorType idx_in, idIteratorType idx_in_end, long long int const iotaOffset, countIteratorType offset, countIteratorType offset_end, functionType const partitioner)
linear partitioning algorithm into n partitions without an initialized index list in parallel
Definition: algorithm.hpp:170
void parallel_iota(iteratorType begin, iteratorType end, const valueType value_begin)
parallel iota
Definition: algorithm.hpp:24