QuIDS: Quantum Irregular Dynamic Simulator
algorithm.hpp
1#pragma once
2
3#include <vector>
4
6namespace quids::utils {
7
10 for (int i = 1;; i *= 2)
11 if (i >= n)
12 return i;
13 }
14
16 int log_2_upper_bound(int n) {
17 for (int i = 1;; ++i)
18 if (1 << i >= n)
19 return i;
20 }
21
23 template <class iteratorType, class valueType>
24 void parallel_iota(iteratorType begin, iteratorType end, const valueType value_begin) {
25 size_t distance = std::distance(begin, end);
26
27 if (value_begin == 0) {
28 #pragma omp parallel for
29 for (size_t i = 0; i < distance; ++i)
30 begin[i] = i;
31 } else
32 #pragma omp parallel for
33 for (size_t i = 0; i < distance; ++i)
34 begin[i] = value_begin + i;
35 }
36
38 template <class idIteratorType, class countIteratorType, class functionType>
39 void generalized_partition(idIteratorType idx_in, idIteratorType idx_in_end, idIteratorType idx_buffer,
40 countIteratorType offset, countIteratorType offset_end,
41 functionType const partitioner) {
42
43 int const n_segment = std::distance(offset, offset_end) - 1;
44 long long int const id_end = std::distance(idx_in, idx_in_end);
45
46 // limit values
47 std::fill(offset, offset_end - 1, 0);
48 offset[n_segment] = id_end;
49
50 if (n_segment == 1)
51 return;
52 if (id_end == 0)
53 return;
54
55 for (long long int i = id_end - 1; i >= 0; --i) {
56 auto key = partitioner(idx_in[i]);
57 ++offset[key];
58 }
59
60 std::partial_sum(offset, offset + n_segment, offset);
61
62 for (long long int i = 0; i < id_end; ++i) {
63 auto idx = idx_in[i];
64 auto key = partitioner(idx);
65 idx_buffer[--offset[key]] = idx;
66 }
67
68 std::copy(idx_buffer, idx_buffer + id_end, idx_in);
69 }
70
72 template <class idIteratorType, class countIteratorType, class functionType>
73 void generalized_partition_from_iota(idIteratorType idx_in, idIteratorType idx_in_end, long long int const iotaOffset,
74 countIteratorType offset, countIteratorType offset_end,
75 functionType const partitioner) {
76
77 long long int iota_offset = iotaOffset;
78 int const n_segment = std::distance(offset, offset_end) - 1;
79 long long int const id_end = std::distance(idx_in, idx_in_end);
80
81 // limit values
82 std::fill(offset, offset_end - 1, 0);
83 offset[n_segment] = id_end;
84
85 if (n_segment == 1) {
86 std::iota(idx_in, idx_in_end, iota_offset);
87 return;
88 }
89 if (id_end == 0)
90 return;
91
92 for (long long int i = id_end + iota_offset - 1; i >= iota_offset; --i) {
93 auto key = partitioner(i);
94 ++offset[key];
95 }
96
97 std::partial_sum(offset, offset + n_segment, offset);
98
99 for (long long int i = iota_offset; i < id_end + iota_offset; +i) {
100 auto key = partitioner(i);
101 idx_in[--offset[key]] = i;
102 }
103 }
104
105
107 template <class idIteratorType, class countIteratorType, class functionType>
108 void parallel_generalized_partition(idIteratorType idx_in, idIteratorType idx_in_end, idIteratorType idx_buffer,
109 countIteratorType offset, countIteratorType offset_end,
110 functionType const partitioner) {
111
112 int const n_segment = std::distance(offset, offset_end) - 1;
113 long long int const id_end = std::distance(idx_in, idx_in_end);
114
115 // limit values
116 offset[0] = 0;
117 offset[n_segment] = id_end;
118
119 if (n_segment == 1)
120 return;
121 if (id_end == 0) {
122 std::fill(offset, offset_end, 0);
123 return;
124 }
125
126 // number of threads
127 int num_threads;
128 #pragma omp parallel
129 #pragma omp single
130 num_threads = omp_get_num_threads();
131
132 std::vector<size_t> count(n_segment*num_threads, 0);
133
134 #pragma omp parallel
135 {
136 int thread_id = omp_get_thread_num();
137
138 long long int begin = id_end*thread_id/num_threads;
139 long long int end = id_end*(thread_id + 1)/num_threads;
140 for (long long int i = end - 1; i >= begin; --i) {
141 auto key = partitioner(idx_in[i]);
142 ++count[key*num_threads + thread_id];
143 }
144 }
145
146 __gnu_parallel::partial_sum(count.begin(), count.begin() + n_segment*num_threads, count.begin());
147
148 #pragma omp parallel
149 {
150 int thread_id = omp_get_thread_num();
151
152 long long int begin = id_end*thread_id/num_threads;
153 long long int end = id_end*(thread_id + 1)/num_threads;
154 for (long long int i = begin; i < end; ++i) {
155 auto idx = idx_in[i];
156 auto key = partitioner(idx);
157 idx_buffer[--count[key*num_threads + thread_id]] = idx;
158 }
159 }
160
161 #pragma omp parallel for
162 for (int i = 1; i < n_segment; ++i)
163 offset[i] = count[i*num_threads];
164
165 std::copy(idx_buffer, idx_buffer + id_end, idx_in);
166 }
167
169 template <class idIteratorType, class countIteratorType, class functionType>
170 void parallel_generalized_partition_from_iota(idIteratorType idx_in, idIteratorType idx_in_end, long long int const iotaOffset,
171 countIteratorType offset, countIteratorType offset_end,
172 functionType const partitioner) {
173
174 int const n_segment = std::distance(offset, offset_end) - 1;
175 long long int const id_end = std::distance(idx_in, idx_in_end);
176
177 // limit values
178 offset[0] = 0;
179 offset[n_segment] = id_end;
180
181 if (n_segment == 1) {
182 parallel_iota(idx_in, idx_in_end, iotaOffset);
183 return;
184 }
185 if (id_end == 0) {
186 std::fill(offset, offset_end, 0);
187 return;
188 }
189
190 // number of threads
191 int num_threads;
192 #pragma omp parallel
193 #pragma omp single
194 num_threads = omp_get_num_threads();
195
196 std::vector<size_t> count(n_segment*num_threads, 0);
197
198 #pragma omp parallel
199 {
200 int thread_id = omp_get_thread_num();
201
202 long long int begin = id_end*thread_id/num_threads;
203 long long int end = id_end*(thread_id + 1)/num_threads;
204 for (long long int i = end + iotaOffset - 1; i >= begin + iotaOffset; --i) {
205 auto key = partitioner(i);
206 ++count[key*num_threads + thread_id];
207 }
208 }
209
210 __gnu_parallel::partial_sum(count.begin(), count.begin() + n_segment*num_threads, count.begin());
211
212 #pragma omp parallel
213 {
214 int thread_id = omp_get_thread_num();
215
216 long long int begin = id_end*thread_id/num_threads;
217 long long int end = id_end*(thread_id + 1)/num_threads;
218 for (long long int i = begin + iotaOffset; i < end + iotaOffset; ++i) {
219 auto key = partitioner(i);
220 idx_in[--count[key*num_threads + thread_id]] = i;
221 }
222 }
223
224 #pragma omp parallel for
225 for (int i = 1; i < n_segment; ++i)
226 offset[i] = count[i*num_threads];
227 }
228}
QuIDS utility function and variable namespace.
Definition: algorithm.hpp:6
void generalized_partition(idIteratorType idx_in, idIteratorType idx_in_end, idIteratorType idx_buffer, countIteratorType offset, countIteratorType offset_end, functionType const partitioner)
linear partitioning algorithm into n partitions
Definition: algorithm.hpp:39
int log_2_upper_bound(int n)
returns the upperbound bound of the log2
Definition: algorithm.hpp:16
int nearest_power_of_two(int n)
returns the upper bound as a power of two
Definition: algorithm.hpp:9
void parallel_generalized_partition(idIteratorType idx_in, idIteratorType idx_in_end, idIteratorType idx_buffer, countIteratorType offset, countIteratorType offset_end, functionType const partitioner)
linear partitioning algorithm into n partitions in parallel
Definition: algorithm.hpp:108
void generalized_partition_from_iota(idIteratorType idx_in, idIteratorType idx_in_end, long long int const iotaOffset, countIteratorType offset, countIteratorType offset_end, functionType const partitioner)
linear partitioning algorithm into n partitions without an initialized index list
Definition: algorithm.hpp:73
void parallel_generalized_partition_from_iota(idIteratorType idx_in, idIteratorType idx_in_end, long long int const iotaOffset, countIteratorType offset, countIteratorType offset_end, functionType const partitioner)
linear partitioning algorithm into n partitions without an initialized index list in parallel
Definition: algorithm.hpp:170
void parallel_iota(iteratorType begin, iteratorType end, const valueType value_begin)
parallel iota
Definition: algorithm.hpp:24