5#include <parallel/algorithm>
6#include <parallel/numeric>
13#include "utils/libs/robin_hood.h"
15#include "utils/vector.hpp"
16#include "utils/load_balancing.hpp"
17#include "utils/algorithm.hpp"
18#include "utils/memory.hpp"
19#include "utils/random.hpp"
22 #define PROBA_TYPE double
24#ifndef HASH_MAP_OVERHEAD
25 #define HASH_MAP_OVERHEAD 1.7
27#ifndef ALIGNMENT_BYTE_LENGTH
28 #define ALIGNMENT_BYTE_LENGTH 8
31 #define TOLERANCE 1e-30;
34 #define SAFETY_MARGIN 0.2
36#ifndef LOAD_BALANCING_BUCKET_PER_THREAD
37 #define LOAD_BALANCING_BUCKET_PER_THREAD 32
40#define ITERATION_MEMORY_SIZE 2*sizeof(PROBA_TYPE) + 3*sizeof(size_t) + sizeof(float) + 2*sizeof(uint)
41#define SYMBOLIC_ITERATION_MEMORY_SIZE 2*sizeof(PROBA_TYPE) + 4*sizeof(size_t) + 2*sizeof(uint) + sizeof(float)
47 #define omp_set_nested(i)
48 #define omp_get_thread_num() 0
49 #define omp_get_num_threads() 1
64 #ifdef SIMPLE_TRUNCATION
73 typedef std::complex<PROBA_TYPE>
mag_t;
81 typedef std::function<void(
char* parent_begin,
char* parent_end,
mag_t &mag)>
modifier_t;
83 typedef std::function<PROBA_TYPE(
char const *object_begin,
char const *object_end)>
observable_t;
85 typedef std::function<void(
const char* step)>
debug_t;
96 return alignment_offset;
110 virtual inline void get_num_child(
char const *parent_begin,
char const *parent_end, uint &num_child, uint &max_child_size)
const = 0;
119 virtual inline void populate_child(
char const *parent_begin,
char const *parent_end,
char*
const child_begin, uint
const child_id, uint &size,
mag_t &mag)
const = 0;
126 virtual inline void populate_child_simple(
char const *parent_begin,
char const *parent_end,
char*
const child_begin, uint
const child_id)
const {
127 uint size_placeholder;
128 mag_t mag_placeholder;
130 size_placeholder, mag_placeholder);
138 virtual inline size_t hasher(
char const *object_begin,
char const *object_end)
const {
139 return std::hash<std::string_view>()(std::string_view(object_begin, std::distance(object_begin, object_end)));
162 append(object_begin_, object_end_);
169 void append(
char const *object_begin_,
char const *object_end_,
mag_t const mag=1) {
171 size_t size = std::distance(object_begin_, object_end_);
175 allocate(offset + size + alignment_offset);
177 for (
size_t i = 0; i < size; ++i)
178 objects[offset + i] = object_begin_[i];
182 object_begin[
num_object] = offset + size + alignment_offset;
189 void pop(
size_t n=1,
bool normalize_=
true) {
197 if (normalize_) normalize();
211 PROBA_TYPE local_avg = 0;
213 for (
size_t oid = 0; oid <
num_object; ++oid) {
215 std::complex<PROBA_TYPE> mag;
218 char const *this_object_begin;
219 get_object(oid, this_object_begin, size, mag);
220 local_avg += observable(this_object_begin, this_object_begin + size) * std::norm(mag);
237 void get_object(
size_t const object_id,
char *& object_begin_, uint &object_size_,
mag_t *&mag) {
238 object_size_ = object_size[object_id];
239 mag = &magnitude[object_id];
240 object_begin_ = &objects[object_begin[object_id]];
249 void get_object(
size_t const object_id,
char const *& object_begin_, uint &object_size_,
mag_t &mag)
const {
250 object_size_ = object_size[object_id];
251 mag = magnitude[object_id];
252 object_begin_ = &objects[object_begin[object_id]];
261 mutable size_t truncated_num_object = 0;
262 mutable uint ub_symbolic_object_size = 0;
275 #pragma omp parallel sections
302 void inline allocate(
size_t size)
const {
310 size_t get_mem_size()
const {
311 static const size_t iteration_memory_size = ITERATION_MEMORY_SIZE;
312 return magnitude.size()*iteration_memory_size + objects.size();
314 size_t get_object_length()
const {
317 size_t get_num_symbolic_object()
const {
318 return __gnu_parallel::accumulate(&num_childs[0], &num_childs[0] +
num_object, (
size_t)0);
322 void compute_num_child(
rule_t const *rule,
debug_t mid_step_function=[](
const char*){})
const;
323 void prepare_truncate(
debug_t mid_step_function=[](
const char*){})
const;
324 size_t get_truncated_mem_size(
size_t begin_num_object=0)
const;
325 void truncate(
size_t begin_num_object,
size_t max_num_object,
debug_t mid_step_function=[](
const char*){})
const;
326 void generate_symbolic_iteration(
rule_t const *rule,
sy_it_t &symbolic_iteration,
debug_t mid_step_function=[](
const char*){})
const;
328 void normalize(
debug_t mid_step_function=[](
const char*){});
348 size_t next_iteration_num_object = 0;
350 std::vector<char*> placeholder;
363 #pragma omp parallel sections
392 void inline reserve(
size_t max_size) {
396 num_threads = omp_get_num_threads();
398 placeholder.resize(num_threads);
402 auto &buffer = placeholder[omp_get_thread_num()];
405 buffer =
new char[max_size];
413 size_t get_mem_size()
const {
414 static const size_t symbolic_iteration_memory_size = SYMBOLIC_ITERATION_MEMORY_SIZE;
415 return magnitude.size()*symbolic_iteration_memory_size;
418 void compute_collisions(
debug_t mid_step_function=[](
const char*){});
419 size_t get_truncated_mem_size(
size_t begin_num_object=0)
const;
420 void truncate(
size_t begin_num_object,
size_t max_num_object,
debug_t mid_step_function=[](
const char*){});
421 void prepare_truncate(
debug_t mid_step_function=[](
const char*){});
422 void finalize(
rule_t const *rule,
it_t const &last_iteration,
it_t &next_iteration,
debug_t mid_step_function=[](
const char*){});
449 mid_step_function(
"truncate_symbolic - prepare");
450 iteration.prepare_truncate(mid_step_function);
453 mid_step_function(
"truncate_symbolic");
454 if (max_num_object == 0) {
461 if (
iteration.get_truncated_mem_size() > target_memory) {
463 while (end > begin + 1) {
464 size_t middle = (end + begin) / 2;
465 iteration.truncate(begin, middle, mid_step_function);
467 size_t used_memory =
iteration.get_truncated_mem_size(begin);
468 if (used_memory < target_memory) {
469 target_memory -= used_memory;
476 iteration.truncate(0, max_num_object, mid_step_function);
479 if (iteration.num_object > 0) {
480 if (iteration.truncated_num_object < next_iteration.num_object)
481 next_iteration.resize(iteration.truncated_num_object);
482 size_t next_object_size = iteration.truncated_num_object*iteration.get_object_length()/iteration.num_object;
483 if (next_object_size < next_iteration.objects.size())
484 next_iteration.allocate(next_object_size);
488 iteration.generate_symbolic_iteration(rule, symbolic_iteration, mid_step_function);
489 symbolic_iteration.compute_collisions(mid_step_function);
490 symbolic_iteration.next_iteration_num_object = symbolic_iteration.num_object_after_interferences;
494 mid_step_function(
"truncate - prepare");
495 symbolic_iteration.prepare_truncate(mid_step_function);
498 mid_step_function(
"truncate");
499 if (max_num_object == 0) {
506 if (symbolic_iteration.get_truncated_mem_size() > target_memory) {
507 size_t begin = 0, end = symbolic_iteration.num_object_after_interferences;
508 while (end > begin + 1) {
509 size_t middle = (end + begin) / 2;
510 symbolic_iteration.truncate(begin, middle, mid_step_function);
512 size_t used_memory = symbolic_iteration.get_truncated_mem_size(begin);
513 if (used_memory < target_memory) {
514 target_memory -= used_memory;
521 symbolic_iteration.truncate(0, max_num_object, mid_step_function);
524 symbolic_iteration.finalize(rule, iteration, next_iteration, mid_step_function);
525 next_iteration.normalize(mid_step_function);
531 void iteration::compute_num_child(
rule_t const *rule,
debug_t mid_step_function)
const {
535 mid_step_function(
"num_child");
540 ub_symbolic_object_size = 0;
542 #pragma omp parallel for reduction(max:ub_symbolic_object_size)
543 for (
size_t oid = 0; oid <
num_object; ++oid) {
545 rule->get_num_child(&objects[object_begin[oid]],
546 &objects[object_begin[oid] + object_size[oid]],
547 num_childs[oid], size);
548 ub_symbolic_object_size = std::max(ub_symbolic_object_size, size);
551 __gnu_parallel::partial_sum(num_childs.begin(), num_childs.begin() +
num_object, child_begin.begin() + 1);
557 size_t iteration::get_truncated_mem_size(
size_t begin_num_object)
const {
558 static const size_t iteration_memory_size = ITERATION_MEMORY_SIZE;
560 static const float hash_map_size = HASH_MAP_OVERHEAD*2*
sizeof(size_t);
561 static const size_t symbolic_iteration_memory_size = SYMBOLIC_ITERATION_MEMORY_SIZE;
563 size_t mem_size = iteration_memory_size*(truncated_num_object - begin_num_object);
564 for (
size_t i = begin_num_object; i < truncated_num_object; ++i) {
565 size_t oid = truncated_oid[i];
567 mem_size += object_begin[oid + 1] - object_begin[oid];
568 mem_size += num_childs[oid]*(symbolic_iteration_memory_size + hash_map_size);
577 void iteration::prepare_truncate(
debug_t mid_step_function)
const {
585 utils::random_generator rng;
589 random_selector[oid] = rng() / std::norm(magnitude[oid]);
596 void iteration::truncate(
size_t begin_num_object,
size_t max_num_object,
debug_t mid_step_function)
const {
607 auto begin = truncated_oid.begin() + begin_num_object;
608 auto middle = truncated_oid.begin() + max_num_object;
609 auto end = truncated_oid.begin() + truncated_num_object;
613 __gnu_parallel::nth_element(begin, middle, end,
614 [&](
size_t const &oid1,
size_t const &oid2) {
615 return std::norm(magnitude[oid1]) > std::norm(magnitude[oid2]);
619 __gnu_parallel::nth_element(begin, middle, end,
620 [&](
size_t const &oid1,
size_t const &oid2) {
621 return random_selector[oid1] < random_selector[oid2];
624 truncated_num_object = max_num_object;
630 void iteration::generate_symbolic_iteration(
rule_t const *rule,
sy_it_t &symbolic_iteration,
debug_t mid_step_function)
const {
631 if (truncated_num_object == 0) {
632 symbolic_iteration.num_object = 0;
633 mid_step_function(
"prepare_index");
634 mid_step_function(
"symbolic_iteration");
647 mid_step_function(
"prepare_index");
650 for (
size_t i = 0; i < truncated_num_object; ++i) {
651 size_t oid = truncated_oid[i];
653 child_begin[i + 1] = child_begin[i] + num_childs[oid];
656 symbolic_iteration.num_object = child_begin[truncated_num_object];
659 symbolic_iteration.
resize(symbolic_iteration.num_object);
660 symbolic_iteration.reserve(ub_symbolic_object_size);
664 auto thread_id = omp_get_thread_num();
667 for (
size_t i = 0; i < truncated_num_object; ++i) {
668 size_t oid = truncated_oid[i];
671 std::fill(&symbolic_iteration.parent_oid[child_begin[i]],
672 &symbolic_iteration.parent_oid[child_begin[i + 1]],
674 std::iota(&symbolic_iteration.child_id[child_begin[i]],
675 &symbolic_iteration.child_id[child_begin[i + 1]],
686 mid_step_function(
"symbolic_iteration");
689 for (
size_t oid = 0; oid < symbolic_iteration.num_object; ++oid) {
690 auto id = symbolic_iteration.parent_oid[oid];
693 symbolic_iteration.magnitude[oid] = magnitude[id];
694 rule->populate_child(&objects[object_begin[
id]],
695 &objects[object_begin[
id] + object_size[
id]],
696 symbolic_iteration.placeholder[thread_id], symbolic_iteration.child_id[oid],
697 symbolic_iteration.size[oid], symbolic_iteration.magnitude[oid]);
700 symbolic_iteration.hash[oid] = rule->hasher(symbolic_iteration.placeholder[thread_id],
701 symbolic_iteration.placeholder[thread_id] + symbolic_iteration.size[oid]);
709 void symbolic_iteration::compute_collisions(
debug_t mid_step_function) {
712 mid_step_function(
"compute_collisions - prepare");
713 mid_step_function(
"compute_collisions - insert");
714 mid_step_function(
"compute_collisions - finalize");
721 num_threads = omp_get_num_threads();
726 std::vector<int> load_balancing_begin(num_threads + 1, 0);
727 std::vector<size_t> partition_begin(num_bucket + 1, 0);
737 mid_step_function(
"compute_collisions - prepare");
739 &partition_begin[0], &partition_begin[num_bucket + 1],
740 [&](
size_t const oid) {
741 return hash[oid] >> offset;
753 load_balancing_begin.begin(), load_balancing_begin.end());
755 for (
size_t i = 0; i <= num_threads; ++i)
756 load_balancing_begin[i] = i*num_bucket/num_threads;
767 mid_step_function(
"compute_collisions - insert");
770 int thread_id = omp_get_thread_num();
771 int load_begin = load_balancing_begin[thread_id], load_end = load_balancing_begin[thread_id + 1];
772 for (
int j = load_begin; j < load_end; ++j) {
773 robin_hood::unordered_map<size_t, size_t> elimination_map;
775 size_t begin = partition_begin[j], end = partition_begin[j + 1];
777 elimination_map.reserve(end - begin);
778 for (
size_t i = begin; i < end; ++i) {
779 size_t oid = next_oid[i];
782 auto [it, unique] = elimination_map.insert({hash[oid], oid});
784 const size_t other_oid = it->second;
787 magnitude[other_oid] += magnitude[oid];
793 mid_step_function(
"compute_collisions - finalize");
802 size_t* partitioned_it = __gnu_parallel::partition(&next_oid[0], &next_oid[0] +
num_object,
803 [&](
size_t const &oid) {
804 return std::norm(magnitude[oid]) >
tolerance;
812 void symbolic_iteration::prepare_truncate(
debug_t mid_step_function) {
820 utils::random_generator rng;
824 size_t oid = next_oid[i];
825 random_selector[oid] = rng() / std::norm(magnitude[oid]);
833 size_t symbolic_iteration::get_truncated_mem_size(
size_t begin_num_object)
const {
834 static const size_t iteration_memory_size = ITERATION_MEMORY_SIZE;
836 size_t mem_size = iteration_memory_size*(next_iteration_num_object - begin_num_object);
837 for (
size_t i = begin_num_object; i < next_iteration_num_object; ++i) {
838 size_t this_mem_size = size[next_oid[i]];
840 mem_size += this_mem_size + alignment_offset;
849 void symbolic_iteration::truncate(
size_t begin_num_object,
size_t max_num_object,
debug_t mid_step_function) {
850 if (next_iteration_num_object == 0)
863 auto begin = next_oid.begin() + begin_num_object;
864 auto middle = next_oid.begin() + max_num_object;
865 auto end = next_oid.begin() + next_iteration_num_object;
869 __gnu_parallel::nth_element(begin, middle, end,
870 [&](
size_t const &oid1,
size_t const &oid2) {
871 return std::norm(magnitude[oid1]) > std::norm(magnitude[oid2]);
875 __gnu_parallel::nth_element(begin, middle, end,
876 [&](
size_t const &oid1,
size_t const &oid2) {
877 return random_selector[oid1] < random_selector[oid2];
882 next_iteration_num_object = max_num_object;
888 void symbolic_iteration::finalize(
rule_t const *rule,
it_t const &last_iteration,
it_t &next_iteration,
debug_t mid_step_function) {
889 if (next_iteration_num_object == 0) {
890 next_iteration.num_object = 0;
891 mid_step_function(
"prepare_final");
892 mid_step_function(
"final");
906 mid_step_function(
"prepare_final");
907 next_iteration.num_object = next_iteration_num_object;
910 __gnu_parallel::sort(&next_oid[0], &next_oid[0] + next_iteration.num_object);
913 next_iteration.resize(next_iteration.num_object);
916 #pragma omp parallel for
917 for (
size_t oid = 0; oid < next_iteration.num_object; ++oid) {
918 auto id = next_oid[oid];
922 next_iteration.object_size[oid] = size[id];
923 next_iteration.object_begin[oid + 1] = size[id] + alignment_offset;
924 next_iteration.magnitude[oid] = magnitude[id];
927 __gnu_parallel::partial_sum(&next_iteration.object_begin[1],
928 &next_iteration.object_begin[1] + next_iteration.num_object + 1,
929 &next_iteration.object_begin[1]);
931 next_iteration.allocate(next_iteration.object_begin[next_iteration.num_object]);
939 mid_step_function(
"final");
941 #pragma omp parallel for
942 for (
size_t oid = 0; oid < next_iteration.num_object; ++oid) {
943 auto id = next_oid[oid];
944 auto this_parent_oid = parent_oid[id];
946 rule->populate_child_simple(&last_iteration.objects[last_iteration.object_begin[this_parent_oid]],
947 &last_iteration.objects[last_iteration.object_begin[this_parent_oid] + last_iteration.object_size[this_parent_oid]],
948 &next_iteration.objects[next_iteration.object_begin[oid]],
956 void iteration::apply_modifier(
modifier_t const rule) {
957 #pragma omp parallel for
960 rule(&objects[object_begin[oid]],
961 &objects[object_begin[oid] + object_size[oid]],
968 void iteration::normalize(
debug_t mid_step_function) {
972 mid_step_function(
"normalize");
973 mid_step_function(
"end");
983 mid_step_function(
"normalize");
987 #pragma omp for reduction(+:total_proba)
991 PROBA_TYPE normalization_factor = std::sqrt(
total_proba);
993 if (normalization_factor != 1)
996 magnitude[oid] /= normalization_factor;
999 mid_step_function(
"end");
iteration (wave function) representation class
Definition: quids.hpp:144
friend void simulate(it_t &iteration, rule_t const *rule, it_t &next_iteration, sy_it_t &symbolic_iteration, size_t max_num_object, debug_t mid_step_function)
function to apply a dynamic to a wavefunction
Definition: quids.hpp:443
void get_object(size_t const object_id, char *&object_begin_, uint &object_size_, mag_t *&mag)
function to access a particular object (read-write access)
Definition: quids.hpp:237
iteration(char *object_begin_, char *object_end_)
constructor that insert a single object with magnitude 1
Definition: quids.hpp:161
void get_object(size_t const object_id, char const *&object_begin_, uint &object_size_, mag_t &mag) const
function to access a particular object (read-only access)
Definition: quids.hpp:249
PROBA_TYPE average_value(const observable_t observable) const
function to get the average value of a custom observable
Definition: quids.hpp:203
iteration()
simple empty wavefunction constructor
Definition: quids.hpp:152
void pop(size_t n=1, bool normalize_=true)
function that removes a given number of object from the "tail" of the memory representation
Definition: quids.hpp:189
void append(char const *object_begin_, char const *object_end_, mag_t const mag=1)
function that insert a single object with a given magnitude
Definition: quids.hpp:169
PROBA_TYPE total_proba
total probability retained after previous truncation (if any).
Definition: quids.hpp:149
size_t num_object
number of objects contained in the wave function
Definition: quids.hpp:147
class represneting a dynamic (or rule).
Definition: quids.hpp:100
virtual void populate_child_simple(char const *parent_begin, char const *parent_end, char *const child_begin, uint const child_id) const
function generating a child, without computing its magnitude and size.
Definition: quids.hpp:126
virtual void populate_child(char const *parent_begin, char const *parent_end, char *const child_begin, uint const child_id, uint &size, mag_t &mag) const =0
function generating a child, and computing its magnitude and size.
rule()
base constructor
Definition: quids.hpp:103
virtual void get_num_child(char const *parent_begin, char const *parent_end, uint &num_child, uint &max_child_size) const =0
function geting the number of children
virtual size_t hasher(char const *object_begin, char const *object_end) const
optional function hashing an object.
Definition: quids.hpp:138
symbolic iteration (computation intermediary)
Definition: quids.hpp:333
friend void simulate(it_t &iteration, rule_t const *rule, it_t &next_iteration, sy_it_t &symbolic_iteration, size_t max_num_object, debug_t mid_step_function)
function to apply a dynamic to a wavefunction
Definition: quids.hpp:443
size_t num_object_after_interferences
number of objects obrained after eliminating duplicates
Definition: quids.hpp:341
symbolic_iteration()
simple constructor
Definition: quids.hpp:336
size_t num_object
number of objects considered in the symbolic step
Definition: quids.hpp:339
void resize(const Int n_, const uint align_byte_length_=std::alignment_of< T >()) const
align_byte_length_ should be used to reallign the buffer, which is not yet implemented as realloc doe...
Definition: vector.hpp:111
void load_balancing_from_prefix_sum(const UnsignedIntIterator1 prefixSumLoadBegin, const UnsignedIntIterator1 prefixSumLoadEnd, UnsignedIntIterator2 workSharingIndexesBegin, UnsignedIntIterator2 workSharingIndexesEnd)
simple CCP load balacing implementation.
Definition: load_balancing.hpp:14
int log_2_upper_bound(int n)
returns the upperbound bound of the log2
Definition: algorithm.hpp:16
int nearest_power_of_two(int n)
returns the upper bound as a power of two
Definition: algorithm.hpp:9
size_t get_free_mem()
function that get the total amount of available free memory.
Definition: memory.hpp:47
void parallel_generalized_partition_from_iota(idIteratorType idx_in, idIteratorType idx_in_end, long long int const iotaOffset, countIteratorType offset, countIteratorType offset_end, functionType const partitioner)
linear partitioning algorithm into n partitions without an initialized index list in parallel
Definition: algorithm.hpp:170
void parallel_iota(iteratorType begin, iteratorType end, const valueType value_begin)
parallel iota
Definition: algorithm.hpp:24
QuIDS namespace.
Definition: quids.hpp:55
void simulate(it_t &iteration, modifier_t const rule)
function to apply a modifer to a wave function
Definition: quids.hpp:431
std::complex< PROBA_TYPE > mag_t
complex magnitude type
Definition: quids.hpp:73
class symbolic_iteration sy_it_t
symbolic iteration type
Definition: quids.hpp:77
std::function< void(const char *step)> debug_t
debuging function type
Definition: quids.hpp:85
class iteration it_t
iteration class type
Definition: quids.hpp:75
class rule rule_t
dynamic (or rule) type
Definition: quids.hpp:79
int load_balancing_bucket_per_thread
number of load balancing buckets per thread
Definition: quids.hpp:63
std::function< PROBA_TYPE(char const *object_begin, char const *object_end)> observable_t
observable definition typedef
Definition: quids.hpp:83
float safety_margin
memory safety margin (0.2 = 80% memory usage target)
Definition: quids.hpp:61
PROBA_TYPE tolerance
tolerance for objects (remove objects with a smaller probability)
Definition: quids.hpp:59
uint align_byte_length
amount of byte to align objects to
Definition: quids.hpp:57
uint get_alignment_offset(const uint size)
utility function to get the alignment offset to put at the end of an object of size "size"
Definition: quids.hpp:88
bool simple_truncation
simple truncation toggle - disable probabilistic truncation, increasing "accuracy" but reducing the r...
Definition: quids.hpp:69
std::function< void(char *parent_begin, char *parent_end, mag_t &mag)> modifier_t
simple "modifier" type (single input, single output of same size dynamic)
Definition: quids.hpp:81