// Select an aligned allocation/free pair based on what the platform provides.
// All branches expose the same interface:
//   KMP_ALIGNED_ALLOCATE(size, alignment) -> void* (NULL on failure)
//   KMP_ALIGNED_FREE(ptr)
#if KMP_HAVE_XMMINTRIN_H && KMP_HAVE__MM_MALLOC
#include <xmmintrin.h>
#define KMP_ALIGNED_ALLOCATE(size, alignment) _mm_malloc(size, alignment)
#define KMP_ALIGNED_FREE(ptr) _mm_free(ptr)
#elif KMP_HAVE_ALIGNED_ALLOC
// C11 aligned_alloc takes (alignment, size) -- note the swapped argument
// order relative to our macro.
#define KMP_ALIGNED_ALLOCATE(size, alignment) aligned_alloc(alignment, size)
#define KMP_ALIGNED_FREE(ptr) free(ptr)
#elif KMP_HAVE_POSIX_MEMALIGN
// POSIX fallback: wrap posix_memalign() in a malloc-like interface that
// returns NULL on failure instead of reporting an error code.
static inline void *KMP_ALIGNED_ALLOCATE(size_t size, size_t alignment) {
  void *ptr = NULL;
  int n = posix_memalign(&ptr, alignment, size);
  if (n != 0) {
    // posix_memalign leaves *memptr unmodified on failure; report NULL.
    return NULL;
  }
  return ptr;
}
#define KMP_ALIGNED_FREE(ptr) free(ptr)
#elif KMP_HAVE__ALIGNED_MALLOC
#include <malloc.h> // for _aligned_malloc/_aligned_free (Windows CRT)
#define KMP_ALIGNED_ALLOCATE(size, alignment) _aligned_malloc(size, alignment)
#define KMP_ALIGNED_FREE(ptr) _aligned_free(ptr)
#else
// Last resort: plain internal malloc; the alignment request is NOT honored.
#define KMP_ALIGNED_ALLOCATE(size, alignment) KMP_INTERNAL_MALLOC(size)
#define KMP_ALIGNED_FREE(ptr) KMP_INTERNAL_FREE(ptr)
#endif
// Pad hot barrier words out to four cache lines so that flags written by
// different thread groups never share a line (see KMP_ALIGN usage below).
#ifndef KMP_FOURLINE_ALIGN_CACHE
#define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE)
#endif

#define KMP_OPTIMIZE_FOR_REDUCTIONS 0
56 class distributedBarrier {
58 kmp_uint32
volatile KMP_FOURLINE_ALIGN_CACHE stillNeed;
62 std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go;
66 kmp_uint64
volatile KMP_FOURLINE_ALIGN_CACHE iter;
70 std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep;
73 void init(
size_t nthr);
74 void resize(
size_t nthr);
75 void computeGo(
size_t n);
76 void computeVarsForN(
size_t n);
83 IDEAL_CONTENTION = 16,
86 flags_s *flags[MAX_ITERS];
91 size_t KMP_ALIGN_CACHE num_threads;
92 size_t KMP_ALIGN_CACHE max_threads;
94 size_t KMP_ALIGN_CACHE num_gos;
96 size_t KMP_ALIGN_CACHE num_groups;
98 size_t KMP_ALIGN_CACHE threads_per_go;
99 bool KMP_ALIGN_CACHE fix_threads_per_go;
101 size_t KMP_ALIGN_CACHE threads_per_group;
103 size_t KMP_ALIGN_CACHE gos_per_group;
106 distributedBarrier() =
delete;
107 ~distributedBarrier() =
delete;
110 static distributedBarrier *allocate(
int nThreads) {
111 distributedBarrier *d = (distributedBarrier *)KMP_ALIGNED_ALLOCATE(
112 sizeof(distributedBarrier), 4 * CACHE_LINE);
114 KMP_FATAL(MemoryAllocFailed);
118 for (
int i = 0; i < MAX_ITERS; ++i)
124 d->fix_threads_per_go =
false;
126 d->computeGo(nThreads);
131 static void deallocate(distributedBarrier *db) { KMP_ALIGNED_FREE(db); }
133 void update_num_threads(
size_t nthr) { init(nthr); }
135 bool need_resize(
size_t new_nthr) {
return (new_nthr > max_threads); }
136 size_t get_num_threads() {
return num_threads; }
137 kmp_uint64 go_release();