// LLVM OpenMP* Runtime Library — kmp_barrier.h
1 /*
2  * kmp_barrier.h
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_BARRIER_H
14 #define KMP_BARRIER_H
15 
16 #include "kmp.h"
17 #include "kmp_i18n.h"
18 
19 #if KMP_HAVE_XMMINTRIN_H && KMP_HAVE__MM_MALLOC
20 #include <xmmintrin.h>
21 #define KMP_ALIGNED_ALLOCATE(size, alignment) _mm_malloc(size, alignment)
22 #define KMP_ALIGNED_FREE(ptr) _mm_free(ptr)
23 #elif KMP_HAVE_ALIGNED_ALLOC
24 #define KMP_ALIGNED_ALLOCATE(size, alignment) aligned_alloc(alignment, size)
25 #define KMP_ALIGNED_FREE(ptr) free(ptr)
26 #elif KMP_HAVE_POSIX_MEMALIGN
// Aligned allocation via posix_memalign.
// Returns a pointer aligned to `alignment` (which must be a power of two and
// a multiple of sizeof(void *)), or nullptr on failure.
static inline void *KMP_ALIGNED_ALLOCATE(size_t size, size_t alignment) {
  // Initialize: POSIX leaves *memptr unspecified when posix_memalign fails,
  // so without this the failure path below would read (and possibly free)
  // an indeterminate pointer — undefined behavior.
  void *ptr = nullptr;
  int n = posix_memalign(&ptr, alignment, size);
  if (n != 0) {
    // Defensive: free only if the implementation actually set ptr on failure.
    if (ptr)
      free(ptr);
    return nullptr;
  }
  return ptr;
}
37 #define KMP_ALIGNED_FREE(ptr) free(ptr)
38 #elif KMP_HAVE__ALIGNED_MALLOC
39 #include <malloc.h>
40 #define KMP_ALIGNED_ALLOCATE(size, alignment) _aligned_malloc(size, alignment)
41 #define KMP_ALIGNED_FREE(ptr) _aligned_free(ptr)
42 #else
43 #define KMP_ALIGNED_ALLOCATE(size, alignment) KMP_INTERNAL_MALLOC(size)
44 #define KMP_ALIGNED_FREE(ptr) KMP_INTERNAL_FREE(ptr)
45 #endif
46 
47 // Use four cache lines: MLC tends to prefetch the next or previous cache line
48 // creating a possible fake conflict between cores, so this is the only way to
49 // guarantee that no such prefetch can happen.
50 #ifndef KMP_FOURLINE_ALIGN_CACHE
51 #define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE)
52 #endif
53 
54 #define KMP_OPTIMIZE_FOR_REDUCTIONS 0
55 
// Data for the distributed barrier algorithm. Each per-thread field lives in
// its own 4-cache-line-aligned slot (see KMP_FOURLINE_ALIGN_CACHE above) so
// that adjacent-line hardware prefetch cannot create false sharing between
// cores. Instances are created/destroyed only through allocate()/deallocate()
// so the object itself gets 4-cache-line alignment.
class distributedBarrier {
  // Per-thread "still needs to arrive" counter for one barrier iteration.
  struct flags_s {
    kmp_uint32 volatile KMP_FOURLINE_ALIGN_CACHE stillNeed;
  };

  // Release ("go") signal slot; written once per iteration per go signal.
  struct go_s {
    std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go;
  };

  // Per-thread barrier iteration counter.
  struct iter_s {
    kmp_uint64 volatile KMP_FOURLINE_ALIGN_CACHE iter;
  };

  // Per-thread flag: thread may be sleeping and need a wake-up on release.
  struct sleep_s {
    std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep;
  };

  void init(size_t nthr);
  void resize(size_t nthr);
  void computeGo(size_t n);
  void computeVarsForN(size_t n);

public:
  enum {
    MAX_ITERS = 3,        // number of flags_s arrays kept live at once
    MAX_GOS = 8,          // upper bound on go signals per group
    IDEAL_GOS = 4,        // preferred go signals per group
    IDEAL_CONTENTION = 16, // target threads contending per go signal
  };

  flags_s *flags[MAX_ITERS]; // one flags array per in-flight iteration
  go_s *go;
  iter_s *iter;
  sleep_s *sleep;

  size_t KMP_ALIGN_CACHE num_threads; // number of threads in barrier
  size_t KMP_ALIGN_CACHE max_threads; // size of arrays in data structure
  // number of go signals each requiring one write per iteration
  size_t KMP_ALIGN_CACHE num_gos;
  // number of groups of gos
  size_t KMP_ALIGN_CACHE num_groups;
  // threads per go signal
  size_t KMP_ALIGN_CACHE threads_per_go;
  // when true, threads_per_go is pinned and not recomputed on resize
  bool KMP_ALIGN_CACHE fix_threads_per_go;
  // threads per group
  size_t KMP_ALIGN_CACHE threads_per_group;
  // number of go signals in a group
  size_t KMP_ALIGN_CACHE gos_per_group;
  void *team_icvs; // opaque storage for the team's internal control variables

  // Construction/destruction are deleted: use allocate()/deallocate() below,
  // which guarantee the 4-cache-line alignment the layout relies on.
  distributedBarrier() = delete;
  ~distributedBarrier() = delete;

  // Used instead of constructor to create aligned data
  static distributedBarrier *allocate(int nThreads) {
    distributedBarrier *d = (distributedBarrier *)KMP_ALIGNED_ALLOCATE(
        sizeof(distributedBarrier), 4 * CACHE_LINE);
    if (!d) {
      KMP_FATAL(MemoryAllocFailed);
    }
    // Zero the pointer/count fields so init() starts from a clean state.
    d->num_threads = 0;
    d->max_threads = 0;
    for (int i = 0; i < MAX_ITERS; ++i)
      d->flags[i] = NULL;
    d->go = NULL;
    d->iter = NULL;
    d->sleep = NULL;
    d->team_icvs = NULL;
    d->fix_threads_per_go = false;
    // calculate gos and groups ONCE on base size
    d->computeGo(nThreads);
    d->init(nThreads);
    return d;
  }

  static void deallocate(distributedBarrier *db) { KMP_ALIGNED_FREE(db); }

  // Re-initializes internal arrays for a new thread count (may resize).
  void update_num_threads(size_t nthr) { init(nthr); }

  bool need_resize(size_t new_nthr) { return (new_nthr > max_threads); }
  size_t get_num_threads() { return num_threads; }
  kmp_uint64 go_release();
  void go_reset();
};
140 
141 #endif // KMP_BARRIER_H