LLVM OpenMP* Runtime Library
kmp_affinity.h
/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"
#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      long retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      long retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
#if KMP_OS_WINDOWS
    int set_process_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set process affinity operation when not capable");
      int error = 0;
      const hwloc_topology_support *support =
          hwloc_topology_get_support(__kmp_hwloc_topology);
      if (support->cpubind->set_proc_cpubind) {
        int retval;
        retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask,
                                   HWLOC_CPUBIND_PROCESS);
        if (retval >= 0)
          return 0;
        error = errno;
        if (abort_on_error)
          __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
#endif
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On windows, the long type is always 32 bits
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */
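
// Illustrative sketch (not part of the upstream header): how the abstract
// KMPAffinity interface above is typically exercised by a backend such as the
// hwloc one -- check capability, create a mask, populate it, and apply it to
// the calling thread. The function and local names here are hypothetical.
#if 0 // illustrative only
static void example_pin_self_to_cpu(KMPAffinity *affinity, int target_cpu) {
  // Verify the backend can get/set this thread's affinity before using it.
  affinity->determine_capable("KMP_AFFINITY");
  if (!KMP_AFFINITY_CAPABLE())
    return;
  KMPAffinity::Mask *mask = affinity->allocate_mask(); // backend-specific Mask
  mask->zero();          // clear all bits
  mask->set(target_cpu); // request a single logical CPU
  mask->set_system_affinity(/*abort_on_error=*/false); // bind calling thread
  affinity->deallocate_mask(mask);
}
#endif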

#if KMP_OS_LINUX || KMP_OS_FREEBSD
#if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot change;
   they are set in stone forever. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_RISCV64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
#elif KMP_OS_FREEBSD
#include <pthread.h>
#include <pthread_np.h>
#endif
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned long mask_t;
    typedef decltype(__kmp_affin_mask_size) mask_size_type;
    static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    static const mask_t ONE = 1;
    mask_size_type get_num_mask_types() const {
      return __kmp_affin_mask_size / sizeof(mask_t);
    }

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = (mask_t)0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override {
      int e;
      __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e);
      return e;
    }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
#if KMP_OS_LINUX
      long retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
#if KMP_OS_LINUX
      long retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
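
// Illustrative sketch (not upstream code) of the bit addressing used by the
// native Mask above. With a 64-bit mask_t, logical CPU i lives in word
// i / 64 at bit i % 64, so a machine with 130 CPUs needs ceil(130 / 64) = 3
// words. The names below are hypothetical.
#if 0 // illustrative only
#include <climits>
static void example_mask_indexing() {
  typedef unsigned long mask_t;                    // same word type as Mask
  const unsigned BITS = sizeof(mask_t) * CHAR_BIT; // 64 on LP64 Linux
  int cpu = 70;
  unsigned word = cpu / BITS; // word 1
  unsigned bit = cpu % BITS;  // bit 6
  mask_t words[3] = {0, 0, 0};
  words[word] |= ((mask_t)1 << bit); // equivalent of Mask::set(70)
  (void)words;
}
#endif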

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_process_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups <= 1) {
        if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */

class Address {
public:
  static const unsigned maxDepth = 32;
  unsigned labels[maxDepth];
  unsigned childNums[maxDepth];
  unsigned depth;
  unsigned leader;
  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
  Address &operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
      labels[i] = b.labels[i];
      childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
  }
  bool operator==(const Address &b) const {
    if (depth != b.depth)
      return false;
    for (unsigned i = 0; i < depth; i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool isClose(const Address &b, int level) const {
    if (depth != b.depth)
      return false;
    if ((unsigned)level >= depth)
      return true;
    for (unsigned i = 0; i < (depth - level); i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool operator!=(const Address &b) const { return !operator==(b); }
  void print() const {
    unsigned i;
    printf("Depth: %u --- ", depth);
    for (i = 0; i < depth; i++) {
      printf("%u ", labels[i]);
    }
  }
};
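
// Illustrative sketch (not upstream code): an Address stores one hardware
// thread's position as a label per topology layer, ordered from the outermost
// layer down (e.g. {package, core, hw-thread} would be a plausible 3-level
// labeling). isClose() ignores the last 'level' labels, so two addresses are
// "close" when they share all of the coarser labels. The names below are
// hypothetical.
#if 0 // illustrative only
static void example_address_labels() {
  Address a(3), b(3);
  a.labels[0] = 0; a.labels[1] = 2; a.labels[2] = 0; // package 0, core 2, thread 0
  b.labels[0] = 0; b.labels[1] = 2; b.labels[2] = 1; // package 0, core 2, thread 1
  bool same_core = a.isClose(b, 1); // true: differ only in the last label
  bool identical = (a == b);        // false: exact label match required
  (void)same_core;
  (void)identical;
}
#endif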

class AddrUnsPair {
public:
  Address first;
  unsigned second;
  AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {}
  AddrUnsPair &operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
  }
  void print() const {
    printf("first = ");
    first.print();
    printf(" --- second = %u", second);
  }
  bool operator==(const AddrUnsPair &b) const {
    if (first != b.first)
      return false;
    if (second != b.second)
      return false;
    return true;
  }
  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};

static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}

/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual
   machine hierarchy, or to our best guess at what the hierarchy might be, for
   the purpose of performing an efficient barrier. In the worst case, when
   there is no machine hierarchy information, it produces a tree suitable for a
   barrier, similar to the tree used in the hyper barrier. */
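// Illustrative example (not from the upstream header): for a hypothetical
// machine with 2 packages, 4 cores per package and 2 hardware threads per
// core (16 threads total), the leaf-first arrays built by init() below would
// plausibly look like
//   numPerLevel  = {2, 4, 2, 1, 1, 1, 1}  // children per parent at each level
//   skipPerLevel = {1, 2, 8, 16, 32, ...} // leaves spanned by one subtree
// where skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1], and entries
// past the machine depth are doubled to absorb oversubscription.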
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  // Number of levels allocated in the per-level arrays; may grow in resize()
  // when the machine becomes oversubscribed.
  kmp_uint32 maxLevels;

  // Depth of the machine hierarchy actually in use: the number of levels along
  // the longest root-to-leaf path.
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
  // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  // Level 0 corresponds to leaves; numPerLevel[i] is the branching factor at
  // level i, and skipPerLevel[i] is the number of leaves spanned by one
  // subtree rooted at level i.
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;

  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Added explicit initialization of the data fields here to prevent usage
       of dirty value observed when static library is re-initialized multiple
       times (e.g. when non-OpenMP thread repeatedly launches/joins thread that
       uses OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }

  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
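
// Illustrative sketch (not upstream code) of how the per-level arrays above
// can be consumed. Because skipPerLevel[k] is the number of leaves spanned by
// one subtree at level k, the representative (lowest-numbered) thread of the
// subtree containing 'tid' at level k is tid rounded down to a multiple of
// skipPerLevel[k]. The function name is hypothetical.
#if 0 // illustrative only
static kmp_uint32 example_subtree_leader(const hierarchy_info &h,
                                         kmp_uint32 tid, kmp_uint32 level) {
  KMP_DEBUG_ASSERT(level < h.maxLevels);
  return tid - (tid % h.skipPerLevel[level]); // leader of tid's level-k subtree
}
#endif
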
#endif // KMP_AFFINITY_H