LLVM OpenMP* Runtime Library
kmp_affinity.h
1 /*
2  * kmp_affinity.h -- header for affinity management
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_AFFINITY_H
14 #define KMP_AFFINITY_H
15 
16 #include "kmp.h"
17 #include "kmp_os.h"
18 #include <limits>
19 
20 #if KMP_AFFINITY_SUPPORTED
21 #if KMP_USE_HWLOC
22 class KMPHwlocAffinity : public KMPAffinity {
23 public:
24  class Mask : public KMPAffinity::Mask {
25  hwloc_cpuset_t mask;
26 
27  public:
28  Mask() {
29  mask = hwloc_bitmap_alloc();
30  this->zero();
31  }
32  ~Mask() { hwloc_bitmap_free(mask); }
33  void set(int i) override { hwloc_bitmap_set(mask, i); }
34  bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
35  void clear(int i) override { hwloc_bitmap_clr(mask, i); }
36  void zero() override { hwloc_bitmap_zero(mask); }
37  void copy(const KMPAffinity::Mask *src) override {
38  const Mask *convert = static_cast<const Mask *>(src);
39  hwloc_bitmap_copy(mask, convert->mask);
40  }
41  void bitwise_and(const KMPAffinity::Mask *rhs) override {
42  const Mask *convert = static_cast<const Mask *>(rhs);
43  hwloc_bitmap_and(mask, mask, convert->mask);
44  }
45  void bitwise_or(const KMPAffinity::Mask *rhs) override {
46  const Mask *convert = static_cast<const Mask *>(rhs);
47  hwloc_bitmap_or(mask, mask, convert->mask);
48  }
49  void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
50  int begin() const override { return hwloc_bitmap_first(mask); }
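 // Iteration note: hwloc_bitmap_next() returns -1 once no set bit remains,
 // so -1 doubles as the end() sentinel for the begin()/next() loop below.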
51  int end() const override { return -1; }
52  int next(int previous) const override {
53  return hwloc_bitmap_next(mask, previous);
54  }
55  int get_system_affinity(bool abort_on_error) override {
56  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
57  "Illegal get affinity operation when not capable");
58  long retval =
59  hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
60  if (retval >= 0) {
61  return 0;
62  }
63  int error = errno;
64  if (abort_on_error) {
65  __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
66  }
67  return error;
68  }
69  int set_system_affinity(bool abort_on_error) const override {
70  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
71  "Illegal set affinity operation when not capable");
72  long retval =
73  hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
74  if (retval >= 0) {
75  return 0;
76  }
77  int error = errno;
78  if (abort_on_error) {
79  __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
80  }
81  return error;
82  }
83 #if KMP_OS_WINDOWS
84  int set_process_affinity(bool abort_on_error) const override {
85  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
86  "Illegal set process affinity operation when not capable");
87  int error = 0;
88  const hwloc_topology_support *support =
89  hwloc_topology_get_support(__kmp_hwloc_topology);
90  if (support->cpubind->set_proc_cpubind) {
91  int retval;
92  retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask,
93  HWLOC_CPUBIND_PROCESS);
94  if (retval >= 0)
95  return 0;
96  error = errno;
97  if (abort_on_error)
98  __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
99  }
100  return error;
101  }
102 #endif
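 // Return the index of the single Windows processor group covered by this
 // mask, or -1 if the mask spans more than one group. When processor groups
 // are not in use (__kmp_num_proc_groups == 1), 1 is returned.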
103  int get_proc_group() const override {
104  int group = -1;
105 #if KMP_OS_WINDOWS
106  if (__kmp_num_proc_groups == 1) {
107  return 1;
108  }
109  for (int i = 0; i < __kmp_num_proc_groups; i++) {
110  // On Windows, unsigned long is always 32 bits, so each 64-bit processor group spans two ulongs of the hwloc bitmap
111  unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
112  unsigned long second_32_bits =
113  hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
114  if (first_32_bits == 0 && second_32_bits == 0) {
115  continue;
116  }
117  if (group >= 0) {
118  return -1;
119  }
120  group = i;
121  }
122 #endif /* KMP_OS_WINDOWS */
123  return group;
124  }
125  };
126  void determine_capable(const char *var) override {
127  const hwloc_topology_support *topology_support;
128  if (__kmp_hwloc_topology == NULL) {
129  if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
130  __kmp_hwloc_error = TRUE;
131  if (__kmp_affinity_verbose)
132  KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
133  }
134  if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
135  __kmp_hwloc_error = TRUE;
136  if (__kmp_affinity_verbose)
137  KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
138  }
139  }
140  topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
141  // Is the system capable of setting/getting this thread's affinity?
142  // Also, is topology discovery possible? (pu indicates ability to discover
143  // processing units). And finally, were there no errors when calling any
144  // hwloc_* API functions?
145  if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
146  topology_support->cpubind->get_thisthread_cpubind &&
147  topology_support->discovery->pu && !__kmp_hwloc_error) {
148  // enables affinity according to KMP_AFFINITY_CAPABLE() macro
149  KMP_AFFINITY_ENABLE(TRUE);
150  } else {
151  // indicate that hwloc didn't work and disable affinity
152  __kmp_hwloc_error = TRUE;
153  KMP_AFFINITY_DISABLE();
154  }
155  }
156  void bind_thread(int which) override {
157  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
158  "Illegal set affinity operation when not capable");
159  KMPAffinity::Mask *mask;
160  KMP_CPU_ALLOC_ON_STACK(mask);
161  KMP_CPU_ZERO(mask);
162  KMP_CPU_SET(which, mask);
163  __kmp_set_system_affinity(mask, TRUE);
164  KMP_CPU_FREE_FROM_STACK(mask);
165  }
166  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
167  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
168  KMPAffinity::Mask *allocate_mask_array(int num) override {
169  return new Mask[num];
170  }
171  void deallocate_mask_array(KMPAffinity::Mask *array) override {
172  Mask *hwloc_array = static_cast<Mask *>(array);
173  delete[] hwloc_array;
174  }
175  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
176  int index) override {
177  Mask *hwloc_array = static_cast<Mask *>(array);
178  return &(hwloc_array[index]);
179  }
180  api_type get_api_type() const override { return HWLOC; }
181 };
182 #endif /* KMP_USE_HWLOC */
183 
184 #if KMP_OS_LINUX || KMP_OS_FREEBSD
185 #if KMP_OS_LINUX
186 /* On some of the older OSes that we build on, these constants aren't present
187  in <asm/unistd.h>, #included from <sys/syscall.h>. They must be the same on
188  all systems of the same arch where they are defined, and they cannot change;
189  they are set in stone forever. */
190 #include <sys/syscall.h>
191 #if KMP_ARCH_X86 || KMP_ARCH_ARM
192 #ifndef __NR_sched_setaffinity
193 #define __NR_sched_setaffinity 241
194 #elif __NR_sched_setaffinity != 241
195 #error Wrong code for setaffinity system call.
196 #endif /* __NR_sched_setaffinity */
197 #ifndef __NR_sched_getaffinity
198 #define __NR_sched_getaffinity 242
199 #elif __NR_sched_getaffinity != 242
200 #error Wrong code for getaffinity system call.
201 #endif /* __NR_sched_getaffinity */
202 #elif KMP_ARCH_AARCH64
203 #ifndef __NR_sched_setaffinity
204 #define __NR_sched_setaffinity 122
205 #elif __NR_sched_setaffinity != 122
206 #error Wrong code for setaffinity system call.
207 #endif /* __NR_sched_setaffinity */
208 #ifndef __NR_sched_getaffinity
209 #define __NR_sched_getaffinity 123
210 #elif __NR_sched_getaffinity != 123
211 #error Wrong code for getaffinity system call.
212 #endif /* __NR_sched_getaffinity */
213 #elif KMP_ARCH_RISCV64
214 #ifndef __NR_sched_setaffinity
215 #define __NR_sched_setaffinity 122
216 #elif __NR_sched_setaffinity != 122
217 #error Wrong code for setaffinity system call.
218 #endif /* __NR_sched_setaffinity */
219 #ifndef __NR_sched_getaffinity
220 #define __NR_sched_getaffinity 123
221 #elif __NR_sched_getaffinity != 123
222 #error Wrong code for getaffinity system call.
223 #endif /* __NR_sched_getaffinity */
224 #elif KMP_ARCH_X86_64
225 #ifndef __NR_sched_setaffinity
226 #define __NR_sched_setaffinity 203
227 #elif __NR_sched_setaffinity != 203
228 #error Wrong code for setaffinity system call.
229 #endif /* __NR_sched_setaffinity */
230 #ifndef __NR_sched_getaffinity
231 #define __NR_sched_getaffinity 204
232 #elif __NR_sched_getaffinity != 204
233 #error Wrong code for getaffinity system call.
234 #endif /* __NR_sched_getaffinity */
235 #elif KMP_ARCH_PPC64
236 #ifndef __NR_sched_setaffinity
237 #define __NR_sched_setaffinity 222
238 #elif __NR_sched_setaffinity != 222
239 #error Wrong code for setaffinity system call.
240 #endif /* __NR_sched_setaffinity */
241 #ifndef __NR_sched_getaffinity
242 #define __NR_sched_getaffinity 223
243 #elif __NR_sched_getaffinity != 223
244 #error Wrong code for getaffinity system call.
245 #endif /* __NR_sched_getaffinity */
246 #elif KMP_ARCH_MIPS
247 #ifndef __NR_sched_setaffinity
248 #define __NR_sched_setaffinity 4239
249 #elif __NR_sched_setaffinity != 4239
250 #error Wrong code for setaffinity system call.
251 #endif /* __NR_sched_setaffinity */
252 #ifndef __NR_sched_getaffinity
253 #define __NR_sched_getaffinity 4240
254 #elif __NR_sched_getaffinity != 4240
255 #error Wrong code for getaffinity system call.
256 #endif /* __NR_sched_getaffinity */
257 #elif KMP_ARCH_MIPS64
258 #ifndef __NR_sched_setaffinity
259 #define __NR_sched_setaffinity 5195
260 #elif __NR_sched_setaffinity != 5195
261 #error Wrong code for setaffinity system call.
262 #endif /* __NR_sched_setaffinity */
263 #ifndef __NR_sched_getaffinity
264 #define __NR_sched_getaffinity 5196
265 #elif __NR_sched_getaffinity != 5196
266 #error Wrong code for getaffinity system call.
267 #endif /* __NR_sched_getaffinity */
268 #else
269 #error Unknown or unsupported architecture
270 #endif /* KMP_ARCH_* */
271 #elif KMP_OS_FREEBSD
272 #include <pthread.h>
273 #include <pthread_np.h>
274 #endif
275 class KMPNativeAffinity : public KMPAffinity {
276  class Mask : public KMPAffinity::Mask {
277  typedef unsigned long mask_t;
278  typedef decltype(__kmp_affin_mask_size) mask_size_type;
279  static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
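 // A mask_t-typed one-bit, so that shifts such as (ONE << (i % BITS_PER_MASK_T))
 // are performed at mask_t width rather than int width.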
280  static const mask_t ONE = 1;
281  mask_size_type get_num_mask_types() const {
282  return __kmp_affin_mask_size / sizeof(mask_t);
283  }
284 
285  public:
286  mask_t *mask;
287  Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
288  ~Mask() {
289  if (mask)
290  __kmp_free(mask);
291  }
292  void set(int i) override {
293  mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T));
294  }
295  bool is_set(int i) const override {
296  return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T)));
297  }
298  void clear(int i) override {
299  mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T));
300  }
301  void zero() override {
302  mask_size_type e = get_num_mask_types();
303  for (mask_size_type i = 0; i < e; ++i)
304  mask[i] = (mask_t)0;
305  }
306  void copy(const KMPAffinity::Mask *src) override {
307  const Mask *convert = static_cast<const Mask *>(src);
308  mask_size_type e = get_num_mask_types();
309  for (mask_size_type i = 0; i < e; ++i)
310  mask[i] = convert->mask[i];
311  }
312  void bitwise_and(const KMPAffinity::Mask *rhs) override {
313  const Mask *convert = static_cast<const Mask *>(rhs);
314  mask_size_type e = get_num_mask_types();
315  for (mask_size_type i = 0; i < e; ++i)
316  mask[i] &= convert->mask[i];
317  }
318  void bitwise_or(const KMPAffinity::Mask *rhs) override {
319  const Mask *convert = static_cast<const Mask *>(rhs);
320  mask_size_type e = get_num_mask_types();
321  for (mask_size_type i = 0; i < e; ++i)
322  mask[i] |= convert->mask[i];
323  }
324  void bitwise_not() override {
325  mask_size_type e = get_num_mask_types();
326  for (mask_size_type i = 0; i < e; ++i)
327  mask[i] = ~(mask[i]);
328  }
329  int begin() const override {
330  int retval = 0;
331  while (retval < end() && !is_set(retval))
332  ++retval;
333  return retval;
334  }
335  int end() const override {
336  int e;
337  __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e);
338  return e;
339  }
340  int next(int previous) const override {
341  int retval = previous + 1;
342  while (retval < end() && !is_set(retval))
343  ++retval;
344  return retval;
345  }
346  int get_system_affinity(bool abort_on_error) override {
347  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
348  "Illegal get affinity operation when not capable");
349 #if KMP_OS_LINUX
350  long retval =
351  syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
352 #elif KMP_OS_FREEBSD
353  int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
354  reinterpret_cast<cpuset_t *>(mask));
355  int retval = (r == 0 ? 0 : -1);
356 #endif
357  if (retval >= 0) {
358  return 0;
359  }
360  int error = errno;
361  if (abort_on_error) {
362  __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
363  }
364  return error;
365  }
366  int set_system_affinity(bool abort_on_error) const override {
367  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
368  "Illegal set affinity operation when not capable");
369 #if KMP_OS_LINUX
370  long retval =
371  syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
372 #elif KMP_OS_FREEBSD
373  int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
374  reinterpret_cast<cpuset_t *>(mask));
375  int retval = (r == 0 ? 0 : -1);
376 #endif
377  if (retval >= 0) {
378  return 0;
379  }
380  int error = errno;
381  if (abort_on_error) {
382  __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
383  }
384  return error;
385  }
386  };
387  void determine_capable(const char *env_var) override {
388  __kmp_affinity_determine_capable(env_var);
389  }
390  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
391  KMPAffinity::Mask *allocate_mask() override {
392  KMPNativeAffinity::Mask *retval = new Mask();
393  return retval;
394  }
395  void deallocate_mask(KMPAffinity::Mask *m) override {
396  KMPNativeAffinity::Mask *native_mask =
397  static_cast<KMPNativeAffinity::Mask *>(m);
398  delete native_mask;
399  }
400  KMPAffinity::Mask *allocate_mask_array(int num) override {
401  return new Mask[num];
402  }
403  void deallocate_mask_array(KMPAffinity::Mask *array) override {
404  Mask *linux_array = static_cast<Mask *>(array);
405  delete[] linux_array;
406  }
407  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
408  int index) override {
409  Mask *linux_array = static_cast<Mask *>(array);
410  return &(linux_array[index]);
411  }
412  api_type get_api_type() const override { return NATIVE_OS; }
413 };
414 #endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
415 
416 #if KMP_OS_WINDOWS
417 class KMPNativeAffinity : public KMPAffinity {
418  class Mask : public KMPAffinity::Mask {
419  typedef ULONG_PTR mask_t;
420  static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
421  mask_t *mask;
422 
423  public:
424  Mask() {
425  mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
426  }
427  ~Mask() {
428  if (mask)
429  __kmp_free(mask);
430  }
431  void set(int i) override {
432  mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
433  }
434  bool is_set(int i) const override {
435  return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
436  }
437  void clear(int i) override {
438  mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
439  }
440  void zero() override {
441  for (int i = 0; i < __kmp_num_proc_groups; ++i)
442  mask[i] = 0;
443  }
444  void copy(const KMPAffinity::Mask *src) override {
445  const Mask *convert = static_cast<const Mask *>(src);
446  for (int i = 0; i < __kmp_num_proc_groups; ++i)
447  mask[i] = convert->mask[i];
448  }
449  void bitwise_and(const KMPAffinity::Mask *rhs) override {
450  const Mask *convert = static_cast<const Mask *>(rhs);
451  for (int i = 0; i < __kmp_num_proc_groups; ++i)
452  mask[i] &= convert->mask[i];
453  }
454  void bitwise_or(const KMPAffinity::Mask *rhs) override {
455  const Mask *convert = static_cast<const Mask *>(rhs);
456  for (int i = 0; i < __kmp_num_proc_groups; ++i)
457  mask[i] |= convert->mask[i];
458  }
459  void bitwise_not() override {
460  for (int i = 0; i < __kmp_num_proc_groups; ++i)
461  mask[i] = ~(mask[i]);
462  }
463  int begin() const override {
464  int retval = 0;
465  while (retval < end() && !is_set(retval))
466  ++retval;
467  return retval;
468  }
469  int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
470  int next(int previous) const override {
471  int retval = previous + 1;
472  while (retval < end() && !is_set(retval))
473  ++retval;
474  return retval;
475  }
476  int set_process_affinity(bool abort_on_error) const override {
477  if (__kmp_num_proc_groups <= 1) {
478  if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) {
479  DWORD error = GetLastError();
480  if (abort_on_error) {
481  __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
482  __kmp_msg_null);
483  }
484  return error;
485  }
486  }
487  return 0;
488  }
489  int set_system_affinity(bool abort_on_error) const override {
490  if (__kmp_num_proc_groups > 1) {
491  // Check for a valid mask.
492  GROUP_AFFINITY ga;
493  int group = get_proc_group();
494  if (group < 0) {
495  if (abort_on_error) {
496  KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
497  }
498  return -1;
499  }
500  // Transform the bit vector into a GROUP_AFFINITY struct
501  // and make the system call to set affinity.
502  ga.Group = group;
503  ga.Mask = mask[group];
504  ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
505 
506  KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
507  if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
508  DWORD error = GetLastError();
509  if (abort_on_error) {
510  __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
511  __kmp_msg_null);
512  }
513  return error;
514  }
515  } else {
516  if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
517  DWORD error = GetLastError();
518  if (abort_on_error) {
519  __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
520  __kmp_msg_null);
521  }
522  return error;
523  }
524  }
525  return 0;
526  }
527  int get_system_affinity(bool abort_on_error) override {
528  if (__kmp_num_proc_groups > 1) {
529  this->zero();
530  GROUP_AFFINITY ga;
531  KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
532  if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
533  DWORD error = GetLastError();
534  if (abort_on_error) {
535  __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
536  KMP_ERR(error), __kmp_msg_null);
537  }
538  return error;
539  }
540  if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
541  (ga.Mask == 0)) {
542  return -1;
543  }
544  mask[ga.Group] = ga.Mask;
545  } else {
546  mask_t newMask, sysMask, retval;
547  if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
548  DWORD error = GetLastError();
549  if (abort_on_error) {
550  __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
551  KMP_ERR(error), __kmp_msg_null);
552  }
553  return error;
554  }
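 // Windows has no GetThreadAffinityMask(), so temporarily install the
 // process mask on this thread; SetThreadAffinityMask() returns the thread's
 // previous mask, which is captured in 'retval' and restored below.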
555  retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
556  if (!retval) {
557  DWORD error = GetLastError();
558  if (abort_on_error) {
559  __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
560  KMP_ERR(error), __kmp_msg_null);
561  }
562  return error;
563  }
564  newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
565  if (!newMask) {
566  DWORD error = GetLastError();
567  if (abort_on_error) {
568  __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
569  KMP_ERR(error), __kmp_msg_null);
570  }
571  }
572  *mask = retval;
573  }
574  return 0;
575  }
576  int get_proc_group() const override {
577  int group = -1;
578  if (__kmp_num_proc_groups == 1) {
579  return 1;
580  }
581  for (int i = 0; i < __kmp_num_proc_groups; i++) {
582  if (mask[i] == 0)
583  continue;
584  if (group >= 0)
585  return -1;
586  group = i;
587  }
588  return group;
589  }
590  };
591  void determine_capable(const char *env_var) override {
592  __kmp_affinity_determine_capable(env_var);
593  }
594  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
595  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
596  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
597  KMPAffinity::Mask *allocate_mask_array(int num) override {
598  return new Mask[num];
599  }
600  void deallocate_mask_array(KMPAffinity::Mask *array) override {
601  Mask *windows_array = static_cast<Mask *>(array);
602  delete[] windows_array;
603  }
604  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
605  int index) override {
606  Mask *windows_array = static_cast<Mask *>(array);
607  return &(windows_array[index]);
608  }
609  api_type get_api_type() const override { return NATIVE_OS; }
610 };
611 #endif /* KMP_OS_WINDOWS */
612 #endif /* KMP_AFFINITY_SUPPORTED */
613 
614 // Describe an attribute for a level in the machine topology
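// Core type and core efficiency are mainly meaningful on hybrid topologies,
// where cores of different kinds coexist on one chip.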
615 struct kmp_hw_attr_t {
616  int core_type : 8;
617  int core_eff : 8;
618  unsigned valid : 1;
619  unsigned reserved : 15;
620 
621  static const int UNKNOWN_CORE_EFF = -1;
622 
623  kmp_hw_attr_t()
624  : core_type(KMP_HW_CORE_TYPE_UNKNOWN), core_eff(UNKNOWN_CORE_EFF),
625  valid(0), reserved(0) {}
626  void set_core_type(kmp_hw_core_type_t type) {
627  valid = 1;
628  core_type = type;
629  }
630  void set_core_eff(int eff) {
631  valid = 1;
632  core_eff = eff;
633  }
634  kmp_hw_core_type_t get_core_type() const {
635  return (kmp_hw_core_type_t)core_type;
636  }
637  int get_core_eff() const { return core_eff; }
638  bool is_core_type_valid() const {
639  return core_type != KMP_HW_CORE_TYPE_UNKNOWN;
640  }
641  bool is_core_eff_valid() const { return core_eff != UNKNOWN_CORE_EFF; }
642  operator bool() const { return valid; }
643  void clear() {
644  core_type = KMP_HW_CORE_TYPE_UNKNOWN;
645  core_eff = UNKNOWN_CORE_EFF;
646  valid = 0;
647  }
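 // A containment check over the valid fields only. Illustrative example (not
 // from the source): if A has both a core type and a core efficiency set while
 // B has only the same core type set, then A.contains(B) is true but
 // B.contains(A) is false, since B cannot match A's efficiency.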
648  bool contains(const kmp_hw_attr_t &other) const {
649  if (!valid && !other.valid)
650  return true;
651  if (valid && other.valid) {
652  if (other.is_core_type_valid()) {
653  if (!is_core_type_valid() || (get_core_type() != other.get_core_type()))
654  return false;
655  }
656  if (other.is_core_eff_valid()) {
657  if (!is_core_eff_valid() || (get_core_eff() != other.get_core_eff()))
658  return false;
659  }
660  return true;
661  }
662  return false;
663  }
664  bool operator==(const kmp_hw_attr_t &rhs) const {
665  return (rhs.valid == valid && rhs.core_eff == core_eff &&
666  rhs.core_type == core_type);
667  }
668  bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); }
669 };
670 
671 class kmp_hw_thread_t {
672 public:
673  static const int UNKNOWN_ID = -1;
674  static int compare_ids(const void *a, const void *b);
675  static int compare_compact(const void *a, const void *b);
676  int ids[KMP_HW_LAST];
677  int sub_ids[KMP_HW_LAST];
678  bool leader;
679  int os_id;
680  kmp_hw_attr_t attrs;
681 
682  void print() const;
683  void clear() {
684  for (int i = 0; i < (int)KMP_HW_LAST; ++i)
685  ids[i] = UNKNOWN_ID;
686  leader = false;
687  attrs.clear();
688  }
689 };
690 
691 class kmp_topology_t {
692 
693  struct flags_t {
694  int uniform : 1;
695  int reserved : 31;
696  };
697 
698  int depth;
699 
700  // The following arrays are all 'depth' long and have been
701  // allocated to hold up to KMP_HW_LAST objects if needed,
702  // so layers can be added without reallocating any array.
703 
704  // Ordered array of the types in the topology
705  kmp_hw_t *types;
706 
707  // Quick topology ratios; for non-uniform topologies,
708  // each entry holds the max number of itemA per itemB,
709  // e.g., [ 4 packages | 6 cores / package | 2 threads / core ]
710  int *ratio;
711 
712  // Storage containing the absolute number of each topology layer
713  int *count;
714 
715  // The number of core efficiencies. This is only useful for hybrid
716  // topologies. Core efficiencies will range from 0 to num efficiencies - 1
717  int num_core_efficiencies;
718  int num_core_types;
719  kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];
720 
721  // The hardware threads array
722  // hw_threads is num_hw_threads long
723  // Each hw_thread's ids and sub_ids are depth deep
724  int num_hw_threads;
725  kmp_hw_thread_t *hw_threads;
726 
727  // Equivalence hash where the key is the hardware topology item
728  // and the value is the equivalent hardware topology type in the
729  // types[] array. If the value is KMP_HW_UNKNOWN, then there is no
730  // known equivalence for the topology type.
731  kmp_hw_t equivalent[KMP_HW_LAST];
732 
733  // Flags describing the topology
734  flags_t flags;
735 
736  // Insert a new topology layer after allocation
737  void _insert_layer(kmp_hw_t type, const int *ids);
738 
739 #if KMP_GROUP_AFFINITY
740  // Insert topology information about Windows Processor groups
741  void _insert_windows_proc_groups();
742 #endif
743 
744  // Count each item & get the num x's per y
745  // e.g., get the number of cores and the number of threads per core
746  // for each (x, y) in (KMP_HW_* , KMP_HW_*)
747  void _gather_enumeration_information();
748 
749  // Remove layers that don't add information to the topology.
750  // This is done by having the layer take on the id = UNKNOWN_ID (-1)
751  void _remove_radix1_layers();
752 
753  // Find out if the topology is uniform
754  void _discover_uniformity();
755 
756  // Set all the sub_ids for each hardware thread
757  void _set_sub_ids();
758 
759  // Set global affinity variables describing the number of threads per
760  // core, the number of packages, the number of cores per package, and
761  // the number of cores.
762  void _set_globals();
763 
764  // Set the last level cache equivalent type
765  void _set_last_level_cache();
766 
767  // Return the number of cores with a particular attribute, 'attr'.
768  // If 'find_all' is true, then find all cores on the machine, otherwise find
769  // all cores per the layer 'above'
770  int _get_ncores_with_attr(const kmp_hw_attr_t &attr, int above,
771  bool find_all = false) const;
772 
773 public:
774  // Force use of allocate()/deallocate()
775  kmp_topology_t() = delete;
776  kmp_topology_t(const kmp_topology_t &t) = delete;
777  kmp_topology_t(kmp_topology_t &&t) = delete;
778  kmp_topology_t &operator=(const kmp_topology_t &t) = delete;
779  kmp_topology_t &operator=(kmp_topology_t &&t) = delete;
780 
781  static kmp_topology_t *allocate(int nproc, int ndepth, const kmp_hw_t *types);
782  static void deallocate(kmp_topology_t *);
783 
784  // Functions used in create_map() routines
785  kmp_hw_thread_t &at(int index) {
786  KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
787  return hw_threads[index];
788  }
789  const kmp_hw_thread_t &at(int index) const {
790  KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
791  return hw_threads[index];
792  }
793  int get_num_hw_threads() const { return num_hw_threads; }
794  void sort_ids() {
795  qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
796  kmp_hw_thread_t::compare_ids);
797  }
798  // Check if the hardware ids are unique; return true
799  // if they are, false otherwise
800  bool check_ids() const;
801 
802  // Function to call after the create_map() routine
803  void canonicalize();
804  void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores);
805 
806  // Functions used after canonicalize() called
807  bool filter_hw_subset();
808  bool is_close(int hwt1, int hwt2, int level) const;
809  bool is_uniform() const { return flags.uniform; }
810  // Return the equivalent type for 'type' in the topology;
811  // returns KMP_HW_UNKNOWN when there is no equivalent type
812  kmp_hw_t get_equivalent_type(kmp_hw_t type) const { return equivalent[type]; }
813  // Set type1 = type2
814  void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
815  KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);
816  KMP_DEBUG_ASSERT_VALID_HW_TYPE(type2);
817  kmp_hw_t real_type2 = equivalent[type2];
818  if (real_type2 == KMP_HW_UNKNOWN)
819  real_type2 = type2;
820  equivalent[type1] = real_type2;
821  // This loop is required since any of the types may have been set to
822  // be equivalent to type1. They all must be checked and reset to type2.
823  KMP_FOREACH_HW_TYPE(type) {
824  if (equivalent[type] == type1) {
825  equivalent[type] = real_type2;
826  }
827  }
828  }
829  // Calculate number of types corresponding to level1
830  // per types corresponding to level2 (e.g., number of threads per core)
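 // Worked example (using the illustrative ratios above: 4 packages,
 // 6 cores/package, 2 threads/core): with level2 = package and
 // level1 = thread, the result is 6 * 2 = 12 threads per package.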
831  int calculate_ratio(int level1, int level2) const {
832  KMP_DEBUG_ASSERT(level1 >= 0 && level1 < depth);
833  KMP_DEBUG_ASSERT(level2 >= 0 && level2 < depth);
834  int r = 1;
835  for (int level = level1; level > level2; --level)
836  r *= ratio[level];
837  return r;
838  }
839  int get_ratio(int level) const {
840  KMP_DEBUG_ASSERT(level >= 0 && level < depth);
841  return ratio[level];
842  }
843  int get_depth() const { return depth; }
844  kmp_hw_t get_type(int level) const {
845  KMP_DEBUG_ASSERT(level >= 0 && level < depth);
846  return types[level];
847  }
848  int get_level(kmp_hw_t type) const {
849  KMP_DEBUG_ASSERT_VALID_HW_TYPE(type);
850  int eq_type = equivalent[type];
851  if (eq_type == KMP_HW_UNKNOWN)
852  return -1;
853  for (int i = 0; i < depth; ++i)
854  if (types[i] == eq_type)
855  return i;
856  return -1;
857  }
858  int get_count(int level) const {
859  KMP_DEBUG_ASSERT(level >= 0 && level < depth);
860  return count[level];
861  }
862  // Return the total number of cores with attribute 'attr'
863  int get_ncores_with_attr(const kmp_hw_attr_t &attr) const {
864  return _get_ncores_with_attr(attr, -1, true);
865  }
866  // Return the number of cores with attribute
867  // 'attr' per topology level 'above'
868  int get_ncores_with_attr_per(const kmp_hw_attr_t &attr, int above) const {
869  return _get_ncores_with_attr(attr, above, false);
870  }
871 
872 #if KMP_AFFINITY_SUPPORTED
873  void sort_compact() {
874  qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
875  kmp_hw_thread_t::compare_compact);
876  }
877 #endif
878  void print(const char *env_var = "KMP_AFFINITY") const;
879  void dump() const;
880 };
881 extern kmp_topology_t *__kmp_topology;
882 
883 class kmp_hw_subset_t {
884  const static size_t MAX_ATTRS = KMP_HW_MAX_NUM_CORE_EFFS;
885 
886 public:
887  // Describe a machine topology item in KMP_HW_SUBSET
888  struct item_t {
889  kmp_hw_t type;
890  int num_attrs;
891  int num[MAX_ATTRS];
892  int offset[MAX_ATTRS];
893  kmp_hw_attr_t attr[MAX_ATTRS];
894  };
895  // Put parentheses around max to avoid accidental use of the Windows max macro.
896  const static int USE_ALL = (std::numeric_limits<int>::max)();
897 
898 private:
899  int depth;
900  int capacity;
901  item_t *items;
902  kmp_uint64 set;
903  bool absolute;
904  // The set must be able to handle up to KMP_HW_LAST number of layers
905  KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);
906  // Sorting the KMP_HW_SUBSET items to follow topology order
907  // All unknown topology types will be at the beginning of the subset
908  static int hw_subset_compare(const void *i1, const void *i2) {
909  kmp_hw_t type1 = ((const item_t *)i1)->type;
910  kmp_hw_t type2 = ((const item_t *)i2)->type;
911  int level1 = __kmp_topology->get_level(type1);
912  int level2 = __kmp_topology->get_level(type2);
913  return level1 - level2;
914  }
915 
916 public:
917  // Force use of allocate()/deallocate()
918  kmp_hw_subset_t() = delete;
919  kmp_hw_subset_t(const kmp_hw_subset_t &t) = delete;
920  kmp_hw_subset_t(kmp_hw_subset_t &&t) = delete;
921  kmp_hw_subset_t &operator=(const kmp_hw_subset_t &t) = delete;
922  kmp_hw_subset_t &operator=(kmp_hw_subset_t &&t) = delete;
923 
924  static kmp_hw_subset_t *allocate() {
925  int initial_capacity = 5;
926  kmp_hw_subset_t *retval =
927  (kmp_hw_subset_t *)__kmp_allocate(sizeof(kmp_hw_subset_t));
928  retval->depth = 0;
929  retval->capacity = initial_capacity;
930  retval->set = 0ull;
931  retval->absolute = false;
932  retval->items = (item_t *)__kmp_allocate(sizeof(item_t) * initial_capacity);
933  return retval;
934  }
935  static void deallocate(kmp_hw_subset_t *subset) {
936  __kmp_free(subset->items);
937  __kmp_free(subset);
938  }
939  void set_absolute() { absolute = true; }
940  bool is_absolute() const { return absolute; }
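 // Append one KMP_HW_SUBSET item (e.g., the three items parsed from a value
 // such as "2s,4c,2t" -- an illustrative value, not taken from this file).
 // If an item of the same layer type already exists, the (num, offset, attr)
 // triple is added to that item instead of creating a new one.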
941  void push_back(int num, kmp_hw_t type, int offset, kmp_hw_attr_t attr) {
942  for (int i = 0; i < depth; ++i) {
943  // Found an existing item for this layer type
944  // Add the num, offset, and attr to this item
945  if (items[i].type == type) {
946  int idx = items[i].num_attrs++;
947  if ((size_t)idx >= MAX_ATTRS)
948  return;
949  items[i].num[idx] = num;
950  items[i].offset[idx] = offset;
951  items[i].attr[idx] = attr;
952  return;
953  }
954  }
955  if (depth == capacity - 1) {
956  capacity *= 2;
957  item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity);
958  for (int i = 0; i < depth; ++i)
959  new_items[i] = items[i];
960  __kmp_free(items);
961  items = new_items;
962  }
963  items[depth].num_attrs = 1;
964  items[depth].type = type;
965  items[depth].num[0] = num;
966  items[depth].offset[0] = offset;
967  items[depth].attr[0] = attr;
968  depth++;
969  set |= (1ull << type);
970  }
971  int get_depth() const { return depth; }
972  const item_t &at(int index) const {
973  KMP_DEBUG_ASSERT(index >= 0 && index < depth);
974  return items[index];
975  }
976  item_t &at(int index) {
977  KMP_DEBUG_ASSERT(index >= 0 && index < depth);
978  return items[index];
979  }
980  void remove(int index) {
981  KMP_DEBUG_ASSERT(index >= 0 && index < depth);
982  set &= ~(1ull << items[index].type);
983  for (int j = index + 1; j < depth; ++j) {
984  items[j - 1] = items[j];
985  }
986  depth--;
987  }
988  void sort() {
989  KMP_DEBUG_ASSERT(__kmp_topology);
990  qsort(items, depth, sizeof(item_t), hw_subset_compare);
991  }
992  bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); }
993  void dump() const {
994  printf("**********************\n");
995  printf("*** kmp_hw_subset: ***\n");
996  printf("* depth: %d\n", depth);
997  printf("* items:\n");
998  for (int i = 0; i < depth; ++i) {
999  printf(" type: %s\n", __kmp_hw_get_keyword(items[i].type));
1000  for (int j = 0; j < items[i].num_attrs; ++j) {
1001  printf(" num: %d, offset: %d, attr: ", items[i].num[j],
1002  items[i].offset[j]);
1003  if (!items[i].attr[j]) {
1004  printf(" (none)\n");
1005  } else {
1006  printf(
1007  " core_type = %s, core_eff = %d\n",
1008  __kmp_hw_get_core_type_string(items[i].attr[j].get_core_type()),
1009  items[i].attr[j].get_core_eff());
1010  }
1011  }
1012  }
1013  printf("* set: 0x%llx\n", set);
1014  printf("* absolute: %d\n", absolute);
1015  printf("**********************\n");
1016  }
1017 };
1018 extern kmp_hw_subset_t *__kmp_hw_subset;
1019 
1020 /* A structure for holding machine-specific hierarchy info to be computed once
1021  at init. This structure represents a mapping of threads to the actual machine
1022  hierarchy, or to our best guess at what the hierarchy might be, for the
1023  purpose of performing an efficient barrier. In the worst case, when there is
1024  no machine hierarchy information, it produces a tree suitable for a barrier,
1025  similar to the tree used in the hyper barrier. */
1026 class hierarchy_info {
1027 public:
1028  /* Good default values for number of leaves and branching factor, given no
1029  affinity information. Behaves a bit like hyper barrier. */
1030  static const kmp_uint32 maxLeaves = 4;
1031  static const kmp_uint32 minBranch = 4;
1037  kmp_uint32 maxLevels;
1038 
1043  kmp_uint32 depth;
1044  kmp_uint32 base_num_threads;
1045  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
1046  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
1047  // 2=initialization in progress
1048  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
1049 
1054  kmp_uint32 *numPerLevel;
1055  kmp_uint32 *skipPerLevel;
1056 
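 // deriveLevels() fills numPerLevel bottom-up from the detected topology.
 // Hypothetical example (not from this file): for 2 sockets x 8 cores x
 // 2 threads, numPerLevel becomes { 2, 8, 2, 1, ... } with threads/core first
 // (the width-balancing loop in init() may adjust these counts afterwards).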
1057  void deriveLevels() {
1058  int hier_depth = __kmp_topology->get_depth();
1059  for (int i = hier_depth - 1, level = 0; i >= 0; --i, ++level) {
1060  numPerLevel[level] = __kmp_topology->get_ratio(i);
1061  }
1062  }
1063 
1064  hierarchy_info()
1065  : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
1066 
1067  void fini() {
1068  if (!uninitialized && numPerLevel) {
1069  __kmp_free(numPerLevel);
1070  numPerLevel = NULL;
1071  uninitialized = not_initialized;
1072  }
1073  }
1074 
1075  void init(int num_addrs) {
1076  kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
1077  &uninitialized, not_initialized, initializing);
1078  if (bool_result == 0) { // Wait for initialization
1079  while (TCR_1(uninitialized) != initialized)
1080  KMP_CPU_PAUSE();
1081  return;
1082  }
1083  KMP_DEBUG_ASSERT(bool_result == 1);
1084 
1085  /* Explicitly initialize the data fields here to prevent use of dirty values
1086  observed when the static library is re-initialized multiple times (e.g.,
1087  when a non-OpenMP thread repeatedly launches/joins a thread that uses
1088  OpenMP). */
1089  depth = 1;
1090  resizing = 0;
1091  maxLevels = 7;
1092  numPerLevel =
1093  (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
1094  skipPerLevel = &(numPerLevel[maxLevels]);
1095  for (kmp_uint32 i = 0; i < maxLevels;
1096  ++i) { // init numPerLevel[*] to 1 item per level
1097  numPerLevel[i] = 1;
1098  skipPerLevel[i] = 1;
1099  }
1100 
1101  // Derive the hierarchy levels from the machine topology when it is available
1102  if (__kmp_topology && __kmp_topology->get_depth() > 0) {
1103  deriveLevels();
1104  } else {
1105  numPerLevel[0] = maxLeaves;
1106  numPerLevel[1] = num_addrs / maxLeaves;
1107  if (num_addrs % maxLeaves)
1108  numPerLevel[1]++;
1109  }
1110 
1111  base_num_threads = num_addrs;
1112  for (int i = maxLevels - 1; i >= 0;
1113  --i) // count non-empty levels to get depth
1114  if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
1115  depth++;
1116 
1117  kmp_uint32 branch = minBranch;
1118  if (numPerLevel[0] == 1)
1119  branch = num_addrs / maxLeaves;
1120  if (branch < minBranch)
1121  branch = minBranch;
1122  for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
1123  while (numPerLevel[d] > branch ||
1124  (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
1125  if (numPerLevel[d] & 1)
1126  numPerLevel[d]++;
1127  numPerLevel[d] = numPerLevel[d] >> 1;
1128  if (numPerLevel[d + 1] == 1)
1129  depth++;
1130  numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
1131  }
1132  if (numPerLevel[0] == 1) {
1133  branch = branch >> 1;
1134  if (branch < 4)
1135  branch = minBranch;
1136  }
1137  }
1138 
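 // skipPerLevel[i] is the number of leaf threads spanned by one node at
 // level i, i.e., the running product of numPerLevel[0..i-1].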
1139  for (kmp_uint32 i = 1; i < depth; ++i)
1140  skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
1141  // Fill in hierarchy in the case of oversubscription
1142  for (kmp_uint32 i = depth; i < maxLevels; ++i)
1143  skipPerLevel[i] = 2 * skipPerLevel[i - 1];
1144 
1145  uninitialized = initialized; // One writer
1146  }
1147 
1148  // Resize the hierarchy if nproc changes to something larger than before
1149  void resize(kmp_uint32 nproc) {
1150  kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
1151  while (bool_result == 0) { // someone else is trying to resize
1152  KMP_CPU_PAUSE();
1153  if (nproc <= base_num_threads) // happy with other thread's resize
1154  return;
1155  else // try to resize
1156  bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
1157  }
1158  KMP_DEBUG_ASSERT(bool_result != 0);
1159  if (nproc <= base_num_threads)
1160  return; // happy with other thread's resize
1161 
1162  // Calculate new maxLevels
1163  kmp_uint32 old_sz = skipPerLevel[depth - 1];
1164  kmp_uint32 incs = 0, old_maxLevels = maxLevels;
1165  // First see if old maxLevels is enough to contain new size
1166  for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
1167  skipPerLevel[i] = 2 * skipPerLevel[i - 1];
1168  numPerLevel[i - 1] *= 2;
1169  old_sz *= 2;
1170  depth++;
1171  }
1172  if (nproc > old_sz) { // Not enough space, need to expand hierarchy
1173  while (nproc > old_sz) {
1174  old_sz *= 2;
1175  incs++;
1176  depth++;
1177  }
1178  maxLevels += incs;
1179 
1180  // Resize arrays
1181  kmp_uint32 *old_numPerLevel = numPerLevel;
1182  kmp_uint32 *old_skipPerLevel = skipPerLevel;
1183  numPerLevel = skipPerLevel = NULL;
1184  numPerLevel =
1185  (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
1186  skipPerLevel = &(numPerLevel[maxLevels]);
1187 
1188  // Copy old elements from old arrays
1189  for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
1190  // copy the previous per-level values
1191  numPerLevel[i] = old_numPerLevel[i];
1192  skipPerLevel[i] = old_skipPerLevel[i];
1193  }
1194 
1195  // Init new elements in arrays to 1
1196  for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
1197  // init numPerLevel[*] to 1 item per level
1198  numPerLevel[i] = 1;
1199  skipPerLevel[i] = 1;
1200  }
1201 
1202  // Free old arrays
1203  __kmp_free(old_numPerLevel);
1204  }
1205 
1206  // Fill in oversubscription levels of hierarchy
1207  for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
1208  skipPerLevel[i] = 2 * skipPerLevel[i - 1];
1209 
1210  base_num_threads = nproc;
1211  resizing = 0; // One writer
1212  }
1213 };
1214 #endif // KMP_AFFINITY_H