14 #include "kmp_affinity.h"
18 #include "kmp_wrapper_getpid.h"
19 #if KMP_USE_HIER_SCHED
20 #include "kmp_dispatch_hier.h"
24 static hierarchy_info machine_hierarchy;
26 void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }
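// Fill in a thread's barrier hierarchy data (depth, leaf kid count, skip-per-level
// table) from the global machine_hierarchy, growing it if nproc exceeds its size.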
28 void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
32 if (TCR_1(machine_hierarchy.uninitialized))
33 machine_hierarchy.init(NULL, nproc);
36 if (nproc > machine_hierarchy.base_num_threads)
37 machine_hierarchy.resize(nproc);
39 depth = machine_hierarchy.depth;
40 KMP_DEBUG_ASSERT(depth > 0);
42 thr_bar->depth = depth;
43 __kmp_type_convert(machine_hierarchy.numPerLevel[0] - 1,
44 &(thr_bar->base_leaf_kids));
45 thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
48 #if KMP_AFFINITY_SUPPORTED
50 bool KMPAffinity::picked_api = false;
52 void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
53 void *KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); }
54 void KMPAffinity::Mask::operator delete(void *p) { __kmp_free(p); }
55 void KMPAffinity::Mask::operator delete[](void *p) { __kmp_free(p); }
56 void *KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); }
57 void KMPAffinity::operator delete(void *p) { __kmp_free(p); }
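// Choose the affinity implementation: hwloc when the topology method requests it
// and affinity is not disabled, otherwise the native OS affinity layer.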
59 void KMPAffinity::pick_api() {
60 KMPAffinity *affinity_dispatch;
66 if (__kmp_affinity_top_method == affinity_top_method_hwloc &&
67 __kmp_affinity_type != affinity_disabled) {
68 affinity_dispatch = new KMPHwlocAffinity();
72 affinity_dispatch = new KMPNativeAffinity();
74 __kmp_affinity_dispatch = affinity_dispatch;
78 void KMPAffinity::destroy_api() {
79 if (__kmp_affinity_dispatch != NULL) {
80 delete __kmp_affinity_dispatch;
81 __kmp_affinity_dispatch = NULL;
86 #define KMP_ADVANCE_SCAN(scan) \
87 while (*scan != '\0') { \
95 char *__kmp_affinity_print_mask(char *buf, int buf_len,
96 kmp_affin_mask_t *mask) {
97 int start = 0, finish = 0, previous = 0;
100 KMP_ASSERT(buf_len >= 40);
103 char *end = buf + buf_len - 1;
106 if (mask->begin() == mask->end()) {
107 KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}");
108 KMP_ADVANCE_SCAN(scan);
109 KMP_ASSERT(scan <= end);
114 start = mask->begin();
118 for (finish = mask->next(start), previous = start;
119 finish == previous + 1 && finish != mask->end();
120 finish = mask->next(finish)) {
127 KMP_SNPRINTF(scan, end - scan + 1, "%s", ",");
128 KMP_ADVANCE_SCAN(scan);
133 if (previous - start > 1) {
134 KMP_SNPRINTF(scan, end - scan + 1, "%u-%u", start, previous);
137 KMP_SNPRINTF(scan, end - scan + 1, "%u", start);
138 KMP_ADVANCE_SCAN(scan);
139 if (previous - start > 0) {
140 KMP_SNPRINTF(scan, end - scan + 1, ",%u", previous);
143 KMP_ADVANCE_SCAN(scan);
146 if (start == mask->end())
154 KMP_ASSERT(scan <= end);
157 #undef KMP_ADVANCE_SCAN
163 kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
164 kmp_affin_mask_t *mask) {
165 int start = 0, finish = 0, previous = 0;
170 __kmp_str_buf_clear(buf);
173 if (mask->begin() == mask->end()) {
174 __kmp_str_buf_print(buf, "%s", "{<empty>}");
179 start = mask->begin();
183 for (finish = mask->next(start), previous = start;
184 finish == previous + 1 && finish != mask->end();
185 finish = mask->next(finish)) {
192 __kmp_str_buf_print(buf, "%s", ",");
197 if (previous - start > 1) {
198 __kmp_str_buf_print(buf, "%u-%u", start, previous);
201 __kmp_str_buf_print(buf, "%u", start);
202 if (previous - start > 0) {
203 __kmp_str_buf_print(buf, ",%u", previous);
208 if (start == mask->end())
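// Build a mask covering every OS proc on the machine; with Windows processor
// groups, set the active processors of each group at their group-relative offsets.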
214 void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
217 #if KMP_GROUP_AFFINITY
219 if (__kmp_num_proc_groups > 1) {
221 KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
222 for (group = 0; group < __kmp_num_proc_groups; group++) {
224 int num = __kmp_GetActiveProcessorCount(group);
225 for (i = 0; i < num; i++) {
226 KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
235 for (proc = 0; proc < __kmp_xproc; proc++) {
236 KMP_CPU_SET(proc, mask);
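// For each address, record which child of its parent it is at every topology
// level (childNums), using the sorted labels to detect level transitions.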
252 static void __kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
254 KMP_DEBUG_ASSERT(numAddrs > 0);
255 int depth = address2os->first.depth;
256 unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
257 unsigned *lastLabel = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
259 for (labCt = 0; labCt < depth; labCt++) {
260 address2os[0].first.childNums[labCt] = counts[labCt] = 0;
261 lastLabel[labCt] = address2os[0].first.labels[labCt];
264 for (i = 1; i < numAddrs; i++) {
265 for (labCt = 0; labCt < depth; labCt++) {
266 if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
268 for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
270 lastLabel[labCt2] = address2os[i].first.labels[labCt2];
273 lastLabel[labCt] = address2os[i].first.labels[labCt];
277 for (labCt = 0; labCt < depth; labCt++) {
278 address2os[i].first.childNums[labCt] = counts[labCt];
280 for (; labCt < (int)Address::maxDepth; labCt++) {
281 address2os[i].first.childNums[labCt] = 0;
284 __kmp_free(lastLabel);
299 kmp_affin_mask_t *__kmp_affin_fullMask = NULL;
301 static int nCoresPerPkg, nPackages;
302 static int __kmp_nThreadsPerCore;
303 #ifndef KMP_DFLT_NTH_CORES
304 static int __kmp_ncores;
306 static int *__kmp_pu_os_idx = NULL;
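// The topology is uniform when the available procs exactly fill
// packages x cores-per-package x threads-per-core.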
312 inline static bool __kmp_affinity_uniform_topology() {
313 return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
318 static void __kmp_affinity_print_topology(AddrUnsPair *address2os, int len,
319 int depth, int pkgLevel,
320 int coreLevel, int threadLevel) {
323 KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
324 for (proc = 0; proc < len; proc++) {
327 __kmp_str_buf_init(&buf);
328 for (level = 0; level < depth; level++) {
329 if (level == threadLevel) {
330 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
331 } else if (level == coreLevel) {
332 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
333 } else if (level == pkgLevel) {
334 __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
335 } else if (level > pkgLevel) {
336 __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
337 level - pkgLevel - 1);
339 __kmp_str_buf_print(&buf, "L%d ", level);
341 __kmp_str_buf_print(&buf, "%d ", address2os[proc].first.labels[level]);
343 KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
345 __kmp_str_buf_free(&buf);
351 static void __kmp_affinity_print_hwloc_tp(AddrUnsPair *addrP, int len,
352 int depth, int *levels) {
355 __kmp_str_buf_init(&buf);
356 KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
357 for (proc = 0; proc < len; proc++) {
358 __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Package),
359 addrP[proc].first.labels[0]);
363 if (__kmp_numa_detected)
365 if (levels[level++] > 0)
366 __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Node),
367 addrP[proc].first.labels[label++]);
368 if (__kmp_tile_depth > 0)
370 if (levels[level++] > 0)
371 __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Tile),
372 addrP[proc].first.labels[label++]);
373 if (levels[level++] > 0)
375 __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Core),
376 addrP[proc].first.labels[label++]);
377 if (levels[level++] > 0)
379 __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Thread),
380 addrP[proc].first.labels[label++]);
381 KMP_DEBUG_ASSERT(label == depth);
383 KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", addrP[proc].second, buf.str);
384 __kmp_str_buf_clear(&buf);
386 __kmp_str_buf_free(&buf);
389 static int nNodePerPkg, nTilePerPkg, nTilePerNode, nCorePerNode, nCorePerTile;
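// Drop topology levels whose radix is one (all entries share the same label),
// shifting the remaining labels down and reducing each address's depth.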
396 static int __kmp_affinity_remove_radix_one_levels(AddrUnsPair *addrP, int nTh,
397 int depth, int *levels) {
401 int new_depth = depth;
402 for (level = depth - 1; level > 0; --level) {
405 for (i = 1; i < nTh; ++i) {
406 if (addrP[0].first.labels[level] != addrP[i].first.labels[level]) {
412 if (!radix1_detected)
417 if (level == new_depth) {
420 for (i = 0; i < nTh; ++i) {
421 addrP[i].first.depth--;
426 for (j = level; j < new_depth; ++j) {
427 for (i = 0; i < nTh; ++i) {
428 addrP[i].first.labels[j] = addrP[i].first.labels[j + 1];
429 addrP[i].first.depth--;
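// Count the hwloc objects of the given type that lie underneath obj.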
442 static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj,
443 hwloc_obj_type_t type) {
446 for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type,
447 obj->logical_index, type, 0);
449 hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) ==
451 first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type,
458 static int __kmp_hwloc_count_children_by_depth(hwloc_topology_t t,
460 kmp_hwloc_depth_t depth,
462 if (o->depth == depth) {
468 for (unsigned i = 0; i < o->arity; i++)
469 sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f);
473 static int __kmp_hwloc_count_children_by_type(hwloc_topology_t t, hwloc_obj_t o,
474 hwloc_obj_type_t type,
476 if (!hwloc_compare_types(o->type, type)) {
482 for (unsigned i = 0; i < o->arity; i++)
483 sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f);
487 static int __kmp_hwloc_process_obj_core_pu(AddrUnsPair *addrPair,
489 int &num_active_cores,
490 hwloc_obj_t obj, int depth,
492 hwloc_obj_t core = NULL;
493 hwloc_topology_t &tp = __kmp_hwloc_topology;
494 int NC = __kmp_hwloc_count_children_by_type(tp, obj, HWLOC_OBJ_CORE, &core);
495 for (int core_id = 0; core_id < NC; ++core_id, core = core->next_cousin) {
496 hwloc_obj_t pu = NULL;
497 KMP_DEBUG_ASSERT(core != NULL);
498 int num_active_threads = 0;
499 int NT = __kmp_hwloc_count_children_by_type(tp, core, HWLOC_OBJ_PU, &pu);
501 for (int pu_id = 0; pu_id < NT; ++pu_id, pu = pu->next_cousin) {
502 KMP_DEBUG_ASSERT(pu != NULL);
503 if (!KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
505 Address addr(depth + 2);
506 KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
507 obj->os_index, obj->logical_index, core->os_index,
508 core->logical_index, pu->os_index, pu->logical_index));
509 for (int i = 0; i < depth; ++i)
510 addr.labels[i] = labels[i];
511 addr.labels[depth] = core_id;
512 addr.labels[depth + 1] = pu_id;
513 addrPair[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
514 __kmp_pu_os_idx[nActiveThreads] = pu->os_index;
516 ++num_active_threads;
518 if (num_active_threads) {
521 if (num_active_threads > __kmp_nThreadsPerCore)
522 __kmp_nThreadsPerCore = num_active_threads;
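// Detect whether NUMA nodes sit below the package level and, via the L2 cache
// depth, whether the machine exposes tiles; sets __kmp_numa_detected / __kmp_tile_depth.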
530 static int __kmp_hwloc_check_numa() {
531 hwloc_topology_t &tp = __kmp_hwloc_topology;
532 hwloc_obj_t hT, hC, hL, hN, hS;
533 int depth, l2cache_depth, package_depth;
536 hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, 0);
541 hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
542 hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
543 KMP_DEBUG_ASSERT(hS != NULL);
544 if (hN != NULL && hN->depth > hS->depth) {
545 __kmp_numa_detected = TRUE;
546 if (__kmp_affinity_gran == affinity_gran_node) {
547 __kmp_affinity_gran = affinity_gran_numa;
551 package_depth = hwloc_get_type_depth(tp, HWLOC_OBJ_PACKAGE);
552 l2cache_depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
554 depth = (l2cache_depth < package_depth) ? package_depth : l2cache_depth;
555 hL = hwloc_get_ancestor_obj_by_depth(tp, depth, hT);
558 __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1)
559 __kmp_tile_depth = depth;
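// Build the OS-proc to topology-address table with hwloc, walking packages,
// optional NUMA nodes and tiles, then cores and PUs, and recording per-level counts.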
563 static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
564 kmp_i18n_id_t *const msg_id) {
565 hwloc_topology_t &tp = __kmp_hwloc_topology;
567 *msg_id = kmp_i18n_null;
570 kmp_affin_mask_t *oldMask;
571 KMP_CPU_ALLOC(oldMask);
572 __kmp_get_system_affinity(oldMask, TRUE);
573 __kmp_hwloc_check_numa();
575 if (!KMP_AFFINITY_CAPABLE()) {
578 KMP_ASSERT(__kmp_affinity_type == affinity_none);
580 hwloc_obj_t o = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0);
582 nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(o, HWLOC_OBJ_CORE);
585 o = hwloc_get_obj_by_type(tp, HWLOC_OBJ_CORE, 0);
587 __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(o, HWLOC_OBJ_PU);
589 __kmp_nThreadsPerCore = 1;
590 __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
591 if (nCoresPerPkg == 0)
593 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
594 if (__kmp_affinity_verbose) {
595 KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
596 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
597 if (__kmp_affinity_uniform_topology()) {
598 KMP_INFORM(Uniform, "KMP_AFFINITY");
600 KMP_INFORM(NonUniform, "KMP_AFFINITY");
602 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
603 __kmp_nThreadsPerCore, __kmp_ncores);
605 KMP_CPU_FREE(oldMask);
610 int levels[5] = {0, 1, 2, 3, 4};
612 if (__kmp_numa_detected)
614 if (__kmp_tile_depth)
618 AddrUnsPair *retval =
619 (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
620 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
621 __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
628 hwloc_obj_t socket, node, tile;
629 int nActiveThreads = 0;
632 __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
633 nNodePerPkg = nTilePerPkg = nTilePerNode = nCorePerNode = nCorePerTile = 0;
634 for (socket = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0); socket != NULL;
635 socket = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, socket),
637 labels[0] = socket_id;
638 if (__kmp_numa_detected) {
640 int n_active_nodes = 0;
642 NN = __kmp_hwloc_count_children_by_type(tp, socket, HWLOC_OBJ_NUMANODE,
644 for (int node_id = 0; node_id < NN; ++node_id, node = node->next_cousin) {
646 if (__kmp_tile_depth) {
649 int n_active_tiles = 0;
651 NT = __kmp_hwloc_count_children_by_depth(tp, node, __kmp_tile_depth,
653 for (int tl_id = 0; tl_id < NT; ++tl_id, tile = tile->next_cousin) {
655 int n_active_cores = 0;
656 __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
657 n_active_cores, tile, 3, labels);
658 if (n_active_cores) {
660 if (n_active_cores > nCorePerTile)
661 nCorePerTile = n_active_cores;
664 if (n_active_tiles) {
666 if (n_active_tiles > nTilePerNode)
667 nTilePerNode = n_active_tiles;
671 int n_active_cores = 0;
672 __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
673 n_active_cores, node, 2, labels);
674 if (n_active_cores) {
676 if (n_active_cores > nCorePerNode)
677 nCorePerNode = n_active_cores;
681 if (n_active_nodes) {
683 if (n_active_nodes > nNodePerPkg)
684 nNodePerPkg = n_active_nodes;
687 if (__kmp_tile_depth) {
690 int n_active_tiles = 0;
692 NT = __kmp_hwloc_count_children_by_depth(tp, socket, __kmp_tile_depth,
694 for (int tl_id = 0; tl_id < NT; ++tl_id, tile = tile->next_cousin) {
696 int n_active_cores = 0;
697 __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
698 n_active_cores, tile, 2, labels);
699 if (n_active_cores) {
701 if (n_active_cores > nCorePerTile)
702 nCorePerTile = n_active_cores;
705 if (n_active_tiles) {
707 if (n_active_tiles > nTilePerPkg)
708 nTilePerPkg = n_active_tiles;
712 int n_active_cores = 0;
713 __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads, n_active_cores,
715 if (n_active_cores) {
717 if (n_active_cores > nCoresPerPkg)
718 nCoresPerPkg = n_active_cores;
725 KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
726 KMP_ASSERT(nActiveThreads > 0);
727 if (nActiveThreads == 1) {
728 __kmp_ncores = nPackages = 1;
729 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
730 if (__kmp_affinity_verbose) {
731 KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
732 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
733 KMP_INFORM(Uniform, "KMP_AFFINITY");
734 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
735 __kmp_nThreadsPerCore, __kmp_ncores);
738 if (__kmp_affinity_type == affinity_none) {
740 KMP_CPU_FREE(oldMask);
746 addr.labels[0] = retval[0].first.labels[0];
747 retval[0].first = addr;
749 if (__kmp_affinity_gran_levels < 0) {
750 __kmp_affinity_gran_levels = 0;
753 if (__kmp_affinity_verbose) {
754 __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
757 *address2os = retval;
758 KMP_CPU_FREE(oldMask);
763 qsort(retval, nActiveThreads, sizeof(*retval),
764 __kmp_affinity_cmp_Address_labels);
767 int nPUs = nPackages * __kmp_nThreadsPerCore;
768 if (__kmp_numa_detected) {
769 if (__kmp_tile_depth) {
770 nPUs *= (nNodePerPkg * nTilePerNode * nCorePerTile);
772 nPUs *= (nNodePerPkg * nCorePerNode);
775 if (__kmp_tile_depth) {
776 nPUs *= (nTilePerPkg * nCorePerTile);
778 nPUs *= nCoresPerPkg;
781 unsigned uniform = (nPUs == nActiveThreads);
784 if (__kmp_affinity_verbose) {
785 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
787 KMP_INFORM(Uniform, "KMP_AFFINITY");
789 KMP_INFORM(NonUniform, "KMP_AFFINITY");
791 if (__kmp_numa_detected) {
792 if (__kmp_tile_depth) {
793 KMP_INFORM(TopologyExtraNoTi, "KMP_AFFINITY", nPackages, nNodePerPkg,
794 nTilePerNode, nCorePerTile, __kmp_nThreadsPerCore,
797 KMP_INFORM(TopologyExtraNode, "KMP_AFFINITY", nPackages, nNodePerPkg,
798 nCorePerNode, __kmp_nThreadsPerCore, __kmp_ncores);
799 nPUs *= (nNodePerPkg * nCorePerNode);
802 if (__kmp_tile_depth) {
803 KMP_INFORM(TopologyExtraTile, "KMP_AFFINITY", nPackages, nTilePerPkg,
804 nCorePerTile, __kmp_nThreadsPerCore, __kmp_ncores);
807 __kmp_str_buf_init(&buf);
808 __kmp_str_buf_print(&buf, "%d", nPackages);
809 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
810 __kmp_nThreadsPerCore, __kmp_ncores);
811 __kmp_str_buf_free(&buf);
816 if (__kmp_affinity_type == affinity_none) {
818 KMP_CPU_FREE(oldMask);
822 int depth_full = depth;
825 depth = __kmp_affinity_remove_radix_one_levels(retval, nActiveThreads, depth,
827 KMP_DEBUG_ASSERT(__kmp_affinity_gran != affinity_gran_default);
828 if (__kmp_affinity_gran_levels < 0) {
831 __kmp_affinity_gran_levels = 0;
832 if (__kmp_affinity_gran > affinity_gran_thread) {
833 for (int i = 1; i <= depth_full; ++i) {
834 if (__kmp_affinity_gran <= i)
836 if (levels[depth_full - i] > 0)
837 __kmp_affinity_gran_levels++;
840 if (__kmp_affinity_gran > affinity_gran_package)
841 __kmp_affinity_gran_levels++;
844 if (__kmp_affinity_verbose)
845 __kmp_affinity_print_hwloc_tp(retval, nActiveThreads, depth, levels);
847 KMP_CPU_FREE(oldMask);
848 *address2os = retval;
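// Flat topology fallback: treat every available OS proc as its own package.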
856 static int __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
857 kmp_i18n_id_t *const msg_id) {
859 *msg_id = kmp_i18n_null;
864 if (!KMP_AFFINITY_CAPABLE()) {
865 KMP_ASSERT(__kmp_affinity_type == affinity_none);
866 __kmp_ncores = nPackages = __kmp_xproc;
867 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
868 if (__kmp_affinity_verbose) {
869 KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
870 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
871 KMP_INFORM(Uniform, "KMP_AFFINITY");
872 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
873 __kmp_nThreadsPerCore, __kmp_ncores);
882 __kmp_ncores = nPackages = __kmp_avail_proc;
883 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
884 if (__kmp_affinity_verbose) {
885 KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
886 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
887 KMP_INFORM(Uniform, "KMP_AFFINITY");
888 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
889 __kmp_nThreadsPerCore, __kmp_ncores);
891 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
892 __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
893 if (__kmp_affinity_type == affinity_none) {
896 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
897 if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask))
899 __kmp_pu_os_idx[avail_ct++] = i;
906 (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
909 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
911 if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
914 __kmp_pu_os_idx[avail_ct] = i;
917 (*address2os)[avail_ct++] = AddrUnsPair(addr, i);
919 if (__kmp_affinity_verbose) {
920 KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
923 if (__kmp_affinity_gran_levels < 0) {
926 if (__kmp_affinity_gran > affinity_gran_package) {
927 __kmp_affinity_gran_levels = 1;
929 __kmp_affinity_gran_levels = 0;
935 #if KMP_GROUP_AFFINITY
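// Windows processor-group topology: label each proc by its group number and its
// bit position within the group's DWORD_PTR mask.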
941 static int __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
942 kmp_i18n_id_t *const msg_id) {
944 *msg_id = kmp_i18n_null;
948 if (!KMP_AFFINITY_CAPABLE()) {
955 (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
956 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
957 __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
960 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
962 if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
965 __kmp_pu_os_idx[avail_ct] = i;
967 addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
968 addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
969 (*address2os)[avail_ct++] = AddrUnsPair(addr, i);
971 if (__kmp_affinity_verbose) {
972 KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
977 if (__kmp_affinity_gran_levels < 0) {
978 if (__kmp_affinity_gran == affinity_gran_group) {
979 __kmp_affinity_gran_levels = 1;
980 } else if ((__kmp_affinity_gran == affinity_gran_fine) ||
981 (__kmp_affinity_gran == affinity_gran_thread)) {
982 __kmp_affinity_gran_levels = 0;
984 const char *gran_str = NULL;
985 if (__kmp_affinity_gran == affinity_gran_core) {
987 } else if (__kmp_affinity_gran == affinity_gran_package) {
988 gran_str = "package";
989 } else if (__kmp_affinity_gran == affinity_gran_node) {
997 __kmp_affinity_gran_levels = 0;
1005 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1007 static int __kmp_cpuid_mask_width(int count) {
1010 while ((1 << r) < count)
1015 class apicThreadInfo {
1019 unsigned maxCoresPerPkg;
1020 unsigned maxThreadsPerPkg;
1026 static int __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a,
1028 const apicThreadInfo *aa = (const apicThreadInfo *)a;
1029 const apicThreadInfo *bb = (const apicThreadInfo *)b;
1030 if (aa->pkgId < bb->pkgId)
1032 if (aa->pkgId > bb->pkgId)
1034 if (aa->coreId < bb->coreId)
1036 if (aa->coreId > bb->coreId)
1038 if (aa->threadId < bb->threadId)
1040 if (aa->threadId > bb->threadId)
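// Build the topology map from legacy APIC ids (cpuid leaves 1 and 4), binding the
// current thread to each proc in turn to read its id.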
1049 static int __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
1050 kmp_i18n_id_t *const msg_id) {
1053 *msg_id = kmp_i18n_null;
1056 __kmp_x86_cpuid(0, 0, &buf);
1058 *msg_id = kmp_i18n_str_NoLeaf4Support;
1067 if (!KMP_AFFINITY_CAPABLE()) {
1070 KMP_ASSERT(__kmp_affinity_type == affinity_none);
1076 __kmp_x86_cpuid(1, 0, &buf);
1077 int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
1078 if (maxThreadsPerPkg == 0) {
1079 maxThreadsPerPkg = 1;
1093 __kmp_x86_cpuid(0, 0, &buf);
1095 __kmp_x86_cpuid(4, 0, &buf);
1096 nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
1114 __kmp_ncores = __kmp_xproc;
1115 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
1116 __kmp_nThreadsPerCore = 1;
1117 if (__kmp_affinity_verbose) {
1118 KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
1119 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1120 if (__kmp_affinity_uniform_topology()) {
1121 KMP_INFORM(Uniform, "KMP_AFFINITY");
1123 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1125 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1126 __kmp_nThreadsPerCore, __kmp_ncores);
1136 kmp_affin_mask_t *oldMask;
1137 KMP_CPU_ALLOC(oldMask);
1138 KMP_ASSERT(oldMask != NULL);
1139 __kmp_get_system_affinity(oldMask, TRUE);
1167 apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
1168 __kmp_avail_proc * sizeof(apicThreadInfo));
1169 unsigned nApics = 0;
1170 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
1172 if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
1175 KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
1177 __kmp_affinity_dispatch->bind_thread(i);
1178 threadInfo[nApics].osId = i;
1181 __kmp_x86_cpuid(1, 0, &buf);
1182 if (((buf.edx >> 9) & 1) == 0) {
1183 __kmp_set_system_affinity(oldMask, TRUE);
1184 __kmp_free(threadInfo);
1185 KMP_CPU_FREE(oldMask);
1186 *msg_id = kmp_i18n_str_ApicNotPresent;
1189 threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
1190 threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
1191 if (threadInfo[nApics].maxThreadsPerPkg == 0) {
1192 threadInfo[nApics].maxThreadsPerPkg = 1;
1201 __kmp_x86_cpuid(0, 0, &buf);
1203 __kmp_x86_cpuid(4, 0, &buf);
1204 threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
1206 threadInfo[nApics].maxCoresPerPkg = 1;
1210 int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg);
1211 threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
1213 int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg);
1214 int widthT = widthCT - widthC;
1219 __kmp_set_system_affinity(oldMask, TRUE);
1220 __kmp_free(threadInfo);
1221 KMP_CPU_FREE(oldMask);
1222 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1226 int maskC = (1 << widthC) - 1;
1227 threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC;
1229 int maskT = (1 << widthT) - 1;
1230 threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;
1237 __kmp_set_system_affinity(oldMask, TRUE);
1246 KMP_ASSERT(nApics > 0);
1248 __kmp_ncores = nPackages = 1;
1249 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
1250 if (__kmp_affinity_verbose) {
1251 KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
1252 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1253 KMP_INFORM(Uniform, "KMP_AFFINITY");
1254 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1255 __kmp_nThreadsPerCore, __kmp_ncores);
1258 if (__kmp_affinity_type == affinity_none) {
1259 __kmp_free(threadInfo);
1260 KMP_CPU_FREE(oldMask);
1264 *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair));
1266 addr.labels[0] = threadInfo[0].pkgId;
1267 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
1269 if (__kmp_affinity_gran_levels < 0) {
1270 __kmp_affinity_gran_levels = 0;
1273 if (__kmp_affinity_verbose) {
1274 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
1277 __kmp_free(threadInfo);
1278 KMP_CPU_FREE(oldMask);
1283 qsort(threadInfo, nApics, sizeof(*threadInfo),
1284 __kmp_affinity_cmp_apicThreadInfo_phys_id);
1301 __kmp_nThreadsPerCore = 1;
1302 unsigned nCores = 1;
1305 unsigned lastPkgId = threadInfo[0].pkgId;
1306 unsigned coreCt = 1;
1307 unsigned lastCoreId = threadInfo[0].coreId;
1308 unsigned threadCt = 1;
1309 unsigned lastThreadId = threadInfo[0].threadId;
1312 unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
1313 unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
1315 for (i = 1; i < nApics; i++) {
1316 if (threadInfo[i].pkgId != lastPkgId) {
1319 lastPkgId = threadInfo[i].pkgId;
1320 if ((int)coreCt > nCoresPerPkg)
1321 nCoresPerPkg = coreCt;
1323 lastCoreId = threadInfo[i].coreId;
1324 if ((int)threadCt > __kmp_nThreadsPerCore)
1325 __kmp_nThreadsPerCore = threadCt;
1327 lastThreadId = threadInfo[i].threadId;
1331 prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
1332 prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
1336 if (threadInfo[i].coreId != lastCoreId) {
1339 lastCoreId = threadInfo[i].coreId;
1340 if ((int)threadCt > __kmp_nThreadsPerCore)
1341 __kmp_nThreadsPerCore = threadCt;
1343 lastThreadId = threadInfo[i].threadId;
1344 } else if (threadInfo[i].threadId != lastThreadId) {
1346 lastThreadId = threadInfo[i].threadId;
1348 __kmp_free(threadInfo);
1349 KMP_CPU_FREE(oldMask);
1350 *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
1356 if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) ||
1357 (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
1358 __kmp_free(threadInfo);
1359 KMP_CPU_FREE(oldMask);
1360 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1365 if ((int)coreCt > nCoresPerPkg)
1366 nCoresPerPkg = coreCt;
1367 if ((int)threadCt > __kmp_nThreadsPerCore)
1368 __kmp_nThreadsPerCore = threadCt;
1374 __kmp_ncores = nCores;
1375 if (__kmp_affinity_verbose) {
1376 KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
1377 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1378 if (__kmp_affinity_uniform_topology()) {
1379 KMP_INFORM(Uniform, "KMP_AFFINITY");
1381 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1383 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1384 __kmp_nThreadsPerCore, __kmp_ncores);
1386 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
1387 KMP_DEBUG_ASSERT(nApics == (unsigned)__kmp_avail_proc);
1388 __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
1389 for (i = 0; i < nApics; ++i) {
1390 __kmp_pu_os_idx[i] = threadInfo[i].osId;
1392 if (__kmp_affinity_type == affinity_none) {
1393 __kmp_free(threadInfo);
1394 KMP_CPU_FREE(oldMask);
1402 int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
1404 (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
1405 unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
1407 KMP_ASSERT(depth > 0);
1408 *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
1410 for (i = 0; i < nApics; ++i) {
1411 Address addr(depth);
1412 unsigned os = threadInfo[i].osId;
1415 if (pkgLevel >= 0) {
1416 addr.labels[d++] = threadInfo[i].pkgId;
1418 if (coreLevel >= 0) {
1419 addr.labels[d++] = threadInfo[i].coreId;
1421 if (threadLevel >= 0) {
1422 addr.labels[d++] = threadInfo[i].threadId;
1424 (*address2os)[i] = AddrUnsPair(addr, os);
1427 if (__kmp_affinity_gran_levels < 0) {
1430 __kmp_affinity_gran_levels = 0;
1431 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
1432 __kmp_affinity_gran_levels++;
1434 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1435 __kmp_affinity_gran_levels++;
1437 if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
1438 __kmp_affinity_gran_levels++;
1442 if (__kmp_affinity_verbose) {
1443 __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
1444 coreLevel, threadLevel);
1447 __kmp_free(threadInfo);
1448 KMP_CPU_FREE(oldMask);
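// Build the topology map from x2APIC ids (cpuid leaf 11), which reports the shift
// width of each topology level directly.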
1455 static int __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
1456 kmp_i18n_id_t *const msg_id) {
1459 *msg_id = kmp_i18n_null;
1462 __kmp_x86_cpuid(0, 0, &buf);
1464 *msg_id = kmp_i18n_str_NoLeaf11Support;
1467 __kmp_x86_cpuid(11, 0, &buf);
1469 *msg_id = kmp_i18n_str_NoLeaf11Support;
1478 int threadLevel = -1;
1481 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
1483 for (level = 0;; level++) {
1494 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1497 __kmp_x86_cpuid(11, level, &buf);
1506 int kind = (buf.ecx >> 8) & 0xff;
1509 threadLevel = level;
1512 __kmp_nThreadsPerCore = buf.ebx & 0xffff;
1513 if (__kmp_nThreadsPerCore == 0) {
1514 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1517 } else if (kind == 2) {
1521 nCoresPerPkg = buf.ebx & 0xffff;
1522 if (nCoresPerPkg == 0) {
1523 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1528 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1531 if (pkgLevel >= 0) {
1535 nPackages = buf.ebx & 0xffff;
1536 if (nPackages == 0) {
1537 *msg_id = kmp_i18n_str_InvalidCpuidInfo;
1548 if (threadLevel >= 0) {
1549 threadLevel = depth - threadLevel - 1;
1551 if (coreLevel >= 0) {
1552 coreLevel = depth - coreLevel - 1;
1554 KMP_DEBUG_ASSERT(pkgLevel >= 0);
1555 pkgLevel = depth - pkgLevel - 1;
1562 if (!KMP_AFFINITY_CAPABLE()) {
1565 KMP_ASSERT(__kmp_affinity_type == affinity_none);
1567 __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
1568 nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
1569 if (__kmp_affinity_verbose) {
1570 KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
1571 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1572 if (__kmp_affinity_uniform_topology()) {
1573 KMP_INFORM(Uniform, "KMP_AFFINITY");
1575 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1577 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1578 __kmp_nThreadsPerCore, __kmp_ncores);
1588 kmp_affin_mask_t *oldMask;
1589 KMP_CPU_ALLOC(oldMask);
1590 __kmp_get_system_affinity(oldMask, TRUE);
1593 AddrUnsPair *retval =
1594 (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
1600 KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
1602 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
1605 KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
1607 __kmp_affinity_dispatch->bind_thread(proc);
1610 Address addr(depth);
1613 for (level = 0; level < depth; level++) {
1614 __kmp_x86_cpuid(11, level, &buf);
1615 unsigned apicId = buf.edx;
1617 if (level != depth - 1) {
1618 KMP_CPU_FREE(oldMask);
1619 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1622 addr.labels[depth - level - 1] = apicId >> prev_shift;
1626 int shift = buf.eax & 0x1f;
1627 int mask = (1 << shift) - 1;
1628 addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
1631 if (level != depth) {
1632 KMP_CPU_FREE(oldMask);
1633 *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
1637 retval[nApics] = AddrUnsPair(addr, proc);
1643 __kmp_set_system_affinity(oldMask, TRUE);
1646 KMP_ASSERT(nApics > 0);
1648 __kmp_ncores = nPackages = 1;
1649 __kmp_nThreadsPerCore = nCoresPerPkg = 1;
1650 if (__kmp_affinity_verbose) {
1651 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1652 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1653 KMP_INFORM(Uniform, "KMP_AFFINITY");
1654 KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
1655 __kmp_nThreadsPerCore, __kmp_ncores);
1658 if (__kmp_affinity_type == affinity_none) {
1660 KMP_CPU_FREE(oldMask);
1666 addr.labels[0] = retval[0].first.labels[pkgLevel];
1667 retval[0].first = addr;
1669 if (__kmp_affinity_gran_levels < 0) {
1670 __kmp_affinity_gran_levels = 0;
1673 if (__kmp_affinity_verbose) {
1674 __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
1677 *address2os = retval;
1678 KMP_CPU_FREE(oldMask);
1683 qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
1686 unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1687 unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1688 unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1689 unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
1690 for (level = 0; level < depth; level++) {
1694 last[level] = retval[0].first.labels[level];
1701 for (proc = 1; (int)proc < nApics; proc++) {
1703 for (level = 0; level < depth; level++) {
1704 if (retval[proc].first.labels[level] != last[level]) {
1706 for (j = level + 1; j < depth; j++) {
1717 last[j] = retval[proc].first.labels[j];
1721 if (counts[level] > maxCt[level]) {
1722 maxCt[level] = counts[level];
1724 last[level] = retval[proc].first.labels[level];
1726 } else if (level == depth - 1) {
1732 KMP_CPU_FREE(oldMask);
1733 *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
1743 if (threadLevel >= 0) {
1744 __kmp_nThreadsPerCore = maxCt[threadLevel];
1746 __kmp_nThreadsPerCore = 1;
1748 nPackages = totals[pkgLevel];
1750 if (coreLevel >= 0) {
1751 __kmp_ncores = totals[coreLevel];
1752 nCoresPerPkg = maxCt[coreLevel];
1754 __kmp_ncores = nPackages;
1759 unsigned prod = maxCt[0];
1760 for (level = 1; level < depth; level++) {
1761 prod *= maxCt[level];
1763 bool uniform = (prod == totals[level - 1]);
1766 if (__kmp_affinity_verbose) {
1767 KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
1768 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
1770 KMP_INFORM(Uniform, "KMP_AFFINITY");
1772 KMP_INFORM(NonUniform, "KMP_AFFINITY");
1776 __kmp_str_buf_init(&buf);
1778 __kmp_str_buf_print(&buf, "%d", totals[0]);
1779 for (level = 1; level <= pkgLevel; level++) {
1780 __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
1782 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
1783 __kmp_nThreadsPerCore, __kmp_ncores);
1785 __kmp_str_buf_free(&buf);
1787 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
1788 KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
1789 __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
1790 for (proc = 0; (int)proc < nApics; ++proc) {
1791 __kmp_pu_os_idx[proc] = retval[proc].second;
1793 if (__kmp_affinity_type == affinity_none) {
1799 KMP_CPU_FREE(oldMask);
1806 for (level = 0; level < depth; level++) {
1807 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1815 if (new_depth != depth) {
1816 AddrUnsPair *new_retval =
1817 (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
1818 for (proc = 0; (int)proc < nApics; proc++) {
1819 Address addr(new_depth);
1820 new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
1823 int newPkgLevel = -1;
1824 int newCoreLevel = -1;
1825 int newThreadLevel = -1;
1826 for (level = 0; level < depth; level++) {
1827 if ((maxCt[level] == 1) && (level != pkgLevel)) {
1831 if (level == pkgLevel) {
1832 newPkgLevel = new_level;
1834 if (level == coreLevel) {
1835 newCoreLevel = new_level;
1837 if (level == threadLevel) {
1838 newThreadLevel = new_level;
1840 for (proc = 0; (int)proc < nApics; proc++) {
1841 new_retval[proc].first.labels[new_level] =
1842 retval[proc].first.labels[level];
1848 retval = new_retval;
1850 pkgLevel = newPkgLevel;
1851 coreLevel = newCoreLevel;
1852 threadLevel = newThreadLevel;
1855 if (__kmp_affinity_gran_levels < 0) {
1858 __kmp_affinity_gran_levels = 0;
1859 if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
1860 __kmp_affinity_gran_levels++;
1862 if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
1863 __kmp_affinity_gran_levels++;
1865 if (__kmp_affinity_gran > affinity_gran_package) {
1866 __kmp_affinity_gran_levels++;
1870 if (__kmp_affinity_verbose) {
1871 __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel, coreLevel,
1879 KMP_CPU_FREE(oldMask);
1880 *address2os = retval;
1887 #define threadIdIndex 1
1888 #define coreIdIndex 2
1889 #define pkgIdIndex 3
1890 #define nodeIdIndex 4
1892 typedef unsigned *ProcCpuInfo;
1893 static unsigned maxIndex = pkgIdIndex;
1895 static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a,
1898 const unsigned *aa = *(unsigned *const *)a;
1899 const unsigned *bb = *(unsigned *const *)b;
1900 for (i = maxIndex;; i--) {
1911 #if KMP_USE_HIER_SCHED
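// Derive the per-layer unit counts and threads-per-unit values used by the
// hierarchical scheduler from the detected topology.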
1913 static void __kmp_dispatch_set_hierarchy_values() {
1919 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1] =
1920 nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
1921 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L1 + 1] = __kmp_ncores;
1922 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS) && \
1924 if (__kmp_mic_type >= mic3)
1925 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores / 2;
1928 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores;
1929 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L3 + 1] = nPackages;
1930 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_NUMA + 1] = nPackages;
1931 __kmp_hier_max_units[kmp_hier_layer_e::LAYER_LOOP + 1] = 1;
1934 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_THREAD + 1] = 1;
1935 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L1 + 1] =
1936 __kmp_nThreadsPerCore;
1937 #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS) && \
1939 if (__kmp_mic_type >= mic3)
1940 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
1941 2 * __kmp_nThreadsPerCore;
1944 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
1945 __kmp_nThreadsPerCore;
1946 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L3 + 1] =
1947 nCoresPerPkg * __kmp_nThreadsPerCore;
1948 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_NUMA + 1] =
1949 nCoresPerPkg * __kmp_nThreadsPerCore;
1950 __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LOOP + 1] =
1951 nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
1956 int __kmp_dispatch_get_index(int tid, kmp_hier_layer_e type) {
1957 int index = type + 1;
1958 int num_hw_threads = __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1];
1959 KMP_DEBUG_ASSERT(type != kmp_hier_layer_e::LAYER_LAST);
1960 if (type == kmp_hier_layer_e::LAYER_THREAD)
1962 else if (type == kmp_hier_layer_e::LAYER_LOOP)
1964 KMP_DEBUG_ASSERT(__kmp_hier_max_units[index] != 0);
1965 if (tid >= num_hw_threads)
1966 tid = tid % num_hw_threads;
1967 return (tid / __kmp_hier_threads_per[index]) % __kmp_hier_max_units[index];
1971 int __kmp_dispatch_get_t1_per_t2(kmp_hier_layer_e t1, kmp_hier_layer_e t2) {
1974 KMP_DEBUG_ASSERT(i1 <= i2);
1975 KMP_DEBUG_ASSERT(t1 != kmp_hier_layer_e::LAYER_LAST);
1976 KMP_DEBUG_ASSERT(t2 != kmp_hier_layer_e::LAYER_LAST);
1977 KMP_DEBUG_ASSERT(__kmp_hier_threads_per[i1] != 0);
1979 return __kmp_hier_threads_per[i2] / __kmp_hier_threads_per[i1];
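// Build the topology map by parsing a /proc/cpuinfo-style file when neither
// cpuid- nor hwloc-based detection is usable.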
1985 static int __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os,
1987 kmp_i18n_id_t *const msg_id,
1990 *msg_id = kmp_i18n_null;
1995 unsigned num_records = 0;
1997 buf[sizeof(buf) - 1] = 1;
1998 if (!fgets(buf, sizeof(buf), f)) {
2003 char s1[] = "processor";
2004 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
2011 if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
2012 if (nodeIdIndex + level >= maxIndex) {
2013 maxIndex = nodeIdIndex + level;
2021 if (num_records == 0) {
2023 *msg_id = kmp_i18n_str_NoProcRecords;
2026 if (num_records > (unsigned)__kmp_xproc) {
2028 *msg_id = kmp_i18n_str_TooManyProcRecords;
2037 if (fseek(f, 0, SEEK_SET) != 0) {
2039 *msg_id = kmp_i18n_str_CantRewindCpuinfo;
2045 unsigned **threadInfo =
2046 (unsigned **)__kmp_allocate((num_records + 1) * sizeof(unsigned *));
2048 for (i = 0; i <= num_records; i++) {
2050 (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
2053 #define CLEANUP_THREAD_INFO \
2054 for (i = 0; i <= num_records; i++) { \
2055 __kmp_free(threadInfo[i]); \
2057 __kmp_free(threadInfo);
2062 #define INIT_PROC_INFO(p) \
2063 for (__index = 0; __index <= maxIndex; __index++) { \
2064 (p)[__index] = UINT_MAX; \
2067 for (i = 0; i <= num_records; i++) {
2068 INIT_PROC_INFO(threadInfo[i]);
2071 unsigned num_avail = 0;
2078 buf[sizeof(buf) - 1] = 1;
2079 bool long_line = false;
2080 if (!fgets(buf, sizeof(buf), f)) {
2085 for (i = 0; i <= maxIndex; i++) {
2086 if (threadInfo[num_avail][i] != UINT_MAX) {
2094 } else if (!buf[sizeof(buf) - 1]) {
2099 #define CHECK_LINE \
2101 CLEANUP_THREAD_INFO; \
2102 *msg_id = kmp_i18n_str_LongLineCpuinfo; \
2108 char s1[] = "processor";
2109 if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
2111 char *p = strchr(buf + sizeof(s1) - 1, ':');
2113 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
2115 if (threadInfo[num_avail][osIdIndex] != UINT_MAX)
2116 #if KMP_ARCH_AARCH64
2125 threadInfo[num_avail][osIdIndex] = val;
2126 #if KMP_OS_LINUX && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
2130 "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
2131 threadInfo[num_avail][osIdIndex]);
2132 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
2134 KMP_SNPRINTF(path, sizeof(path),
2135 "/sys/devices/system/cpu/cpu%u/topology/core_id",
2136 threadInfo[num_avail][osIdIndex]);
2137 __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
2141 char s2[] = "physical id";
2142 if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
2144 char *p = strchr(buf + sizeof(s2) - 1, ':');
2146 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
2148 if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX)
2150 threadInfo[num_avail][pkgIdIndex] = val;
2153 char s3[] = "core id";
2154 if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
2156 char *p = strchr(buf + sizeof(s3) - 1, ':');
2158 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
2160 if (threadInfo[num_avail][coreIdIndex] != UINT_MAX)
2162 threadInfo[num_avail][coreIdIndex] = val;
2166 char s4[] = "thread id";
2167 if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
2169 char *p = strchr(buf + sizeof(s4) - 1, ':');
2171 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
2173 if (threadInfo[num_avail][threadIdIndex] != UINT_MAX)
2175 threadInfo[num_avail][threadIdIndex] = val;
2179 if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
2181 char *p = strchr(buf + sizeof(s4) - 1, ':');
2183 if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
2185 KMP_ASSERT(nodeIdIndex + level <= maxIndex);
2186 if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX)
2188 threadInfo[num_avail][nodeIdIndex + level] = val;
2195 if ((*buf != 0) && (*buf != '\n')) {
2200 while (((ch = fgetc(f)) != EOF) && (ch != '\n'))
2208 if ((int)num_avail == __kmp_xproc) {
2209 CLEANUP_THREAD_INFO;
2210 *msg_id = kmp_i18n_str_TooManyEntries;
2216 if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
2217 CLEANUP_THREAD_INFO;
2218 *msg_id = kmp_i18n_str_MissingProcField;
2221 if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
2222 CLEANUP_THREAD_INFO;
2223 *msg_id = kmp_i18n_str_MissingPhysicalIDField;
2228 if (!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex],
2229 __kmp_affin_fullMask)) {
2230 INIT_PROC_INFO(threadInfo[num_avail]);
2237 KMP_ASSERT(num_avail <= num_records);
2238 INIT_PROC_INFO(threadInfo[num_avail]);
2243 CLEANUP_THREAD_INFO;
2244 *msg_id = kmp_i18n_str_MissingValCpuinfo;
2248 CLEANUP_THREAD_INFO;
2249 *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
2254 #if KMP_MIC && REDUCE_TEAM_SIZE
2255 unsigned teamSize = 0;
2267 KMP_ASSERT(num_avail > 0);
2268 KMP_ASSERT(num_avail <= num_records);
2269 if (num_avail == 1) {
2271 __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
2272 if (__kmp_affinity_verbose) {
2273 if (!KMP_AFFINITY_CAPABLE()) {
2274 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2275 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2276 KMP_INFORM(Uniform, "KMP_AFFINITY");
2278 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2279 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2280 KMP_INFORM(Uniform, "KMP_AFFINITY");
2284 __kmp_str_buf_init(&buf);
2285 __kmp_str_buf_print(&buf, "1");
2286 for (index = maxIndex - 1; index > pkgIdIndex; index--) {
2287 __kmp_str_buf_print(&buf, " x 1");
2289 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
2290 __kmp_str_buf_free(&buf);
2293 if (__kmp_affinity_type == affinity_none) {
2294 CLEANUP_THREAD_INFO;
2298 *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair));
2300 addr.labels[0] = threadInfo[0][pkgIdIndex];
2301 (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
2303 if (__kmp_affinity_gran_levels < 0) {
2304 __kmp_affinity_gran_levels = 0;
2307 if (__kmp_affinity_verbose) {
2308 __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
2311 CLEANUP_THREAD_INFO;
2316 qsort(threadInfo, num_avail, sizeof(*threadInfo),
2317 __kmp_affinity_cmp_ProcCpuInfo_phys_id);
2329 (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
2331 (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
2333 (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
2335 (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
2337 bool assign_thread_ids = false;
2338 unsigned threadIdCt;
2341 restart_radix_check:
2345 if (assign_thread_ids) {
2346 if (threadInfo[0][threadIdIndex] == UINT_MAX) {
2347 threadInfo[0][threadIdIndex] = threadIdCt++;
2348 } else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
2349 threadIdCt = threadInfo[0][threadIdIndex] + 1;
2352 for (index = 0; index <= maxIndex; index++) {
2356 lastId[index] = threadInfo[0][index];
2361 for (i = 1; i < num_avail; i++) {
2364 for (index = maxIndex; index >= threadIdIndex; index--) {
2365 if (assign_thread_ids && (index == threadIdIndex)) {
2367 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2368 threadInfo[i][threadIdIndex] = threadIdCt++;
2372 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2373 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2376 if (threadInfo[i][index] != lastId[index]) {
2381 for (index2 = threadIdIndex; index2 < index; index2++) {
2383 if (counts[index2] > maxCt[index2]) {
2384 maxCt[index2] = counts[index2];
2387 lastId[index2] = threadInfo[i][index2];
2391 lastId[index] = threadInfo[i][index];
2393 if (assign_thread_ids && (index > threadIdIndex)) {
2395 #if KMP_MIC && REDUCE_TEAM_SIZE
2398 teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
2405 if (threadInfo[i][threadIdIndex] == UINT_MAX) {
2406 threadInfo[i][threadIdIndex] = threadIdCt++;
2412 else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
2413 threadIdCt = threadInfo[i][threadIdIndex] + 1;
2419 if (index < threadIdIndex) {
2423 if ((threadInfo[i][threadIdIndex] != UINT_MAX) || assign_thread_ids) {
2428 CLEANUP_THREAD_INFO;
2429 *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
2435 assign_thread_ids = true;
2436 goto restart_radix_check;
2440 #if KMP_MIC && REDUCE_TEAM_SIZE
2443 teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
2446 for (index = threadIdIndex; index <= maxIndex; index++) {
2447 if (counts[index] > maxCt[index]) {
2448 maxCt[index] = counts[index];
2452 __kmp_nThreadsPerCore = maxCt[threadIdIndex];
2453 nCoresPerPkg = maxCt[coreIdIndex];
2454 nPackages = totals[pkgIdIndex];
2457 unsigned prod = totals[maxIndex];
2458 for (index = threadIdIndex; index < maxIndex; index++) {
2459 prod *= maxCt[index];
2461 bool uniform = (prod == totals[threadIdIndex]);
2467 __kmp_ncores = totals[coreIdIndex];
2469 if (__kmp_affinity_verbose) {
2470 if (!KMP_AFFINITY_CAPABLE()) {
2471 KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
2472 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2474 KMP_INFORM(Uniform, "KMP_AFFINITY");
2476 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2479 KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
2480 KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
2482 KMP_INFORM(Uniform, "KMP_AFFINITY");
2484 KMP_INFORM(NonUniform, "KMP_AFFINITY");
2488 __kmp_str_buf_init(&buf);
2490 __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
2491 for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
2492 __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
2494 KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
2495 maxCt[threadIdIndex], __kmp_ncores);
2497 __kmp_str_buf_free(&buf);
2500 #if KMP_MIC && REDUCE_TEAM_SIZE
2502 if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
2503 __kmp_dflt_team_nth = teamSize;
2504 KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting "
2505 "__kmp_dflt_team_nth = %d\n",
2506 __kmp_dflt_team_nth));
2510 KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
2511 KMP_DEBUG_ASSERT(num_avail == (unsigned)__kmp_avail_proc);
2512 __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
2513 for (i = 0; i < num_avail; ++i) {
2514 __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
2517 if (__kmp_affinity_type == affinity_none) {
2522 CLEANUP_THREAD_INFO;
2531 bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
2532 for (index = threadIdIndex; index < maxIndex; index++) {
2533 KMP_ASSERT(totals[index] >= totals[index + 1]);
2534 inMap[index] = (totals[index] > totals[index + 1]);
2536 inMap[maxIndex] = (totals[maxIndex] > 1);
2537 inMap[pkgIdIndex] = true;
2540 for (index = threadIdIndex; index <= maxIndex; index++) {
2545 KMP_ASSERT(depth > 0);
2548 *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * num_avail);
2551 int threadLevel = -1;
2553 for (i = 0; i < num_avail; ++i) {
2554 Address addr(depth);
2555 unsigned os = threadInfo[i][osIdIndex];
2559 for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
2560 if (!inMap[src_index]) {
2563 addr.labels[dst_index] = threadInfo[i][src_index];
2564 if (src_index == pkgIdIndex) {
2565 pkgLevel = dst_index;
2566 } else if (src_index == coreIdIndex) {
2567 coreLevel = dst_index;
2568 } else if (src_index == threadIdIndex) {
2569 threadLevel = dst_index;
2573 (*address2os)[i] = AddrUnsPair(addr, os);
2576 if (__kmp_affinity_gran_levels < 0) {
2580 __kmp_affinity_gran_levels = 0;
2581 for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
2582 if (!inMap[src_index]) {
2585 switch (src_index) {
2587 if (__kmp_affinity_gran > affinity_gran_thread) {
2588 __kmp_affinity_gran_levels++;
2593 if (__kmp_affinity_gran > affinity_gran_core) {
2594 __kmp_affinity_gran_levels++;
2599 if (__kmp_affinity_gran > affinity_gran_package) {
2600 __kmp_affinity_gran_levels++;
2607 if (__kmp_affinity_verbose) {
2608 __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
2609 coreLevel, threadLevel);
2617 CLEANUP_THREAD_INFO;
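// Collapse the sorted address table into one mask per granularity unit: OS procs
// whose addresses agree above __kmp_affinity_gran_levels share a mask.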
2624 static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex,
2625 unsigned *numUnique,
2626 AddrUnsPair *address2os,
2627 unsigned numAddrs) {
2633 KMP_ASSERT(numAddrs > 0);
2634 depth = address2os[0].first.depth;
2637 for (i = numAddrs - 1;; --i) {
2638 unsigned osId = address2os[i].second;
2639 if (osId > maxOsId) {
2645 kmp_affin_mask_t *osId2Mask;
2646 KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId + 1));
2650 qsort(address2os, numAddrs, sizeof(*address2os),
2651 __kmp_affinity_cmp_Address_labels);
2653 KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
2654 if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
2655 KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
2657 if (__kmp_affinity_gran_levels >= (int)depth) {
2658 if (__kmp_affinity_verbose ||
2659 (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
2660 KMP_WARNING(AffThreadsMayMigrate);
2668 unsigned unique = 0;
2670 unsigned leader = 0;
2671 Address *leaderAddr = &(address2os[0].first);
2672 kmp_affin_mask_t *sum;
2673 KMP_CPU_ALLOC_ON_STACK(sum);
2675 KMP_CPU_SET(address2os[0].second, sum);
2676 for (i = 1; i < numAddrs; i++) {
2680 if (leaderAddr->isClose(address2os[i].first, __kmp_affinity_gran_levels)) {
2681 KMP_CPU_SET(address2os[i].second, sum);
2687 for (; j < i; j++) {
2688 unsigned osId = address2os[j].second;
2689 KMP_DEBUG_ASSERT(osId <= maxOsId);
2690 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2691 KMP_CPU_COPY(mask, sum);
2692 address2os[j].first.leader = (j == leader);
2698 leaderAddr = &(address2os[i].first);
2700 KMP_CPU_SET(address2os[i].second, sum);
2705 for (; j < i; j++) {
2706 unsigned osId = address2os[j].second;
2707 KMP_DEBUG_ASSERT(osId <= maxOsId);
2708 kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
2709 KMP_CPU_COPY(mask, sum);
2710 address2os[j].first.leader = (j == leader);
2713 KMP_CPU_FREE_FROM_STACK(sum);
2715 *maxIndex = maxOsId;
2716 *numUnique = unique;
2723 static kmp_affin_mask_t *newMasks;
2724 static int numNewMasks;
2725 static int nextNewMask;
2727 #define ADD_MASK(_mask) \
2729 if (nextNewMask >= numNewMasks) { \
2732 kmp_affin_mask_t *temp; \
2733 KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks); \
2734 for (i = 0; i < numNewMasks / 2; i++) { \
2735 kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i); \
2736 kmp_affin_mask_t *dest = KMP_CPU_INDEX(temp, i); \
2737 KMP_CPU_COPY(dest, src); \
2739 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks / 2); \
2742 KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
2746 #define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId) \
2748 if (((_osId) > _maxOsId) || \
2749 (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
2750 if (__kmp_affinity_verbose || \
2751 (__kmp_affinity_warnings && \
2752 (__kmp_affinity_type != affinity_none))) { \
2753 KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
2756 ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
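// Parse an explicit proclist, building one new mask per comma-separated entry
// (single OS procs, {...} sets, and ranges with optional strides).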
2762 static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
2763 unsigned int *out_numMasks,
2764 const char *proclist,
2765 kmp_affin_mask_t *osId2Mask,
2768 const char *scan = proclist;
2769 const char *next = proclist;
2774 KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
2776 kmp_affin_mask_t *sumMask;
2777 KMP_CPU_ALLOC(sumMask);
2781 int start, end, stride;
2785 if (*next == '\0') {
2797 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad proclist");
2799 num = __kmp_str_to_int(scan, *next);
2800 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2803 if ((num > maxOsId) ||
2804 (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2805 if (__kmp_affinity_verbose ||
2806 (__kmp_affinity_warnings &&
2807 (__kmp_affinity_type != affinity_none))) {
2808 KMP_WARNING(AffIgnoreInvalidProcID, num);
2810 KMP_CPU_ZERO(sumMask);
2812 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2832 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2835 num = __kmp_str_to_int(scan, *next);
2836 KMP_ASSERT2(num >= 0, "bad explicit proc list");
2839 if ((num > maxOsId) ||
2840 (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2841 if (__kmp_affinity_verbose ||
2842 (__kmp_affinity_warnings &&
2843 (__kmp_affinity_type != affinity_none))) {
2844 KMP_WARNING(AffIgnoreInvalidProcID, num);
2847 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2864 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2866 start = __kmp_str_to_int(scan, *next);
2867 KMP_ASSERT2(start >= 0, "bad explicit proc list");
2872 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2886 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2888 end = __kmp_str_to_int(scan, *next);
2889 KMP_ASSERT2(end >= 0, "bad explicit proc list");
2906 KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
2908 stride = __kmp_str_to_int(scan, *next);
2909 KMP_ASSERT2(stride >= 0, "bad explicit proc list");
2914 KMP_ASSERT2(stride != 0, "bad explicit proc list");
2916 KMP_ASSERT2(start <= end, "bad explicit proc list");
2918 KMP_ASSERT2(start >= end, "bad explicit proc list");
2920 KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
2925 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2927 }
while (start <= end);
2930 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2932 }
while (start >= end);
2943 *out_numMasks = nextNewMask;
2944 if (nextNewMask == 0) {
2946 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
2949 KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
2950 for (i = 0; i < nextNewMask; i++) {
2951 kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
2952 kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
2953 KMP_CPU_COPY(dest, src);
2955 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
2956 KMP_CPU_FREE(sumMask);
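// Roughly, the proclist accepted above is a comma-separated mix of single OS
// proc ids, braced groups such as {0,2,4} that share one mask, and strided
// ranges such as 8-15:2 (an assumed example of the KMP_AFFINITY "explicit"
// proclist syntax; invalid ids are dropped with a warning).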
2979 static void __kmp_process_subplace_list(const char **scan,
2980 kmp_affin_mask_t *osId2Mask,
2981 int maxOsId, kmp_affin_mask_t *tempMask,
2986 int start, count, stride, i;
2990 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
2993 start = __kmp_str_to_int(*scan, *next);
2994 KMP_ASSERT(start >= 0);
2999 if (**scan == '}' || **scan == ',') {
3000 if ((start > maxOsId) ||
3001 (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3002 if (__kmp_affinity_verbose ||
3003 (__kmp_affinity_warnings &&
3004 (__kmp_affinity_type != affinity_none))) {
3005 KMP_WARNING(AffIgnoreInvalidProcID, start);
3008 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3011 if (**scan == '}') {
3017 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3022 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
3025 count = __kmp_str_to_int(*scan, *next);
3026 KMP_ASSERT(count >= 0);
3031 if (**scan == '}' || **scan == ',') {
3032 for (i = 0; i < count; i++) {
3033 if ((start > maxOsId) ||
3034 (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3035 if (__kmp_affinity_verbose ||
3036 (__kmp_affinity_warnings &&
3037 (__kmp_affinity_type != affinity_none))) {
3038 KMP_WARNING(AffIgnoreInvalidProcID, start);
3042 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3047 if (**scan == '}') {
3053 KMP_ASSERT2(**scan == ':', "bad explicit places list");
3060 if (**scan == '+') {
3064 if (**scan == '-') {
3072 KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
3075 stride = __kmp_str_to_int(*scan, *next);
3076 KMP_ASSERT(stride >= 0);
3082 if (**scan == '}' || **scan == ',') {
3083 for (i = 0; i < count; i++) {
3084 if ((start > maxOsId) ||
3085 (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3086 if (__kmp_affinity_verbose ||
3087 (__kmp_affinity_warnings &&
3088 (__kmp_affinity_type != affinity_none))) {
3089 KMP_WARNING(AffIgnoreInvalidProcID, start);
3093 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3098 if (**scan == '}') {
3105 KMP_ASSERT2(0, "bad explicit places list");
3109 static void __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
3110 int maxOsId, kmp_affin_mask_t *tempMask,
3116 if (**scan == '{') {
3118 __kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask, setSize);
3119 KMP_ASSERT2(**scan == '}', "bad explicit places list");
3121 } else if (**scan == '!') {
3123 __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
3124 KMP_CPU_COMPLEMENT(maxOsId, tempMask);
3125 } else if ((**scan >= '0') && (**scan <= '9')) {
3128 int num = __kmp_str_to_int(*scan, *next);
3129 KMP_ASSERT(num >= 0);
3130 if ((num > maxOsId) ||
3131 (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3132 if (__kmp_affinity_verbose ||
3133 (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
3134 KMP_WARNING(AffIgnoreInvalidProcID, num);
3137 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
3142 KMP_ASSERT2(0, "bad explicit places list");
3147 void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
3148 unsigned int *out_numMasks,
3149 const char *placelist,
3150 kmp_affin_mask_t *osId2Mask,
3152 int i, j, count, stride, sign;
3153 const char *scan = placelist;
3154 const char *next = placelist;
3157 KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
3163 kmp_affin_mask_t *tempMask;
3164 kmp_affin_mask_t *previousMask;
3165 KMP_CPU_ALLOC(tempMask);
3166 KMP_CPU_ZERO(tempMask);
3167 KMP_CPU_ALLOC(previousMask);
3168 KMP_CPU_ZERO(previousMask);
3172 __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
3176 if (*scan == '\0' || *scan == ',') {
3180 KMP_CPU_ZERO(tempMask);
3182 if (*scan == '\0') {
3189 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3194 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
3197 count = __kmp_str_to_int(scan, *next);
3198 KMP_ASSERT(count >= 0);
3203 if (*scan == '\0' || *scan == ',') {
3206 KMP_ASSERT2(*scan == ':', "bad explicit places list");
3225 KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
3228 stride = __kmp_str_to_int(scan, *next);
3229 KMP_DEBUG_ASSERT(stride >= 0);
3235 for (i = 0; i < count; i++) {
3240 KMP_CPU_COPY(previousMask, tempMask);
3241 ADD_MASK(previousMask);
3242 KMP_CPU_ZERO(tempMask);
3244 KMP_CPU_SET_ITERATE(j, previousMask) {
3245 if (!KMP_CPU_ISSET(j, previousMask)) {
3248 if ((j + stride > maxOsId) || (j + stride < 0) ||
3249 (!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) ||
3250 (!KMP_CPU_ISSET(j + stride,
3251 KMP_CPU_INDEX(osId2Mask, j + stride)))) {
3252 if ((__kmp_affinity_verbose ||
3253 (__kmp_affinity_warnings &&
3254 (__kmp_affinity_type != affinity_none))) &&
3256 KMP_WARNING(AffIgnoreInvalidProcID, j + stride);
3260 KMP_CPU_SET(j + stride, tempMask);
3264 KMP_CPU_ZERO(tempMask);
3269 if (*scan == '\0') {
3277 KMP_ASSERT2(0, "bad explicit places list");
3280 *out_numMasks = nextNewMask;
3281 if (nextNewMask == 0) {
3283 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
3286 KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
3287 KMP_CPU_FREE(tempMask);
3288 KMP_CPU_FREE(previousMask);
3289 for (i = 0; i < nextNewMask; i++) {
3290 kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
3291 kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
3292 KMP_CPU_COPY(dest, src);
3294 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
3298 #undef ADD_MASK_OSID
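// The two hwloc helpers below support KMP_HW_SUBSET pruning:
// __kmp_hwloc_skip_PUs_obj clears every PU under an object from the full mask
// (counting how many were removed), and __kmp_hwloc_obj_has_PUs reports
// whether an object still owns at least one allowed PU.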
3301 static int __kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o) {
3304 hwloc_obj_t hT = NULL;
3305 int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
3306 for (int i = 0; i < N; ++i) {
3307 KMP_DEBUG_ASSERT(hT);
3308 unsigned idx = hT->os_index;
3309 if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3310 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
3311 KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
3314 hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
3319 static int __kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o) {
3321 hwloc_obj_t hT = NULL;
3322 int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
3323 for (int i = 0; i < N; ++i) {
3324 KMP_DEBUG_ASSERT(hT);
3325 unsigned idx = hT->os_index;
3326 if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask))
3328 hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
3334 static void __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) {
3335 AddrUnsPair *newAddr;
3336 if (__kmp_hws_requested == 0)
3339 if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
3343 hwloc_topology_t tp = __kmp_hwloc_topology;
3344 int nS = 0, nN = 0, nL = 0, nC = 0,
3346 int nCr = 0, nTr = 0;
3347 int nPkg = 0, nCo = 0, n_new = 0, n_old = 0, nCpP = 0, nTpC = 0;
3348 hwloc_obj_t hT, hC, hL, hN, hS;
3352 int numa_support = 0, tile_support = 0;
3353 if (__kmp_pu_os_idx)
3354 hT = hwloc_get_pu_obj_by_os_index(tp,
3355 __kmp_pu_os_idx[__kmp_avail_proc - 1]);
3357 hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1);
3359 KMP_WARNING(AffHWSubsetUnsupported);
3363 hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
3364 hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
3365 if (hN != NULL && hN->depth > hS->depth) {
3367 } else if (__kmp_hws_node.num > 0) {
3369 KMP_WARNING(AffHWSubsetUnsupported);
3373 L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
3374 hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT);
3376 __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1) {
3378 } else if (__kmp_hws_tile.num > 0) {
3379 if (__kmp_hws_core.num == 0) {
3380 __kmp_hws_core = __kmp_hws_tile;
3381 __kmp_hws_tile.num = 0;
3384 KMP_WARNING(AffHWSubsetInvalid);
3391 if (__kmp_hws_socket.num == 0)
3392 __kmp_hws_socket.num = nPackages;
3393 if (__kmp_hws_socket.offset >= nPackages) {
3394 KMP_WARNING(AffHWSubsetManySockets);
3399 int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE,
3401 if (__kmp_hws_node.num == 0)
3402 __kmp_hws_node.num = NN;
3403 if (__kmp_hws_node.offset >= NN) {
3404 KMP_WARNING(AffHWSubsetManyNodes);
3409 int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
3410 if (__kmp_hws_tile.num == 0) {
3411 __kmp_hws_tile.num = NL + 1;
3413 if (__kmp_hws_tile.offset >= NL) {
3414 KMP_WARNING(AffHWSubsetManyTiles);
3417 int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
3419 if (__kmp_hws_core.num == 0)
3420 __kmp_hws_core.num = NC;
3421 if (__kmp_hws_core.offset >= NC) {
3422 KMP_WARNING(AffHWSubsetManyCores);
3426 int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE,
3428 if (__kmp_hws_core.num == 0)
3429 __kmp_hws_core.num = NC;
3430 if (__kmp_hws_core.offset >= NC) {
3431 KMP_WARNING(AffHWSubsetManyCores);
3438 int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
3439 if (__kmp_hws_tile.num == 0)
3440 __kmp_hws_tile.num = NL;
3441 if (__kmp_hws_tile.offset >= NL) {
3442 KMP_WARNING(AffHWSubsetManyTiles);
3445 int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
3447 if (__kmp_hws_core.num == 0)
3448 __kmp_hws_core.num = NC;
3449 if (__kmp_hws_core.offset >= NC) {
3450 KMP_WARNING(AffHWSubsetManyCores);
3454 int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE,
3456 if (__kmp_hws_core.num == 0)
3457 __kmp_hws_core.num = NC;
3458 if (__kmp_hws_core.offset >= NC) {
3459 KMP_WARNING(AffHWSubsetManyCores);
3464 if (__kmp_hws_proc.num == 0)
3465 __kmp_hws_proc.num = __kmp_nThreadsPerCore;
3466 if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) {
3467 KMP_WARNING(AffHWSubsetManyProcs);
3473 newAddr = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) *
3477 int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE);
3478 for (int s = 0; s < NP; ++s) {
3480 hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS);
3481 if (!__kmp_hwloc_obj_has_PUs(tp, hS))
3484 if (nS <= __kmp_hws_socket.offset ||
3485 nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) {
3486 n_old += __kmp_hwloc_skip_PUs_obj(tp, hS);
3497 __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE, &hN);
3498 for (int n = 0; n < NN; ++n) {
3500 if (!__kmp_hwloc_obj_has_PUs(tp, hN)) {
3501 hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
3505 if (nN <= __kmp_hws_node.offset ||
3506 nN > __kmp_hws_node.num + __kmp_hws_node.offset) {
3508 n_old += __kmp_hwloc_skip_PUs_obj(tp, hN);
3509 hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
3516 int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
3517 for (int l = 0; l < NL; ++l) {
3519 if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
3520 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3524 if (nL <= __kmp_hws_tile.offset ||
3525 nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
3527 n_old += __kmp_hwloc_skip_PUs_obj(tp, hL);
3528 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3535 int NC = __kmp_hwloc_count_children_by_type(tp, hL,
3536 HWLOC_OBJ_CORE, &hC);
3537 for (int c = 0; c < NC; ++c) {
3539 if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
3540 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3544 if (nC <= __kmp_hws_core.offset ||
3545 nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
3547 n_old += __kmp_hwloc_skip_PUs_obj(tp, hC);
3548 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3556 int NT = __kmp_hwloc_count_children_by_type(tp, hC,
3558 for (int t = 0; t < NT; ++t) {
3561 if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3562 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3566 if (nT <= __kmp_hws_proc.offset ||
3567 nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
3569 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
3571 KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
3572 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3577 newAddr[n_new] = (*pAddr)[n_old];
3580 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3588 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3590 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3598 __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE, &hC);
3599 for (int c = 0; c < NC; ++c) {
3601 if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
3602 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3606 if (nC <= __kmp_hws_core.offset ||
3607 nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
3609 n_old += __kmp_hwloc_skip_PUs_obj(tp, hC);
3610 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3618 __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
3619 for (int t = 0; t < NT; ++t) {
3622 if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3623 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3627 if (nT <= __kmp_hws_proc.offset ||
3628 nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
3630 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
3632 KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
3633 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3638 newAddr[n_new] = (*pAddr)[n_old];
3641 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3649 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3652 hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
3660 int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
3661 for (int l = 0; l < NL; ++l) {
3663 if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
3664 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3668 if (nL <= __kmp_hws_tile.offset ||
3669 nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
3671 n_old += __kmp_hwloc_skip_PUs_obj(tp, hL);
3672 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3680 __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC);
3681 for (int c = 0; c < NC; ++c) {
3683 if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
3684 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3688 if (nC <= __kmp_hws_core.offset ||
3689 nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
3691 n_old += __kmp_hwloc_skip_PUs_obj(tp, hC);
3692 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3701 __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
3702 for (int t = 0; t < NT; ++t) {
3705 if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3706 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3710 if (nT <= __kmp_hws_proc.offset ||
3711 nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
3713 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
3715 KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
3716 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3721 newAddr[n_new] = (*pAddr)[n_old];
3724 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3732 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3734 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3742 __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE, &hC);
3743 for (int c = 0; c < NC; ++c) {
3745 if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
3746 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3750 if (nC <= __kmp_hws_core.offset ||
3751 nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
3753 n_old += __kmp_hwloc_skip_PUs_obj(tp, hC);
3754 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3763 __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
3764 for (int t = 0; t < NT; ++t) {
3767 if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3768 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3772 if (nT <= __kmp_hws_proc.offset ||
3773 nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
3775 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
3777 KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
3778 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3783 newAddr[n_new] = (*pAddr)[n_old];
3786 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3794 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3806 KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc);
3807 KMP_DEBUG_ASSERT(nPkg > 0);
3808 KMP_DEBUG_ASSERT(nCpP > 0);
3809 KMP_DEBUG_ASSERT(nTpC > 0);
3810 KMP_DEBUG_ASSERT(nCo > 0);
3811 KMP_DEBUG_ASSERT(nPkg <= nPackages);
3812 KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg);
3813 KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore);
3814 KMP_DEBUG_ASSERT(nCo <= __kmp_ncores);
3817 nCoresPerPkg = nCpP;
3818 __kmp_nThreadsPerCore = nTpC;
3819 __kmp_avail_proc = n_new;
3825 int n_old = 0, n_new = 0, proc_num = 0;
3826 if (__kmp_hws_node.num > 0 || __kmp_hws_tile.num > 0) {
3827 KMP_WARNING(AffHWSubsetNoHWLOC);
3830 if (__kmp_hws_socket.num == 0)
3831 __kmp_hws_socket.num = nPackages;
3832 if (__kmp_hws_core.num == 0)
3833 __kmp_hws_core.num = nCoresPerPkg;
3834 if (__kmp_hws_proc.num == 0 || __kmp_hws_proc.num > __kmp_nThreadsPerCore)
3835 __kmp_hws_proc.num = __kmp_nThreadsPerCore;
3836 if (!__kmp_affinity_uniform_topology()) {
3837 KMP_WARNING(AffHWSubsetNonUniform);
3841 KMP_WARNING(AffHWSubsetNonThreeLevel);
3844 if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) {
3845 KMP_WARNING(AffHWSubsetManySockets);
3848 if (__kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg) {
3849 KMP_WARNING(AffHWSubsetManyCores);
3854 newAddr = (AddrUnsPair *)__kmp_allocate(
3855 sizeof(AddrUnsPair) * __kmp_hws_socket.num * __kmp_hws_core.num *
3856 __kmp_hws_proc.num);
3857 for (int i = 0; i < nPackages; ++i) {
3858 if (i < __kmp_hws_socket.offset ||
3859 i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) {
3861 n_old += nCoresPerPkg * __kmp_nThreadsPerCore;
3862 if (__kmp_pu_os_idx != NULL) {
3864 for (int j = 0; j < nCoresPerPkg; ++j) {
3865 for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
3866 KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
3873 for (int j = 0; j < nCoresPerPkg; ++j) {
3874 if (j < __kmp_hws_core.offset ||
3875 j >= __kmp_hws_core.offset +
3876 __kmp_hws_core.num) {
3877 n_old += __kmp_nThreadsPerCore;
3878 if (__kmp_pu_os_idx != NULL) {
3879 for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
3880 KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
3886 for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
3887 if (k < __kmp_hws_proc.num) {
3889 newAddr[n_new] = (*pAddr)[n_old];
3892 if (__kmp_pu_os_idx != NULL)
3893 KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
3902 KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
3903 KMP_DEBUG_ASSERT(n_new ==
3904 __kmp_hws_socket.num * __kmp_hws_core.num *
3905 __kmp_hws_proc.num);
3906 nPackages = __kmp_hws_socket.num;
3907 nCoresPerPkg = __kmp_hws_core.num;
3908 __kmp_nThreadsPerCore = __kmp_hws_proc.num;
3909 __kmp_avail_proc = n_new;
3910 __kmp_ncores = nPackages * __kmp_hws_core.num;
3916 if (__kmp_affinity_verbose) {
3917 KMP_INFORM(AvailableOSProc, "KMP_HW_SUBSET", __kmp_avail_proc);
3919 __kmp_str_buf_init(&buf);
3920 __kmp_str_buf_print(&buf, "%d", nPackages);
3921 KMP_INFORM(TopologyExtra, "KMP_HW_SUBSET", buf.str, nCoresPerPkg,
3922 __kmp_nThreadsPerCore, __kmp_ncores);
3923 __kmp_str_buf_free(&buf);
3926 if (__kmp_pu_os_idx != NULL) {
3927 __kmp_free(__kmp_pu_os_idx);
3928 __kmp_pu_os_idx = NULL;
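// Helpers used by the "balanced" affinity type: find the deepest level that
// still identifies a core, count cores over a processor range, map a proc
// index to its core, and compute the maximum number of procs per core.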
3934 static int __kmp_affinity_find_core_level(const AddrUnsPair *address2os,
3935 int nprocs, int bottom_level) {
3938 for (int i = 0; i < nprocs; i++) {
3939 for (int j = bottom_level; j > 0; j--) {
3940 if (address2os[i].first.labels[j] > 0) {
3941 if (core_level < (j - 1)) {
3951 static int __kmp_affinity_compute_ncores(const AddrUnsPair *address2os,
3952 int nprocs, int bottom_level,
3958 for (i = 0; i < nprocs; i++) {
3959 for (j = bottom_level; j > core_level; j--) {
3960 if ((i + 1) < nprocs) {
3961 if (address2os[i + 1].first.labels[j] > 0) {
3966 if (j == core_level) {
3970 if (j > core_level) {
3979 static int __kmp_affinity_find_core(const AddrUnsPair *address2os, int proc,
3980 int bottom_level, int core_level) {
3981 return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level,
3988 static int __kmp_affinity_max_proc_per_core(const AddrUnsPair *address2os,
3989 int nprocs, int bottom_level,
3991 int maxprocpercore = 0;
3993 if (core_level < bottom_level) {
3994 for (int i = 0; i < nprocs; i++) {
3995 int percore = address2os[i].first.labels[core_level + 1] + 1;
3997 if (percore > maxprocpercore) {
3998 maxprocpercore = percore;
4004 return maxprocpercore;
4007 static AddrUnsPair *address2os = NULL;
4008 static int *procarr = NULL;
4009 static int __kmp_aff_depth = 0;
4011 #if KMP_USE_HIER_SCHED
4012 #define KMP_EXIT_AFF_NONE \
4013 KMP_ASSERT(__kmp_affinity_type == affinity_none); \
4014 KMP_ASSERT(address2os == NULL); \
4015 __kmp_apply_thread_places(NULL, 0); \
4016 __kmp_create_affinity_none_places(); \
4017 __kmp_dispatch_set_hierarchy_values(); \
4020 #define KMP_EXIT_AFF_NONE \
4021 KMP_ASSERT(__kmp_affinity_type == affinity_none); \
4022 KMP_ASSERT(address2os == NULL); \
4023 __kmp_apply_thread_places(NULL, 0); \
4024 __kmp_create_affinity_none_places(); \
4030 static void __kmp_create_affinity_none_places() {
4031 KMP_ASSERT(__kmp_affin_fullMask != NULL);
4032 KMP_ASSERT(__kmp_affinity_type == affinity_none);
4033 __kmp_affinity_num_masks = 1;
4034 KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
4035 kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, 0);
4036 KMP_CPU_COPY(dest, __kmp_affin_fullMask);
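// Comparator for sorting AddrUnsPair records by topology child numbers: the
// bottom __kmp_affinity_compact levels are compared first (innermost upward),
// then the remaining levels from the top, which is what implements the
// compact/scatter orderings selected via __kmp_affinity_compact.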
4039 static int __kmp_affinity_cmp_Address_child_num(const void *a, const void *b) {
4040 const Address *aa = &(((const AddrUnsPair *)a)->first);
4041 const Address *bb = &(((const AddrUnsPair *)b)->first);
4042 unsigned depth = aa->depth;
4044 KMP_DEBUG_ASSERT(depth == bb->depth);
4045 KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
4046 KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
4047 for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
4048 int j = depth - i - 1;
4049 if (aa->childNums[j] < bb->childNums[j])
4051 if (aa->childNums[j] > bb->childNums[j])
4054 for (; i < depth; i++) {
4055 int j = i - __kmp_affinity_compact;
4056 if (aa->childNums[j] < bb->childNums[j])
4058 if (aa->childNums[j] > bb->childNums[j])
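// __kmp_aux_affinity_initialize below builds the full affinity mask, picks a
// topology detection method (hwloc, x2APIC/APIC ids, /proc/cpuinfo, Windows
// processor groups, or a flat map), applies KMP_HW_SUBSET, derives one mask
// per place, and finally sorts and copies the masks used for binding.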
4064 static void __kmp_aux_affinity_initialize(void) {
4065 if (__kmp_affinity_masks != NULL) {
4066 KMP_ASSERT(__kmp_affin_fullMask != NULL);
4074 if (__kmp_affin_fullMask == NULL) {
4075 KMP_CPU_ALLOC(__kmp_affin_fullMask);
4077 if (KMP_AFFINITY_CAPABLE()) {
4078 __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);
4079 if (__kmp_affinity_respect_mask) {
4082 __kmp_avail_proc = 0;
4083 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
4084 if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
4089 if (__kmp_avail_proc > __kmp_xproc) {
4090 if (__kmp_affinity_verbose ||
4091 (__kmp_affinity_warnings &&
4092 (__kmp_affinity_type != affinity_none))) {
4093 KMP_WARNING(ErrorInitializeAffinity);
4095 __kmp_affinity_type = affinity_none;
4096 KMP_AFFINITY_DISABLE();
4100 if (__kmp_affinity_verbose) {
4101 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4102 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4103 __kmp_affin_fullMask);
4104 KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
4107 if (__kmp_affinity_verbose) {
4108 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4109 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4110 __kmp_affin_fullMask);
4111 KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
4113 __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
4114 __kmp_avail_proc = __kmp_xproc;
4118 __kmp_affin_fullMask->set_process_affinity(true);
4123 if (__kmp_affinity_gran == affinity_gran_tile &&
4125 __kmp_affinity_dispatch->get_api_type() == KMPAffinity::NATIVE_OS) {
4126 KMP_WARNING(AffTilesNoHWLOC, "KMP_AFFINITY");
4127 __kmp_affinity_gran = affinity_gran_package;
4131 kmp_i18n_id_t msg_id = kmp_i18n_null;
4135 if ((__kmp_cpuinfo_file != NULL) &&
4136 (__kmp_affinity_top_method == affinity_top_method_all)) {
4137 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
4140 if (__kmp_affinity_top_method == affinity_top_method_all) {
4144 const char *file_name = NULL;
4148 __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
4149 if (__kmp_affinity_verbose) {
4150 KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
4152 if (!__kmp_hwloc_error) {
4153 depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
4156 } else if (depth < 0 && __kmp_affinity_verbose) {
4157 KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
4159 } else if (__kmp_affinity_verbose) {
4160 KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
4165 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4168 if (__kmp_affinity_verbose) {
4169 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
4173 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
4179 if (__kmp_affinity_verbose) {
4180 if (msg_id != kmp_i18n_null) {
4181 KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY",
4182 __kmp_i18n_catgets(msg_id),
4183 KMP_I18N_STR(DecodingLegacyAPIC));
4185 KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
4186 KMP_I18N_STR(DecodingLegacyAPIC));
4191 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
4203 if (__kmp_affinity_verbose) {
4204 if (msg_id != kmp_i18n_null) {
4205 KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY",
4206 __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
4208 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
4213 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
4221 #if KMP_GROUP_AFFINITY
4223 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
4224 if (__kmp_affinity_verbose) {
4225 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
4228 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
4229 KMP_ASSERT(depth != 0);
4235 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
4236 if (file_name == NULL) {
4237 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
4238 } else if (line == 0) {
4239 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
4241 KMP_INFORM(UsingFlatOSFileLine, file_name, line,
4242 __kmp_i18n_catgets(msg_id));
4248 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
4252 KMP_ASSERT(depth > 0);
4253 KMP_ASSERT(address2os != NULL);
4258 else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
4259 KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
4260 if (__kmp_affinity_verbose) {
4261 KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
4263 depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
4274 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4276 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
4277 if (__kmp_affinity_verbose) {
4278 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
4281 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
4286 KMP_ASSERT(msg_id != kmp_i18n_null);
4287 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
4289 } else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
4290 if (__kmp_affinity_verbose) {
4291 KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
4294 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
4299 KMP_ASSERT(msg_id != kmp_i18n_null);
4300 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
4306 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
4307 const char *filename;
4308 const char *env_var = nullptr;
4309 if (__kmp_cpuinfo_file != NULL) {
4310 filename = __kmp_cpuinfo_file;
4311 env_var = "KMP_CPUINFO_FILE";
4313 filename = "/proc/cpuinfo";
4316 if (__kmp_affinity_verbose) {
4317 KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
4322 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
4324 KMP_ASSERT(msg_id != kmp_i18n_null);
4326 KMP_FATAL(FileLineMsgExiting, filename, line,
4327 __kmp_i18n_catgets(msg_id));
4329 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
4332 if (__kmp_affinity_type == affinity_none) {
4333 KMP_ASSERT(depth == 0);
4338 #if KMP_GROUP_AFFINITY
4340 else if (__kmp_affinity_top_method == affinity_top_method_group) {
4341 if (__kmp_affinity_verbose) {
4342 KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
4345 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
4346 KMP_ASSERT(depth != 0);
4348 KMP_ASSERT(msg_id != kmp_i18n_null);
4349 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
4355 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
4356 if (__kmp_affinity_verbose) {
4357 KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
4360 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
4365 KMP_ASSERT(depth > 0);
4366 KMP_ASSERT(address2os != NULL);
4369 #if KMP_USE_HIER_SCHED
4370 __kmp_dispatch_set_hierarchy_values();
4373 if (address2os == NULL) {
4374 if (KMP_AFFINITY_CAPABLE() &&
4375 (__kmp_affinity_verbose ||
4376 (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none)))) {
4377 KMP_WARNING(ErrorInitializeAffinity);
4379 __kmp_affinity_type = affinity_none;
4380 __kmp_create_affinity_none_places();
4381 KMP_AFFINITY_DISABLE();
4385 if (__kmp_affinity_gran == affinity_gran_tile
4387 && __kmp_tile_depth == 0
4391 KMP_WARNING(AffTilesNoTiles, "KMP_AFFINITY");
4394 __kmp_apply_thread_places(&address2os, depth);
4399 kmp_affin_mask_t *osId2Mask =
4400 __kmp_create_masks(&maxIndex, &numUnique, address2os, __kmp_avail_proc);
4401 if (__kmp_affinity_gran_levels == 0) {
4402 KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
4408 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
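// The switch below turns the requested affinity type into sorting parameters:
// "explicit" consumes the proclist/placelist, "logical"/"physical" map to a
// compaction level plus offset, "scatter" inverts the compaction level,
// "compact" clamps it, and "balanced" additionally builds procarr describing
// which procs sit on which core.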
4410 switch (__kmp_affinity_type) {
4412 case affinity_explicit:
4413 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
4414 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) {
4415 __kmp_affinity_process_proclist(
4416 &__kmp_affinity_masks, &__kmp_affinity_num_masks,
4417 __kmp_affinity_proclist, osId2Mask, maxIndex);
4419 __kmp_affinity_process_placelist(
4420 &__kmp_affinity_masks, &__kmp_affinity_num_masks,
4421 __kmp_affinity_proclist, osId2Mask, maxIndex);
4423 if (__kmp_affinity_num_masks == 0) {
4424 if (__kmp_affinity_verbose ||
4425 (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
4426 KMP_WARNING(AffNoValidProcID);
4428 __kmp_affinity_type = affinity_none;
4429 __kmp_create_affinity_none_places();
4439 case affinity_logical:
4440 __kmp_affinity_compact = 0;
4441 if (__kmp_affinity_offset) {
4442 __kmp_affinity_offset =
4443 __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
4447 case affinity_physical:
4448 if (__kmp_nThreadsPerCore > 1) {
4449 __kmp_affinity_compact = 1;
4450 if (__kmp_affinity_compact >= depth) {
4451 __kmp_affinity_compact = 0;
4454 __kmp_affinity_compact = 0;
4456 if (__kmp_affinity_offset) {
4457 __kmp_affinity_offset =
4458 __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
4462 case affinity_scatter:
4463 if (__kmp_affinity_compact >= depth) {
4464 __kmp_affinity_compact = 0;
4466 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
4470 case affinity_compact:
4471 if (__kmp_affinity_compact >= depth) {
4472 __kmp_affinity_compact = depth - 1;
4476 case affinity_balanced:
4478 if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
4479 KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
4481 __kmp_affinity_type = affinity_none;
4482 __kmp_create_affinity_none_places();
4484 } else if (!__kmp_affinity_uniform_topology()) {
4486 __kmp_aff_depth = depth;
4488 int core_level = __kmp_affinity_find_core_level(
4489 address2os, __kmp_avail_proc, depth - 1);
4490 int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
4491 depth - 1, core_level);
4492 int maxprocpercore = __kmp_affinity_max_proc_per_core(
4493 address2os, __kmp_avail_proc, depth - 1, core_level);
4495 int nproc = ncores * maxprocpercore;
4496 if ((nproc < 2) || (nproc < __kmp_avail_proc)) {
4497 if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
4498 KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
4500 __kmp_affinity_type = affinity_none;
4504 procarr = (int *)__kmp_allocate(sizeof(int) * nproc);
4505 for (int i = 0; i < nproc; i++) {
4511 for (int i = 0; i < __kmp_avail_proc; i++) {
4512 int proc = address2os[i].second;
4514 __kmp_affinity_find_core(address2os, i, depth - 1, core_level);
4516 if (core == lastcore) {
4523 procarr[core * maxprocpercore + inlastcore] = proc;
4526 if (__kmp_affinity_compact >= depth) {
4527 __kmp_affinity_compact = depth - 1;
4532 if (__kmp_affinity_dups) {
4533 __kmp_affinity_num_masks = __kmp_avail_proc;
4535 __kmp_affinity_num_masks = numUnique;
4538 if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
4539 (__kmp_affinity_num_places > 0) &&
4540 ((unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks)) {
4541 __kmp_affinity_num_masks = __kmp_affinity_num_places;
4544 KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
4548 qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
4549 __kmp_affinity_cmp_Address_child_num);
4553 for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
4554 if ((!__kmp_affinity_dups) && (!address2os[i].first.leader)) {
4557 unsigned osId = address2os[i].second;
4558 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
4559 kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j);
4560 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
4561 KMP_CPU_COPY(dest, src);
4562 if (++j >= __kmp_affinity_num_masks) {
4566 KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
4571 KMP_ASSERT2(0, "Unexpected affinity setting");
4574 KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex + 1);
4575 machine_hierarchy.init(address2os, __kmp_avail_proc);
4577 #undef KMP_EXIT_AFF_NONE
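// __kmp_affinity_initialize is the public entry point; when affinity is not
// capable or was disabled it only normalizes __kmp_affinity_type, otherwise it
// defers to __kmp_aux_affinity_initialize above.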
4579 void __kmp_affinity_initialize(void) {
4588 int disabled = (__kmp_affinity_type == affinity_disabled);
4589 if (!KMP_AFFINITY_CAPABLE()) {
4590 KMP_ASSERT(disabled);
4593 __kmp_affinity_type = affinity_none;
4595 __kmp_aux_affinity_initialize();
4597 __kmp_affinity_type = affinity_disabled;
4601 void __kmp_affinity_uninitialize(void) {
4602 if (__kmp_affinity_masks != NULL) {
4603 KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
4604 __kmp_affinity_masks = NULL;
4606 if (__kmp_affin_fullMask != NULL) {
4607 KMP_CPU_FREE(__kmp_affin_fullMask);
4608 __kmp_affin_fullMask = NULL;
4610 __kmp_affinity_num_masks = 0;
4611 __kmp_affinity_type = affinity_default;
4612 __kmp_affinity_num_places = 0;
4613 if (__kmp_affinity_proclist != NULL) {
4614 __kmp_free(__kmp_affinity_proclist);
4615 __kmp_affinity_proclist = NULL;
4617 if (address2os != NULL) {
4618 __kmp_free(address2os);
4621 if (procarr != NULL) {
4622 __kmp_free(procarr);
4626 if (__kmp_hwloc_topology != NULL) {
4627 hwloc_topology_destroy(__kmp_hwloc_topology);
4628 __kmp_hwloc_topology = NULL;
4631 KMPAffinity::destroy_api();
4634 void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
4635 if (!KMP_AFFINITY_CAPABLE()) {
4639 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4640 if (th->th.th_affin_mask == NULL) {
4641 KMP_CPU_ALLOC(th->th.th_affin_mask);
4643 KMP_CPU_ZERO(th->th.th_affin_mask);
4650 kmp_affin_mask_t *mask;
4653 if (KMP_AFFINITY_NON_PROC_BIND) {
4654 if ((__kmp_affinity_type == affinity_none) ||
4655 (__kmp_affinity_type == affinity_balanced)) {
4656 #if KMP_GROUP_AFFINITY
4657 if (__kmp_num_proc_groups > 1) {
4661 KMP_ASSERT(__kmp_affin_fullMask != NULL);
4663 mask = __kmp_affin_fullMask;
4665 KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
4666 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4667 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4671 (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
4672 #if KMP_GROUP_AFFINITY
4673 if (__kmp_num_proc_groups > 1) {
4677 KMP_ASSERT(__kmp_affin_fullMask != NULL);
4679 mask = __kmp_affin_fullMask;
4683 KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
4684 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4685 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4689 th->th.th_current_place = i;
4691 th->th.th_new_place = i;
4692 th->th.th_first_place = 0;
4693 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4694 } else if (KMP_AFFINITY_NON_PROC_BIND) {
4697 th->th.th_first_place = 0;
4698 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4701 if (i == KMP_PLACE_ALL) {
4702 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
4705 KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
4709 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4711 if (__kmp_affinity_verbose
4713 && (__kmp_affinity_type == affinity_none ||
4714 (i != KMP_PLACE_ALL && __kmp_affinity_type != affinity_balanced))) {
4715 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4716 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4717 th->th.th_affin_mask);
4718 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
4719 __kmp_gettid(), gtid, buf);
4726 if (__kmp_affinity_type == affinity_none) {
4727 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
4730 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4733 void __kmp_affinity_set_place(int gtid) {
4734 if (!KMP_AFFINITY_CAPABLE()) {
4738 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4740 KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current "
4742 gtid, th->th.th_new_place, th->th.th_current_place));
4745 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4746 KMP_ASSERT(th->th.th_new_place >= 0);
4747 KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
4748 if (th->th.th_first_place <= th->th.th_last_place) {
4749 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&
4750 (th->th.th_new_place <= th->th.th_last_place));
4752 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) ||
4753 (th->th.th_new_place >= th->th.th_last_place));
4758 kmp_affin_mask_t *mask =
4759 KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place);
4760 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4761 th->th.th_current_place = th->th.th_new_place;
4763 if (__kmp_affinity_verbose) {
4764 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4765 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4766 th->th.th_affin_mask);
4767 KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
4768 __kmp_gettid(), gtid, buf);
4770 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4773 int __kmp_aux_set_affinity(void **mask) {
4778 if (!KMP_AFFINITY_CAPABLE()) {
4782 gtid = __kmp_entry_gtid();
4783 KA_TRACE(1000, (""); {
4784 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4785 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4786 (kmp_affin_mask_t *)(*mask));
4788 "kmp_set_affinity: setting affinity mask for thread %d = %s\n", gtid,
4792 if (__kmp_env_consistency_check) {
4793 if ((mask == NULL) || (*mask == NULL)) {
4794 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4799 KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t *)(*mask))) {
4800 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
4801 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4803 if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
4808 if (num_procs == 0) {
4809 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4812 #if KMP_GROUP_AFFINITY
4813 if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
4814 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
4820 th = __kmp_threads[gtid];
4821 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4822 retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4824 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
4827 th->th.th_current_place = KMP_PLACE_UNDEFINED;
4828 th->th.th_new_place = KMP_PLACE_UNDEFINED;
4829 th->th.th_first_place = 0;
4830 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4833 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
4838 int __kmp_aux_get_affinity(void **mask) {
4843 if (!KMP_AFFINITY_CAPABLE()) {
4847 gtid = __kmp_entry_gtid();
4848 th = __kmp_threads[gtid];
4849 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4851 KA_TRACE(1000, (""); {
4852 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4853 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4854 th->th.th_affin_mask);
4855 __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n",
4859 if (__kmp_env_consistency_check) {
4860 if ((mask == NULL) || (*mask == NULL)) {
4861 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
4867 retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4868 KA_TRACE(1000, (""); {
4869 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4870 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4871 (kmp_affin_mask_t *)(*mask));
4872 __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n",
4879 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
4885 int __kmp_aux_get_affinity_max_proc() {
4886 if (!KMP_AFFINITY_CAPABLE()) {
4889 #if KMP_GROUP_AFFINITY
4890 if (__kmp_num_proc_groups > 1) {
4891 return (int)(__kmp_num_proc_groups * sizeof(DWORD_PTR) * CHAR_BIT);
4897 int __kmp_aux_set_affinity_mask_proc(int proc, void **mask) {
4898 if (!KMP_AFFINITY_CAPABLE()) {
4902 KA_TRACE(1000, (""); {
4903 int gtid = __kmp_entry_gtid();
4904 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4905 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4906 (kmp_affin_mask_t *)(*mask));
4907 __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in "
4908 "affinity mask for thread %d = %s\n",
4912 if (__kmp_env_consistency_check) {
4913 if ((mask == NULL) || (*mask == NULL)) {
4914 KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
4918 if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
4921 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
4925 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
4929 int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask) {
4930 if (!KMP_AFFINITY_CAPABLE()) {
4934 KA_TRACE(1000, (""); {
4935 int gtid = __kmp_entry_gtid();
4936 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4937 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4938 (kmp_affin_mask_t *)(*mask));
4939 __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in "
4940 "affinity mask for thread %d = %s\n",
4944 if (__kmp_env_consistency_check) {
4945 if ((mask == NULL) || (*mask == NULL)) {
4946 KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
4950 if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
4953 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
4957 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
4961 int __kmp_aux_get_affinity_mask_proc(int proc, void **mask) {
4962 if (!KMP_AFFINITY_CAPABLE()) {
4966 KA_TRACE(1000, (""); {
4967 int gtid = __kmp_entry_gtid();
4968 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4969 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4970 (kmp_affin_mask_t *)(*mask));
4971 __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in "
4972 "affinity mask for thread %d = %s\n",
4976 if (__kmp_env_consistency_check) {
4977 if ((mask == NULL) || (*mask == NULL)) {
4978 KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
4982 if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
4985 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
4989 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
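// __kmp_balanced_affinity spreads nthreads over cores as evenly as possible.
// The granularity switch first decides whether a thread is pinned to a single
// hardware thread (fine_gran) or to its whole core/package; uniform topologies
// then use simple index arithmetic, while non-uniform ones walk procarr.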
4993 void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
4994 KMP_DEBUG_ASSERT(th);
4995 bool fine_gran = true;
4996 int tid = th->th.th_info.ds.ds_tid;
4998 switch (__kmp_affinity_gran) {
4999 case affinity_gran_fine:
5000 case affinity_gran_thread:
5002 case affinity_gran_core:
5003 if (__kmp_nThreadsPerCore > 1) {
5007 case affinity_gran_package:
5008 if (nCoresPerPkg > 1) {
5016 if (__kmp_affinity_uniform_topology()) {
5020 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
5022 int ncores = __kmp_ncores;
5023 if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) {
5024 __kmp_nth_per_core = __kmp_avail_proc / nPackages;
5028 int chunk = nthreads / ncores;
5030 int big_cores = nthreads % ncores;
5032 int big_nth = (chunk + 1) * big_cores;
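// Worked example (assumed numbers): with nthreads = 10 on ncores = 4, chunk = 2
// and big_cores = 2, so threads 0-5 land on cores 0-1 (3 threads each) and
// threads 6-9 land on cores 2-3 (2 threads each).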
5033 if (tid < big_nth) {
5034 coreID = tid / (chunk + 1);
5035 threadID = (tid % (chunk + 1)) % __kmp_nth_per_core;
5037 coreID = (tid - big_cores) / chunk;
5038 threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core;
5041 KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
5042 "Illegal set affinity operation when not capable");
5044 kmp_affin_mask_t *mask = th->th.th_affin_mask;
5048 int osID = address2os[coreID * __kmp_nth_per_core + threadID].second;
5049 KMP_CPU_SET(osID, mask);
5051 for (int i = 0; i < __kmp_nth_per_core; i++) {
5053 osID = address2os[coreID * __kmp_nth_per_core + i].second;
5054 KMP_CPU_SET(osID, mask);
5057 if (__kmp_affinity_verbose) {
5058 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5059 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
5060 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
5061 __kmp_gettid(), tid, buf);
5063 __kmp_set_system_affinity(mask, TRUE);
5066 kmp_affin_mask_t *mask = th->th.th_affin_mask;
5069 int core_level = __kmp_affinity_find_core_level(
5070 address2os, __kmp_avail_proc, __kmp_aff_depth - 1);
5071 int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
5072 __kmp_aff_depth - 1, core_level);
5073 int nth_per_core = __kmp_affinity_max_proc_per_core(
5074 address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level);
5078 if (nthreads == __kmp_avail_proc) {
5080 int osID = address2os[tid].second;
5081 KMP_CPU_SET(osID, mask);
5083 int core = __kmp_affinity_find_core(address2os, tid,
5084 __kmp_aff_depth - 1, core_level);
5085 for (int i = 0; i < __kmp_avail_proc; i++) {
5086 int osID = address2os[i].second;
5087 if (__kmp_affinity_find_core(address2os, i, __kmp_aff_depth - 1,
5088 core_level) == core) {
5089 KMP_CPU_SET(osID, mask);
5093 } else if (nthreads <= ncores) {
5096 for (int i = 0; i < ncores; i++) {
5099 for (int j = 0; j < nth_per_core; j++) {
5100 if (procarr[i * nth_per_core + j] != -1) {
5107 for (int j = 0; j < nth_per_core; j++) {
5108 int osID = procarr[i * nth_per_core + j];
5110 KMP_CPU_SET(osID, mask);
5126 int *nproc_at_core = (int *)KMP_ALLOCA(sizeof(int) * ncores);
5128 int *ncores_with_x_procs =
5129 (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));
5131 int *ncores_with_x_to_max_procs =
5132 (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));
5134 for (int i = 0; i <= nth_per_core; i++) {
5135 ncores_with_x_procs[i] = 0;
5136 ncores_with_x_to_max_procs[i] = 0;
5139 for (int i = 0; i < ncores; i++) {
5141 for (int j = 0; j < nth_per_core; j++) {
5142 if (procarr[i * nth_per_core + j] != -1) {
5146 nproc_at_core[i] = cnt;
5147 ncores_with_x_procs[cnt]++;
5150 for (int i = 0; i <= nth_per_core; i++) {
5151 for (int j = i; j <= nth_per_core; j++) {
5152 ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j];
5157 int nproc = nth_per_core * ncores;
5159 int *newarr = (int *)__kmp_allocate(sizeof(int) * nproc);
5160 for (int i = 0; i < nproc; i++) {
5167 for (int j = 1; j <= nth_per_core; j++) {
5168 int cnt = ncores_with_x_to_max_procs[j];
5169 for (int i = 0; i < ncores; i++) {
5171 if (nproc_at_core[i] == 0) {
5174 for (int k = 0; k < nth_per_core; k++) {
5175 if (procarr[i * nth_per_core + k] != -1) {
5176 if (newarr[i * nth_per_core + k] == 0) {
5177 newarr[i * nth_per_core + k] = 1;
5183 newarr[i * nth_per_core + k]++;
5191 if (cnt == 0 || nth == 0) {
5202 for (int i = 0; i < nproc; i++) {
5206 int osID = procarr[i];
5207 KMP_CPU_SET(osID, mask);
5209 int coreID = i / nth_per_core;
5210 for (int ii = 0; ii < nth_per_core; ii++) {
5211 int osID = procarr[coreID * nth_per_core + ii];
5213 KMP_CPU_SET(osID, mask);
5223 if (__kmp_affinity_verbose) {
5224 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5225 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
5226 KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
5227 __kmp_gettid(), tid, buf);
5229 __kmp_set_system_affinity(mask, TRUE);
5233 #if KMP_OS_LINUX || KMP_OS_FREEBSD
5247 kmp_set_thread_affinity_mask_initial()
5252 int gtid = __kmp_get_gtid();
5255 KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
5256 "non-omp thread, returning\n"));
5259 if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) {
5260 KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
5261 "affinity not initialized, returning\n"));
5264 KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
5265 "set full mask for thread %d\n",
5267 KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
5268 return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);