15 #include "kmp_affinity.h" 19 #include "kmp_wrapper_getpid.h" 20 #if KMP_USE_HIER_SCHED 21 #include "kmp_dispatch_hier.h" 25 static hierarchy_info machine_hierarchy;
void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }
void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
  kmp_uint32 depth;
  // The test below is true if affinity is available, but set to "none". Need
  // to init on first use of hierarchical barrier.
  if (TCR_1(machine_hierarchy.uninitialized))
    machine_hierarchy.init(NULL, nproc);

  // Adjust the hierarchy in case num threads exceeds original
  if (nproc > machine_hierarchy.base_num_threads)
    machine_hierarchy.resize(nproc);

  depth = machine_hierarchy.depth;
  KMP_DEBUG_ASSERT(depth > 0);

  thr_bar->depth = depth;
  thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1;
  thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}
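// Usage sketch (hypothetical caller, not part of this file): a worker thread
// initializing its hierarchical-barrier state would do something like
//   __kmp_get_hierarchy(__kmp_avail_proc, &this_thr->th.th_bar[b].bb);
// after which thr_bar->depth and thr_bar->skip_per_level describe the
// machine tree for that barrier.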
#if KMP_AFFINITY_SUPPORTED

bool KMPAffinity::picked_api = false;
void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
void *KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); }
void KMPAffinity::Mask::operator delete(void *p) { __kmp_free(p); }
void KMPAffinity::Mask::operator delete[](void *p) { __kmp_free(p); }
void *KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); }
void KMPAffinity::operator delete(void *p) { __kmp_free(p); }
void KMPAffinity::pick_api() {
  KMPAffinity *affinity_dispatch;
  if (picked_api)
    return;
#if KMP_USE_HWLOC
  // Only use Hwloc if affinity isn't explicitly disabled and
  // user requests Hwloc topology method
  if (__kmp_affinity_top_method == affinity_top_method_hwloc &&
      __kmp_affinity_type != affinity_disabled) {
    affinity_dispatch = new KMPHwlocAffinity();
  } else
#endif
    affinity_dispatch = new KMPNativeAffinity();
  __kmp_affinity_dispatch = affinity_dispatch;
  picked_api = true;
}
void KMPAffinity::destroy_api() {
  if (__kmp_affinity_dispatch != NULL) {
    delete __kmp_affinity_dispatch;
    __kmp_affinity_dispatch = NULL;
    picked_api = false;
  }
}
// Print the affinity mask to the character array in a pretty format.
char *__kmp_affinity_print_mask(char *buf, int buf_len,
                                kmp_affin_mask_t *mask) {
  KMP_ASSERT(buf_len >= 40);
  char *scan = buf;
  char *end = buf + buf_len - 1;

  // Find first element / check for empty set.
  int i;
  i = mask->begin();
  if (i == mask->end()) {
    KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}");
    while (*scan != '\0')
      scan++;
    KMP_ASSERT(scan <= end);
    return buf;
  }

  KMP_SNPRINTF(scan, end - scan + 1, "{%d", i);
  while (*scan != '\0')
    scan++;
  i++;
  for (; i != mask->end(); i = mask->next(i)) {
    if (!KMP_CPU_ISSET(i, mask)) {
      continue;
    }

    // Check for buffer overflow. A string of the form ",<n>" will have at
    // most 10 characters, plus we want to leave room to print ",...}" if the
    // set is too large to print for a total of 15 characters. We already left
    // room for '\0' in setting end.
    if (end - scan < 15) {
      break;
    }
    KMP_SNPRINTF(scan, end - scan + 1, ",%-d", i);
    while (*scan != '\0')
      scan++;
  }
  if (i != mask->end()) {
    KMP_SNPRINTF(scan, end - scan + 1, ",...");
    while (*scan != '\0')
      scan++;
  }
  KMP_SNPRINTF(scan, end - scan + 1, "}");
  while (*scan != '\0')
    scan++;
  KMP_ASSERT(scan <= end);
  return buf;
}
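// Example of the output format: for a mask containing procs 0-2 the buffer
// holds "{0,1,2}", an empty mask prints "{<empty>}", and a set too large for
// buf_len is truncated with ",...}". A typical (hypothetical) call:
//   char buf[KMP_AFFIN_MASK_PRINT_LEN];
//   __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);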
void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
  KMP_CPU_ZERO(mask);

#if KMP_GROUP_AFFINITY

  if (__kmp_num_proc_groups > 1) {
    int group;
    KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
    for (group = 0; group < __kmp_num_proc_groups; group++) {
      int i;
      int num = __kmp_GetActiveProcessorCount(group);
      for (i = 0; i < num; i++) {
        KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
      }
    }
  } else

#endif /* KMP_GROUP_AFFINITY */

  {
    int proc;
    for (proc = 0; proc < __kmp_xproc; proc++) {
      KMP_CPU_SET(proc, mask);
    }
  }
}
// Compare two (Address, os-id) pairs by their topology labels, most
// significant level first; used by qsort() throughout this file.
static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}

// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels so that each node's children are numbered
// [0 .. <num children of that node>).
static void __kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
                                             int numAddrs) {
  KMP_DEBUG_ASSERT(numAddrs > 0);
  int depth = address2os->first.depth;
  unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *lastLabel = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  int labCt;
  for (labCt = 0; labCt < depth; labCt++) {
    address2os[0].first.childNums[labCt] = counts[labCt] = 0;
    lastLabel[labCt] = address2os[0].first.labels[labCt];
  }
  int i;
  for (i = 1; i < numAddrs; i++) {
    for (labCt = 0; labCt < depth; labCt++) {
      if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
        int labCt2;
        for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
          counts[labCt2] = 0;
          lastLabel[labCt2] = address2os[i].first.labels[labCt2];
        }
        counts[labCt]++;
        lastLabel[labCt] = address2os[i].first.labels[labCt];
        break;
      }
    }
    for (labCt = 0; labCt < depth; labCt++) {
      address2os[i].first.childNums[labCt] = counts[labCt];
    }
    for (; labCt < (int)Address::maxDepth; labCt++) {
      address2os[i].first.childNums[labCt] = 0;
    }
  }
  __kmp_free(lastLabel);
  __kmp_free(counts);
}
// The machine topology.
kmp_affin_mask_t *__kmp_affin_fullMask = NULL;

static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif
static int *__kmp_pu_os_idx = NULL;

// __kmp_affinity_uniform_topology() doesn't work when called from places that
// support arbitrarily many levels in the machine topology map, i.e. the
// non-default cases in __kmp_affinity_create_cpuinfo_map() and
// __kmp_affinity_create_x2apicid_map().
inline static bool __kmp_affinity_uniform_topology() {
  return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}
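// Example (hypothetical numbers): on a 2-package, 4-core-per-package,
// 2-thread-per-core machine with all 16 OS procs available,
// 2 * 4 * 2 == __kmp_avail_proc, so the topology is "uniform"; if the process
// mask hides even one PU the product no longer matches and the topology is
// reported as non-uniform.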
// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc.
static void __kmp_affinity_print_topology(AddrUnsPair *address2os, int len,
                                          int depth, int pkgLevel,
                                          int coreLevel, int threadLevel) {
  int proc;
  KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
  for (proc = 0; proc < len; proc++) {
    int level;
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    for (level = 0; level < depth; level++) {
      if (level == threadLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
      } else if (level == coreLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
      } else if (level == pkgLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
      } else if (level > pkgLevel) {
        __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                            level - pkgLevel - 1);
      } else {
        __kmp_str_buf_print(&buf, "L%d ", level);
      }
      __kmp_str_buf_print(&buf, "%d ", address2os[proc].first.labels[level]);
    }
    KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
               buf.str);
    __kmp_str_buf_free(&buf);
  }
}
static void __kmp_affinity_print_hwloc_tp(AddrUnsPair *addrP, int len,
                                          int depth, int *levels) {
  int proc;
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
  for (proc = 0; proc < len; proc++) {
    __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Package),
                        addrP[proc].first.labels[0]);
    if (depth > 1) {
      int level = 1; // iterate over levels
      int label = 1; // iterate over labels
      if (__kmp_numa_detected)
        // node level follows package
        if (levels[level++] > 0)
          __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Node),
                              addrP[proc].first.labels[label++]);
      if (__kmp_tile_depth > 0)
        // tile level follows node if any, or package
        if (levels[level++] > 0)
          __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Tile),
                              addrP[proc].first.labels[label++]);
      if (levels[level++] > 0)
        // core level follows
        __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Core),
                            addrP[proc].first.labels[label++]);
      if (levels[level++] > 0)
        // thread level is the latest
        __kmp_str_buf_print(&buf, "%s %d ", KMP_I18N_STR(Thread),
                            addrP[proc].first.labels[label++]);
      KMP_DEBUG_ASSERT(label == depth);
    }
    KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", addrP[proc].second, buf.str);
    __kmp_str_buf_clear(&buf);
  }
  __kmp_str_buf_free(&buf);
}
static int nNodePerPkg, nTilePerPkg, nTilePerNode, nCorePerNode, nCorePerTile;
// This function removes the topology levels that are radix 1 and don't offer
// further information about the topology. The most common example is when you
// have one thread context per core: the thread-context level adds no unique
// labels, so it is removed. Returns the new depth of address2os.
static int __kmp_affinity_remove_radix_one_levels(AddrUnsPair *addrP, int nTh,
                                                  int depth, int *levels) {
  int level;
  int i;
  int radix1_detected;
  int new_depth = depth;
  for (level = depth - 1; level > 0; --level) {
    // Detect if this level is radix 1.
    radix1_detected = 1;
    for (i = 1; i < nTh; ++i) {
      if (addrP[0].first.labels[level] != addrP[i].first.labels[level]) {
        // There are differing label values for this level so it stays.
        radix1_detected = 0;
        break;
      }
    }
    if (!radix1_detected)
      continue;
    // Radix 1 was detected.
    --new_depth;
    levels[level] = -1; // mark level as not present in address2os array
    if (level == new_depth) {
      // "Turn off" the deepest level: just decrementing the depth removes
      // the level from the address2os array.
      for (i = 0; i < nTh; ++i) {
        addrP[i].first.depth--;
      }
    } else {
      // For other levels, move the labels over and reduce the depth.
      int j;
      for (j = level; j < new_depth; ++j) {
        for (i = 0; i < nTh; ++i) {
          addrP[i].first.labels[j] = addrP[i].first.labels[j + 1];
          addrP[i].first.depth--;
        }
      }
    }
  }
  return new_depth;
}
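// Example (hypothetical): with one thread context per core, the thread level
// carries a single label value (radix 1) for every core, so it is removed;
// depth 3 {package, core, thread} collapses to depth 2 {package, core} and
// levels[] is marked -1 at the removed level.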
#if KMP_USE_HWLOC

static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj,
                                           hwloc_obj_type_t type) {
  int retval = 0;
  hwloc_obj_t first;
  for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type,
                                           obj->logical_index, type, 0);
       first != NULL &&
       hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) ==
           obj;
       first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type,
                                          first)) {
    ++retval;
  }
  return retval;
}
// Count the descendants of object o at the given topology depth; *f receives
// the first descendant found.
static int __kmp_hwloc_count_children_by_depth(hwloc_topology_t t,
                                               hwloc_obj_t o, unsigned depth,
                                               hwloc_obj_t *f) {
  if (o->depth == depth) {
    if (*f == NULL)
      *f = o; // output first descendant found
    return 1;
  }
  int sum = 0;
  for (unsigned i = 0; i < o->arity; i++)
    sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f);
  return sum; // will be 0 if no one found (as PU arity is 0)
}
static int __kmp_hwloc_count_children_by_type(hwloc_topology_t t, hwloc_obj_t o,
                                              hwloc_obj_type_t type,
                                              hwloc_obj_t *f) {
  if (!hwloc_compare_types(o->type, type)) {
    if (*f == NULL)
      *f = o; // output first descendant found
    return 1;
  }
  int sum = 0;
  for (unsigned i = 0; i < o->arity; i++)
    sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f);
  return sum; // will be 0 if no one found (as PU arity is 0)
}
static int __kmp_hwloc_process_obj_core_pu(AddrUnsPair *addrPair,
                                           int &nActiveThreads,
                                           int &num_active_cores,
                                           hwloc_obj_t obj, int depth,
                                           int *labels) {
  hwloc_obj_t core = NULL;
  hwloc_topology_t &tp = __kmp_hwloc_topology;
  int NC = __kmp_hwloc_count_children_by_type(tp, obj, HWLOC_OBJ_CORE, &core);
  for (int core_id = 0; core_id < NC; ++core_id, core = core->next_cousin) {
    hwloc_obj_t pu = NULL;
    KMP_DEBUG_ASSERT(core != NULL);
    int num_active_threads = 0;
    int NT = __kmp_hwloc_count_children_by_type(tp, core, HWLOC_OBJ_PU, &pu);
    for (int pu_id = 0; pu_id < NT; ++pu_id, pu = pu->next_cousin) {
      KMP_DEBUG_ASSERT(pu != NULL);
      if (!KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
        continue; // skip PU if not in fullMask
      Address addr(depth + 2);
      KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
                    obj->os_index, obj->logical_index, core->os_index,
                    core->logical_index, pu->os_index, pu->logical_index));
      for (int i = 0; i < depth; ++i)
        addr.labels[i] = labels[i]; // package, etc.
      addr.labels[depth] = core_id; // core
      addr.labels[depth + 1] = pu_id; // pu
      addrPair[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
      __kmp_pu_os_idx[nActiveThreads] = pu->os_index;
      nActiveThreads++; // count total number of active threads
      ++num_active_threads; // count active threads per core
    }
    if (num_active_threads) { // were there any active threads on the core?
      ++__kmp_ncores; // count total active cores
      ++num_active_cores; // count active cores per socket
      if (num_active_threads > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = num_active_threads; // calc maximum
    }
  }
  return 0;
}
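// Sketch of the label layout this helper produces (assuming a tile-less,
// NUMA-less machine, where the caller passes depth == 1 and
// labels[0] == socket_id):
//   addr.labels = { socket_id, core_id, pu_id }
// Deeper callers pass depth == 2 or 3 with node/tile ids already filled in.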
// Check if a NUMA node is present below the package, and whether a tile
// (cores sharing a cache) object is present; record the tile's depth.
static int __kmp_hwloc_check_numa() {
  hwloc_topology_t &tp = __kmp_hwloc_topology;
  hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
  int depth;

  // Get some PU.
  hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, 0);
  if (hT == NULL) // something has gone wrong
    return 1;

  // Check NUMA node below PACKAGE.
  hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
  hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
  KMP_DEBUG_ASSERT(hS != NULL);
  if (hN != NULL && hN->depth > hS->depth) {
    __kmp_numa_detected = TRUE; // socket includes node(s)
    if (__kmp_affinity_gran == affinity_gran_node) {
      __kmp_affinity_gran = affinity_gran_numa;
    }
  }

  // Check tile; get the object by depth because of possible multiple caches.
  depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
  hL = hwloc_get_ancestor_obj_by_depth(tp, depth, hT);
  hC = NULL;
  if (hL != NULL &&
      __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1)
    __kmp_tile_depth = depth; // tile consists of multiple cores
  return 0;
}
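// Hypothetical example: on a machine where the L2 cache is shared by 4 cores,
// hwloc_get_cache_type_depth(tp, 2, ...) returns the L2 depth, the count of
// cores under that object is > 1, and __kmp_tile_depth records it so the L2
// domain is exposed as a "tile" level in the topology map.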
static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
                                           kmp_i18n_id_t *const msg_id) {
  hwloc_topology_t &tp = __kmp_hwloc_topology; // shortcut of a long name
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  __kmp_get_system_affinity(oldMask, TRUE);
  __kmp_hwloc_check_numa();

  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from hwloc on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity_type == affinity_none);

    nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(
        hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0), HWLOC_OBJ_CORE);
    __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(
        hwloc_get_obj_by_type(tp, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);
    __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    KMP_CPU_FREE(oldMask);
    return 0;
  }
  int depth = 3;
  int levels[5] = {0, 1, 2, 3, 4}; // package, [node,] [tile,] core, thread
  int labels[3] = {0}; // package [,node] [,tile] - head of labels array
  if (__kmp_numa_detected)
    ++depth;
  if (__kmp_tile_depth)
    ++depth;

  // Allocate the data structure to be returned.
  AddrUnsPair *retval =
      (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  hwloc_obj_t socket, node, tile;
  int nActiveThreads = 0;
  int socket_id = 0;
  // Re-calculate globals to count only accessible resources.
  __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
  nNodePerPkg = nTilePerPkg = nTilePerNode = nCorePerNode = nCorePerTile = 0;
  for (socket = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0); socket != NULL;
       socket = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, socket),
      socket_id++) {
    labels[0] = socket_id;
    if (__kmp_numa_detected) {
      int NN;
      int n_active_nodes = 0;
      node = NULL;
      NN = __kmp_hwloc_count_children_by_type(tp, socket, HWLOC_OBJ_NUMANODE,
                                              &node);
      for (int node_id = 0; node_id < NN; ++node_id, node = node->next_cousin) {
        labels[1] = node_id;
        if (__kmp_tile_depth) {
          // NUMA + tiles
          int NT;
          int n_active_tiles = 0;
          tile = NULL;
          NT = __kmp_hwloc_count_children_by_depth(tp, node, __kmp_tile_depth,
                                                   &tile);
          for (int tl_id = 0; tl_id < NT; ++tl_id, tile = tile->next_cousin) {
            labels[2] = tl_id;
            int n_active_cores = 0;
            __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
                                            n_active_cores, tile, 3, labels);
            if (n_active_cores) { // were there any active cores on the tile?
              ++n_active_tiles; // count active tiles per node
              if (n_active_cores > nCorePerTile)
                nCorePerTile = n_active_cores; // calc maximum
            }
          }
          if (n_active_tiles) { // were there any active tiles on the node?
            ++n_active_nodes; // count active nodes per package
            if (n_active_tiles > nTilePerNode)
              nTilePerNode = n_active_tiles; // calc maximum
          }
        } else {
          // NUMA, no tiles
          int n_active_cores = 0;
          __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
                                          n_active_cores, node, 2, labels);
          if (n_active_cores) { // were there any active cores on the node?
            ++n_active_nodes; // count active nodes per package
            if (n_active_cores > nCorePerNode)
              nCorePerNode = n_active_cores; // calc maximum
          }
        }
      }
      if (n_active_nodes) { // were there any active nodes on the socket?
        ++nPackages; // count total active packages
        if (n_active_nodes > nNodePerPkg)
          nNodePerPkg = n_active_nodes; // calc maximum
      }
    } else {
      if (__kmp_tile_depth) {
        // no NUMA, tiles
        int NT;
        int n_active_tiles = 0;
        tile = NULL;
        NT = __kmp_hwloc_count_children_by_depth(tp, socket, __kmp_tile_depth,
                                                 &tile);
        for (int tl_id = 0; tl_id < NT; ++tl_id, tile = tile->next_cousin) {
          labels[1] = tl_id;
          int n_active_cores = 0;
          __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads,
                                          n_active_cores, tile, 2, labels);
          if (n_active_cores) { // were there any active cores on the tile?
            ++n_active_tiles; // count active tiles per package
            if (n_active_cores > nCorePerTile)
              nCorePerTile = n_active_cores; // calc maximum
          }
        }
        if (n_active_tiles) { // were there any active tiles on the package?
          ++nPackages; // count total active packages
          if (n_active_tiles > nTilePerPkg)
            nTilePerPkg = n_active_tiles; // calc maximum
        }
      } else {
        // no NUMA, no tiles
        int n_active_cores = 0;
        __kmp_hwloc_process_obj_core_pu(retval, nActiveThreads, n_active_cores,
                                        socket, 1, labels);
        if (n_active_cores) { // were there any active cores on the socket?
          ++nPackages; // count total active packages
          if (n_active_cores > nCoresPerPkg)
            nCoresPerPkg = n_active_cores; // calc maximum
        }
      }
    }
  }

  // Check for anomalies.
  KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
  KMP_ASSERT(nActiveThreads > 0);
  // If there's only one thread context to bind to, return now.
  if (nActiveThreads == 1) {
    __kmp_ncores = nPackages = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

      KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(retval);
      KMP_CPU_FREE(oldMask);
      return 0;
    }

    // Form an Address object which only includes the package level.
    Address addr(1);
    addr.labels[0] = retval[0].first.labels[0];
    retval[0].first = addr;

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
    }

    *address2os = retval;
    KMP_CPU_FREE(oldMask);
    return 1;
  }
  // Sort the table by physical Id.
  qsort(retval, nActiveThreads, sizeof(*retval),
        __kmp_affinity_cmp_Address_labels);

  // Check if the machine topology is uniform.
  int nPUs = nPackages * __kmp_nThreadsPerCore;
  if (__kmp_numa_detected) {
    if (__kmp_tile_depth) { // NUMA + tiles
      nPUs *= (nNodePerPkg * nTilePerNode * nCorePerTile);
    } else { // NUMA, no tiles
      nPUs *= (nNodePerPkg * nCorePerNode);
    }
  } else {
    if (__kmp_tile_depth) { // no NUMA, tiles
      nPUs *= (nTilePerPkg * nCorePerTile);
    } else { // no NUMA, no tiles
      nPUs *= nCoresPerPkg;
    }
  }
  unsigned uniform = (nPUs == nActiveThreads);
  // Print the machine topology summary.
  if (__kmp_affinity_verbose) {
    char mask[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (uniform) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }
    if (__kmp_numa_detected) {
      if (__kmp_tile_depth) { // NUMA + tiles
        KMP_INFORM(TopologyExtraNoTi, "KMP_AFFINITY", nPackages, nNodePerPkg,
                   nTilePerNode, nCorePerTile, __kmp_nThreadsPerCore,
                   __kmp_ncores);
      } else { // NUMA, no tiles
        KMP_INFORM(TopologyExtraNode, "KMP_AFFINITY", nPackages, nNodePerPkg,
                   nCorePerNode, __kmp_nThreadsPerCore, __kmp_ncores);
        nPUs *= (nNodePerPkg * nCorePerNode);
      }
    } else {
      if (__kmp_tile_depth) { // no NUMA, tiles
        KMP_INFORM(TopologyExtraTile, "KMP_AFFINITY", nPackages, nTilePerPkg,
                   nCorePerTile, __kmp_nThreadsPerCore, __kmp_ncores);
      } else { // no NUMA, no tiles
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);
        __kmp_str_buf_print(&buf, "%d", nPackages);
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
                   __kmp_nThreadsPerCore, __kmp_ncores);
        __kmp_str_buf_free(&buf);
      }
    }
  }
  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(retval);
    KMP_CPU_FREE(oldMask);
    return 0;
  }

  int depth_full = depth; // number of levels before compressing
  // Find any levels with radix 1, and remove them from the map
  // (except for the package level).
  depth = __kmp_affinity_remove_radix_one_levels(retval, nActiveThreads, depth,
                                                 levels);
  KMP_DEBUG_ASSERT(__kmp_affinity_gran != affinity_gran_default);
  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled
    // in the machine topology map.
    __kmp_affinity_gran_levels = 0; // lowest level (e.g. fine)
    if (__kmp_affinity_gran > affinity_gran_thread) {
      for (int i = 1; i <= depth_full; ++i) {
        if (__kmp_affinity_gran <= i) // only count deeper levels
          break;
        if (levels[depth_full - i] > 0)
          __kmp_affinity_gran_levels++;
      }
    }
    if (__kmp_affinity_gran > affinity_gran_package)
      __kmp_affinity_gran_levels++; // e.g. granularity = group
  }

  if (__kmp_affinity_verbose)
    __kmp_affinity_print_hwloc_tp(retval, nActiveThreads, depth, levels);

  KMP_CPU_FREE(oldMask);
  *address2os = retval;
  return depth;
}
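// Note on the create_*_map() routines in this file: each fills *address2os
// with one (Address, os-id) pair per available OS proc and returns the depth
// of the topology map -- 0 when affinity is "none", or a negative value with
// *msg_id set on failure. This is a reading of the code here, not a
// documented contract.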
#endif // KMP_USE_HWLOC

// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread ids onto physical packages.
static int __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
                                          kmp_i18n_id_t *const msg_id) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  if (!KMP_AFFINITY_CAPABLE()) {
    KMP_ASSERT(__kmp_affinity_type == affinity_none);
    __kmp_ncores = nPackages = __kmp_xproc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    return 0;
  }

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  __kmp_ncores = nPackages = __kmp_avail_proc;
  __kmp_nThreadsPerCore = nCoresPerPkg = 1;
  if (__kmp_affinity_verbose) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              __kmp_affin_fullMask);

    KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    KMP_INFORM(Uniform, "KMP_AFFINITY");
    KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
  }
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  if (__kmp_affinity_type == affinity_none) {
    int avail_ct = 0;
    int i;
    KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
      if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask))
        continue;
      __kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat
    }
    return 0;
  }

  // Construct the data structure to be returned.
  *address2os =
      (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
  int avail_ct = 0;
  int i;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
    Address addr(1);
    addr.labels[0] = i;
    (*address2os)[avail_ct++] = AddrUnsPair(addr, i);
  }
  if (__kmp_affinity_verbose) {
    KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Only the package level is modeled in the machine topology map,
    // so the #levels of granularity is either 0 or 1.
    if (__kmp_affinity_gran > affinity_gran_package) {
      __kmp_affinity_gran_levels = 1;
    } else {
      __kmp_affinity_gran_levels = 0;
    }
  }
  return 1;
}
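// In the flat model every OS proc becomes its own "package": for procs
// {0, 1, 2} the map is (Address{0}, 0), (Address{1}, 1), (Address{2}, 2),
// with depth 1 -- so a granularity coarser than "package" can collapse at
// most one level.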
#if KMP_GROUP_AFFINITY

// If multiple Windows* OS processor groups exist, we can create a 2-level
// topology map with the groups at level 0 and the individual procs at
// level 1. This lets the threads float among all procs in a group, if
// granularity=group (the default when there are multiple groups).
static int __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
                                                kmp_i18n_id_t *const msg_id) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // If we aren't affinity capable, then return now.
  // The flat mapping will be used.
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  // Construct the data structure to be returned.
  *address2os =
      (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  int avail_ct = 0;
  int i;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
    Address addr(2);
    addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
    addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
    (*address2os)[avail_ct++] = AddrUnsPair(addr, i);

    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
                 addr.labels[1]);
    }
  }

  if (__kmp_affinity_gran_levels < 0) {
    if (__kmp_affinity_gran == affinity_gran_group) {
      __kmp_affinity_gran_levels = 1;
    } else if ((__kmp_affinity_gran == affinity_gran_fine) ||
               (__kmp_affinity_gran == affinity_gran_thread)) {
      __kmp_affinity_gran_levels = 0;
    } else {
      const char *gran_str = NULL;
      if (__kmp_affinity_gran == affinity_gran_core) {
        gran_str = "core";
      } else if (__kmp_affinity_gran == affinity_gran_package) {
        gran_str = "package";
      } else if (__kmp_affinity_gran == affinity_gran_node) {
        gran_str = "node";
      } else {
        KMP_ASSERT(0);
      }

      // The affinity granularity specified cannot be used with the group
      // topology method; fall back to fine (thread) granularity.
      __kmp_affinity_gran_levels = 0;
    }
  }
  return 2;
}

#endif /* KMP_GROUP_AFFINITY */
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

static int __kmp_cpuid_mask_width(int count) {
  int r = 0;
  while ((1 << r) < count)
    ++r;
  return r;
}

class apicThreadInfo {
public:
  unsigned osId; // param to __kmp_affinity_bind_thread
  unsigned apicId; // from cpuid after binding
  unsigned maxCoresPerPkg; //       ""
  unsigned maxThreadsPerPkg; //       ""
  unsigned pkgId; // inferred from above values
  unsigned coreId; //       ""
  unsigned threadId; //       ""
};

static int __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a,
                                                     const void *b) {
  const apicThreadInfo *aa = (const apicThreadInfo *)a;
  const apicThreadInfo *bb = (const apicThreadInfo *)b;
  if (aa->pkgId < bb->pkgId)
    return -1;
  if (aa->pkgId > bb->pkgId)
    return 1;
  if (aa->coreId < bb->coreId)
    return -1;
  if (aa->coreId > bb->coreId)
    return 1;
  if (aa->threadId < bb->threadId)
    return -1;
  if (aa->threadId > bb->threadId)
    return 1;
  return 0;
}
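// Worked example of the decomposition performed below (hypothetical part with
// maxThreadsPerPkg = 8 and maxCoresPerPkg = 4): widthCT = 3, widthC = 2,
// widthT = 1, so for a given apicId the fields are
//   pkgId    = apicId >> 3
//   coreId   = (apicId >> 1) & 0x3
//   threadId = apicId & 0x1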
// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use an
// algorithm which cycles through the available os threads, setting the
// current thread's affinity mask to that thread, and then retrieves the Apic
// Id for each thread context using the cpuid instruction.
static int __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
                                            kmp_i18n_id_t *const msg_id) {
  kmp_cpuid buf;
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Check if cpuid leaf 4 is supported.
  __kmp_x86_cpuid(0, 0, &buf);
  if (buf.eax < 4) {
    *msg_id = kmp_i18n_str_NoLeaf4Support;
    return -1;
  }

  // The algorithm below requires binding to each proc, so if we are not
  // capable of calling __kmp_get_system_affinity() and
  // __kmp_set_system_affinity(), use the defaults computed from issuing cpuid
  // on the current thread only.
  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from cpuid on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity_type == affinity_none);

    // Get an upper bound on the number of threads per package using cpuid(1).
    __kmp_x86_cpuid(1, 0, &buf);
    int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
    if (maxThreadsPerPkg == 0) {
      maxThreadsPerPkg = 1;
    }

    // The num cores per pkg comes from cpuid(4); 1 must be added to the
    // encoded value.
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax >= 4) {
      __kmp_x86_cpuid(4, 0, &buf);
      nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
    } else {
      nCoresPerPkg = 1;
    }

    // If the machine is not affinity capable, we assume that HT is off.
    __kmp_ncores = __kmp_xproc;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    __kmp_nThreadsPerCore = 1;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    return 0;
  }
  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  KMP_ASSERT(oldMask != NULL);
  __kmp_get_system_affinity(oldMask, TRUE);

  // Run through each of the available contexts, binding the current thread
  // to it, and obtaining the pertinent information using the cpuid instr.
  apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
  unsigned nApics = 0;
  unsigned i;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

    __kmp_affinity_dispatch->bind_thread(i);
    threadInfo[nApics].osId = i;

    // The apic id and max threads per pkg come from cpuid(1).
    __kmp_x86_cpuid(1, 0, &buf);
    if (((buf.edx >> 9) & 1) == 0) {
      __kmp_set_system_affinity(oldMask, TRUE);
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_ApicNotPresent;
      return -1;
    }
    threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
    threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
    if (threadInfo[nApics].maxThreadsPerPkg == 0) {
      threadInfo[nApics].maxThreadsPerPkg = 1;
    }

    // Max cores per pkg comes from cpuid(4); 1 must be added to the encoded
    // value.
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax >= 4) {
      __kmp_x86_cpuid(4, 0, &buf);
      threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
    } else {
      threadInfo[nApics].maxCoresPerPkg = 1;
    }

    // Infer the pkgId / coreId / threadId using only the info obtained locally.
    int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg);
    threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

    int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg);
    int widthT = widthCT - widthC;
    if (widthT < 0) {
      // The cpuid info is inconsistent. Make sure to restore the affinity mask
      // before bailing out.
      __kmp_set_system_affinity(oldMask, TRUE);
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_InvalidCpuidInfo;
      return -1;
    }

    int maskC = (1 << widthC) - 1;
    threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC;

    int maskT = (1 << widthT) - 1;
    threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

    nApics++;
  }
  // We've collected all the info we need.
  // Restore the old affinity mask for this thread.
  __kmp_set_system_affinity(oldMask, TRUE);

  // If there's only one thread context to bind to, form an Address object
  // with depth 1 and return immediately (or, if affinity is off, set
  // address2os to NULL and return).
  KMP_ASSERT(nApics > 0);
  if (nApics == 1) {
    __kmp_ncores = nPackages = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

      KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      return 0;
    }

    *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair));
    Address addr(1);
    addr.labels[0] = threadInfo[0].pkgId;
    (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return 1;
  }

  // Sort the threadInfo table by physical Id.
  qsort(threadInfo, nApics, sizeof(*threadInfo),
        __kmp_affinity_cmp_apicThreadInfo_phys_id);
  // The table is now sorted by pkgId / coreId / threadId, but we really don't
  // know the radix of any of the fields: pkgId's may be sparsely assigned, and
  // we don't want to assume coreId's or threadId's start at 0 and are dense.
  // Determine the radix of each field, and check that the values returned by
  // cpuid for all threads bound to a given package agree on maxThreadsPerPkg
  // and maxCoresPerPkg.
  nPackages = 1;
  nCoresPerPkg = 1;
  __kmp_nThreadsPerCore = 1;
  unsigned nCores = 1;

  unsigned pkgCt = 1; // to determine radii
  unsigned lastPkgId = threadInfo[0].pkgId;
  unsigned coreCt = 1;
  unsigned lastCoreId = threadInfo[0].coreId;
  unsigned threadCt = 1;
  unsigned lastThreadId = threadInfo[0].threadId;

  // intra-pkg consistency checks
  unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
  unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

  for (i = 1; i < nApics; i++) {
    if (threadInfo[i].pkgId != lastPkgId) {
      nCores++;
      pkgCt++;
      lastPkgId = threadInfo[i].pkgId;
      if ((int)coreCt > nCoresPerPkg)
        nCoresPerPkg = coreCt;
      coreCt = 1;
      lastCoreId = threadInfo[i].coreId;
      if ((int)threadCt > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = threadCt;
      threadCt = 1;
      lastThreadId = threadInfo[i].threadId;

      // This is a different package, so go on to the next iteration without
      // doing any consistency checks. Reset the consistency check vars, though.
      prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
      prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
      continue;
    }

    if (threadInfo[i].coreId != lastCoreId) {
      nCores++;
      coreCt++;
      lastCoreId = threadInfo[i].coreId;
      if ((int)threadCt > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = threadCt;
      threadCt = 1;
      lastThreadId = threadInfo[i].threadId;
    } else if (threadInfo[i].threadId != lastThreadId) {
      threadCt++;
      lastThreadId = threadInfo[i].threadId;
    } else {
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
      return -1;
    }

    // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
    // fields agree between all the threads bound to a given package.
    if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) ||
        (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
      return -1;
    }
  }
  nPackages = pkgCt; // the last package hasn't been counted in the loop above
  if ((int)coreCt > nCoresPerPkg)
    nCoresPerPkg = coreCt;
  if ((int)threadCt > __kmp_nThreadsPerCore)
    __kmp_nThreadsPerCore = threadCt;

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  __kmp_ncores = nCores;
  if (__kmp_affinity_verbose) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

    KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (__kmp_affinity_uniform_topology()) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }
    KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
  }
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(nApics == (unsigned)__kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  for (i = 0; i < nApics; ++i) {
    __kmp_pu_os_idx[i] = threadInfo[i].osId;
  }
  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return 0;
  }
  // Now that we've determined the number of packages, the number of cores per
  // package, and the number of threads per core, we can construct the data
  // structure that is to be returned.
  int pkgLevel = 0;
  int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
  int threadLevel =
      (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
  unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

  KMP_ASSERT(depth > 0);
  *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

  for (i = 0; i < nApics; ++i) {
    Address addr(depth);
    unsigned os = threadInfo[i].osId;
    int d = 0;

    if (pkgLevel >= 0) {
      addr.labels[d++] = threadInfo[i].pkgId;
    }
    if (coreLevel >= 0) {
      addr.labels[d++] = threadInfo[i].coreId;
    }
    if (threadLevel >= 0) {
      addr.labels[d++] = threadInfo[i].threadId;
    }
    (*address2os)[i] = AddrUnsPair(addr, os);
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled in the
    // machine topology map.
    __kmp_affinity_gran_levels = 0;
    if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
      __kmp_affinity_gran_levels++;
    }
    if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
      __kmp_affinity_gran_levels++;
    }
    if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
      __kmp_affinity_gran_levels++;
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
                                  coreLevel, threadLevel);
  }

  __kmp_free(threadInfo);
  KMP_CPU_FREE(oldMask);
  return depth;
}
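// Leaf 4 ("apicid") above yields 8-bit APIC ids, so it cannot label more than
// 256 logical processors; the x2APIC path below uses cpuid leaf 11, whose
// 32-bit x2APIC ids and explicit per-level shift widths remove that limit.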
// Intel(R) microarchitecture code name Nehalem, Dunnington and later
// architectures support a newer interface for specifying the x2APIC Ids,
// based on cpuid leaf 11.
static int __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
                                              kmp_i18n_id_t *const msg_id) {
  kmp_cpuid buf;
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Check to see if cpuid leaf 11 is supported.
  __kmp_x86_cpuid(0, 0, &buf);
  if (buf.eax < 11) {
    *msg_id = kmp_i18n_str_NoLeaf11Support;
    return -1;
  }
  __kmp_x86_cpuid(11, 0, &buf);
  if (buf.ebx == 0) {
    *msg_id = kmp_i18n_str_NoLeaf11Support;
    return -1;
  }

  // Find the number of levels in the machine topology. While we're at it, get
  // the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will try
  // to get more accurate values later by explicitly counting them, but get
  // reasonable defaults now, in case we return early.
  int level;
  int threadLevel = -1;
  int coreLevel = -1;
  int pkgLevel = -1;
  __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

  for (level = 0;; level++) {
    if (level > 31) {
      // There could actually be 32 valid levels in the machine topology, but
      // a machine that does not exit this loop before iteration 32 almost
      // certainly has broken x2APIC settings; reject it based on trip count.
      *msg_id = kmp_i18n_str_InvalidCpuidInfo;
      return -1;
    }
    __kmp_x86_cpuid(11, level, &buf);
    if (buf.ebx == 0) {
      if (pkgLevel < 0) {
        // Will infer nPackages from __kmp_xproc.
        pkgLevel = level;
        level++;
      }
      break;
    }
    int kind = (buf.ecx >> 8) & 0xff;
    if (kind == 1) {
      // SMT level
      threadLevel = level;
      coreLevel = -1;
      pkgLevel = -1;
      __kmp_nThreadsPerCore = buf.ebx & 0xffff;
      if (__kmp_nThreadsPerCore == 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
    } else if (kind == 2) {
      // core level
      coreLevel = level;
      pkgLevel = -1;
      nCoresPerPkg = buf.ebx & 0xffff;
      if (nCoresPerPkg == 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
    } else {
      if (level <= 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
      if (pkgLevel >= 0) {
        continue;
      }
      pkgLevel = level;
      nPackages = buf.ebx & 0xffff;
      if (nPackages == 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
    }
  }
  int depth = level;

  // In the above loop, "level" was counted from the finest level (usually
  // thread) to the coarsest. The caller expects the labels in
  // (*address2os)[].first.labels[] in the inverse order, so invert the vars
  // saying which level means what.
  if (threadLevel >= 0) {
    threadLevel = depth - threadLevel - 1;
  }
  if (coreLevel >= 0) {
    coreLevel = depth - coreLevel - 1;
  }
  KMP_DEBUG_ASSERT(pkgLevel >= 0);
  pkgLevel = depth - pkgLevel - 1;
  // The algorithm used requires binding to each proc, so if we are not capable
  // of doing that, use the defaults computed from cpuid on the current thread.
  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from cpuid on the current thread, and __kmp_xproc.
    KMP_ASSERT(__kmp_affinity_type == affinity_none);

    __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    return 0;
  }
  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  __kmp_get_system_affinity(oldMask, TRUE);

  // Allocate the data structure to be returned.
  AddrUnsPair *retval =
      (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);

  // Run through each of the available contexts, binding the current thread
  // to it, and obtaining the pertinent information using the cpuid instr.
  unsigned int proc;
  int nApics = 0;
  KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
      continue;
    }
    KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);

    __kmp_affinity_dispatch->bind_thread(proc);

    // Extract labels for each level in the machine topology map from the
    // Apic Id.
    Address addr(depth);
    int prev_shift = 0;

    for (level = 0; level < depth; level++) {
      __kmp_x86_cpuid(11, level, &buf);
      unsigned apicId = buf.edx;
      if (buf.ebx == 0) {
        if (level != depth - 1) {
          KMP_CPU_FREE(oldMask);
          *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
          return -1;
        }
        addr.labels[depth - level - 1] = apicId >> prev_shift;
        level++;
        break;
      }
      int shift = buf.eax & 0x1f;
      int mask = (1 << shift) - 1;
      addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
      prev_shift = shift;
    }
    if (level != depth) {
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
      return -1;
    }

    retval[nApics] = AddrUnsPair(addr, proc);
    nApics++;
  }

  // We've collected all the info we need.
  // Restore the old affinity mask for this thread.
  __kmp_set_system_affinity(oldMask, TRUE);
  // If there's only one thread context to bind to, return now.
  KMP_ASSERT(nApics > 0);
  if (nApics == 1) {
    __kmp_ncores = nPackages = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

      KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(retval);
      KMP_CPU_FREE(oldMask);
      return 0;
    }

    // Form an Address object which only includes the package level.
    Address addr(1);
    addr.labels[0] = retval[0].first.labels[pkgLevel];
    retval[0].first = addr;

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
    }

    *address2os = retval;
    KMP_CPU_FREE(oldMask);
    return 1;
  }
  // Sort the table by physical Id.
  qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

  // Find the radix at each of the levels.
  unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  for (level = 0; level < depth; level++) {
    totals[level] = 1;
    maxCt[level] = 1;
    counts[level] = 1;
    last[level] = retval[0].first.labels[level];
  }

  // From here on, the iteration variable "level" runs from the finest level
  // to the coarsest, i.e. we iterate forward through
  // (*address2os)[].first.labels[] - in the previous loops, we iterated
  // backwards.
  for (proc = 1; (int)proc < nApics; proc++) {
    int level;
    for (level = 0; level < depth; level++) {
      if (retval[proc].first.labels[level] != last[level]) {
        int j;
        for (j = level + 1; j < depth; j++) {
          totals[j]++;
          counts[j] = 1;
          last[j] = retval[proc].first.labels[j];
        }
        totals[level]++;
        counts[level]++;
        if (counts[level] > maxCt[level]) {
          maxCt[level] = counts[level];
        }
        last[level] = retval[proc].first.labels[level];
        break;
      } else if (level == depth - 1) {
        __kmp_free(last);
        __kmp_free(maxCt);
        __kmp_free(counts);
        __kmp_free(totals);
        __kmp_free(retval);
        KMP_CPU_FREE(oldMask);
        *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
        return -1;
      }
    }
  }
  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  if (threadLevel >= 0) {
    __kmp_nThreadsPerCore = maxCt[threadLevel];
  } else {
    __kmp_nThreadsPerCore = 1;
  }
  nPackages = totals[pkgLevel];

  if (coreLevel >= 0) {
    __kmp_ncores = totals[coreLevel];
    nCoresPerPkg = maxCt[coreLevel];
  } else {
    __kmp_ncores = nPackages;
    nCoresPerPkg = 1;
  }

  // Check to see if the machine topology is uniform.
  unsigned prod = maxCt[0];
  for (level = 1; level < depth; level++) {
    prod *= maxCt[level];
  }
  bool uniform = (prod == totals[level - 1]);
  // Print the machine topology summary.
  if (__kmp_affinity_verbose) {
    char mask[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

    KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (uniform) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }

    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);

    __kmp_str_buf_print(&buf, "%d", totals[0]);
    for (level = 1; level <= pkgLevel; level++) {
      __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
    }
    KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);

    __kmp_str_buf_free(&buf);
  }
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  for (proc = 0; (int)proc < nApics; ++proc) {
    __kmp_pu_os_idx[proc] = retval[proc].second;
  }
  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(last);
    __kmp_free(maxCt);
    __kmp_free(counts);
    __kmp_free(totals);
    __kmp_free(retval);
    KMP_CPU_FREE(oldMask);
    return 0;
  }
  // Find any levels with radix 1, and remove them from the map
  // (except for the package level).
  int new_depth = 0;
  for (level = 0; level < depth; level++) {
    if ((maxCt[level] == 1) && (level != pkgLevel)) {
      continue;
    }
    new_depth++;
  }

  // If we are removing any levels, allocate a new vector to return,
  // and copy the relevant information to it.
  if (new_depth != depth) {
    AddrUnsPair *new_retval =
        (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
    for (proc = 0; (int)proc < nApics; proc++) {
      Address addr(new_depth);
      new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
    }
    int new_level = 0;
    int newPkgLevel = -1;
    int newCoreLevel = -1;
    int newThreadLevel = -1;
    for (level = 0; level < depth; level++) {
      if ((maxCt[level] == 1) && (level != pkgLevel)) {
        // Remove this level. Never remove the package level.
        continue;
      }
      if (level == pkgLevel) {
        newPkgLevel = new_level;
      }
      if (level == coreLevel) {
        newCoreLevel = new_level;
      }
      if (level == threadLevel) {
        newThreadLevel = new_level;
      }
      for (proc = 0; (int)proc < nApics; proc++) {
        new_retval[proc].first.labels[new_level] =
            retval[proc].first.labels[level];
      }
      new_level++;
    }

    __kmp_free(retval);
    retval = new_retval;
    depth = new_depth;
    pkgLevel = newPkgLevel;
    coreLevel = newCoreLevel;
    threadLevel = newThreadLevel;
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled
    // in the machine topology map.
    __kmp_affinity_gran_levels = 0;
    if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
      __kmp_affinity_gran_levels++;
    }
    if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
      __kmp_affinity_gran_levels++;
    }
    if (__kmp_affinity_gran > affinity_gran_package) {
      __kmp_affinity_gran_levels++;
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel, coreLevel,
                                  threadLevel);
  }

  __kmp_free(last);
  __kmp_free(maxCt);
  __kmp_free(counts);
  __kmp_free(totals);
  KMP_CPU_FREE(oldMask);
  *address2os = retval;
  return depth;
}
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#define osIdIndex 0
#define threadIdIndex 1
#define coreIdIndex 2
#define pkgIdIndex 3
#define nodeIdIndex 4

typedef unsigned *ProcCpuInfo;
static unsigned maxIndex = pkgIdIndex;

static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a,
                                                  const void *b) {
  unsigned i;
  const unsigned *aa = *(unsigned *const *)a;
  const unsigned *bb = *(unsigned *const *)b;
  for (i = maxIndex;; i--) {
    if (aa[i] < bb[i])
      return -1;
    if (aa[i] > bb[i])
      return 1;
    if (i == osIdIndex)
      break;
  }
  return 0;
}
#if KMP_USE_HIER_SCHED
// Set the array sizes for the hierarchy layers.
static void __kmp_dispatch_set_hierarchy_values() {
  // Set the maximum number of L1's to the number of cores, the maximum number
  // of L2's to either cores/2 (Intel(R) Xeon Phi(TM) formerly codenamed
  // Knights Landing) or the number of cores, and the maximum number of NUMA
  // nodes and L3's to the number of packages.
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1] =
      nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L1 + 1] = __kmp_ncores;
#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
  if (__kmp_mic_type >= mic3)
    __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores / 2;
  else
#endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
    __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L2 + 1] = __kmp_ncores;
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_L3 + 1] = nPackages;
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_NUMA + 1] = nPackages;
  __kmp_hier_max_units[kmp_hier_layer_e::LAYER_LOOP + 1] = 1;
  // Set the number of hardware threads per unit at each layer.
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_THREAD + 1] = 1;
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L1 + 1] =
      __kmp_nThreadsPerCore;
#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
  if (__kmp_mic_type >= mic3)
    __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
        2 * __kmp_nThreadsPerCore;
  else
#endif // KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
    __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L2 + 1] =
        __kmp_nThreadsPerCore;
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_L3 + 1] =
      nCoresPerPkg * __kmp_nThreadsPerCore;
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_NUMA + 1] =
      nCoresPerPkg * __kmp_nThreadsPerCore;
  __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LOOP + 1] =
      nPackages * nCoresPerPkg * __kmp_nThreadsPerCore;
}

// Return the index into the hierarchy for this tid and layer type (+1 maps to
// base of individual thread hierarchy).
int __kmp_dispatch_get_index(int tid, kmp_hier_layer_e type) {
  int index = type + 1;
  int num_hw_threads = __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1];
  KMP_DEBUG_ASSERT(type != kmp_hier_layer_e::LAYER_LAST);
  if (type == kmp_hier_layer_e::LAYER_THREAD)
    return tid;
  else if (type == kmp_hier_layer_e::LAYER_LOOP)
    return 0;
  KMP_DEBUG_ASSERT(__kmp_hier_max_units[index] != 0);
  if (tid >= num_hw_threads)
    tid = tid % num_hw_threads;
  return (tid / __kmp_hier_threads_per[index]) % __kmp_hier_max_units[index];
}

// Return the number of t1's per t2.
int __kmp_dispatch_get_t1_per_t2(kmp_hier_layer_e t1, kmp_hier_layer_e t2) {
  int i1 = t1 + 1;
  int i2 = t2 + 1;
  KMP_DEBUG_ASSERT(i1 <= i2);
  KMP_DEBUG_ASSERT(t1 != kmp_hier_layer_e::LAYER_LAST);
  KMP_DEBUG_ASSERT(t2 != kmp_hier_layer_e::LAYER_LAST);
  KMP_DEBUG_ASSERT(__kmp_hier_threads_per[i1] != 0);
  // (nthreads/t2) / (nthreads/t1) = t1 per t2
  return __kmp_hier_threads_per[i2] / __kmp_hier_threads_per[i1];
}
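// Example (hypothetical 2-socket machine, 8 cores/socket, 2 threads/core):
// __kmp_dispatch_get_index(11, LAYER_L1) == (11 / 2) % 16 == core 5, and
// __kmp_dispatch_get_t1_per_t2(LAYER_L1, LAYER_L3) == 16 / 2 == 8 cores per
// package.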
#endif // KMP_USE_HIER_SCHED

// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
// affinity map.
static int __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os,
                                             int *line,
                                             kmp_i18n_id_t *const msg_id,
                                             FILE *f) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Scan the file once: count the number of "processor" (osId) fields, and
  // find the highest level of node_<n> id fields.
  char buf[256];
  unsigned num_records = 0;
  while (!feof(f)) {
    buf[sizeof(buf) - 1] = 1;
    if (!fgets(buf, sizeof(buf), f)) {
      // Read errors presumably because of EOF.
      break;
    }

    char s1[] = "processor";
    if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
      num_records++;
      continue;
    }

    unsigned level;
    if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
      if (nodeIdIndex + level >= maxIndex) {
        maxIndex = nodeIdIndex + level;
      }
      continue;
    }
  }

  // Check for empty file / no valid processor records, or too many. The
  // number of records can't exceed the number of valid bits in the affinity
  // mask.
  if (num_records == 0) {
    *line = 0;
    *msg_id = kmp_i18n_str_NoProcRecords;
    return -1;
  }
  if (num_records > (unsigned)__kmp_xproc) {
    *line = 0;
    *msg_id = kmp_i18n_str_TooManyProcRecords;
    return -1;
  }

  // Set the file pointer back to the beginning, so that we can scan the file
  // again, this time performing a full parse of the data.
  if (fseek(f, 0, SEEK_SET) != 0) {
    *line = 0;
    *msg_id = kmp_i18n_str_CantRewindCpuinfo;
    return -1;
  }
  // Allocate a vector of ProcCpuInfo records, where we will place the data.
  // Adding an extra element at the end allows us to remove a lot of extra
  // checks for termination conditions.
  unsigned **threadInfo =
      (unsigned **)__kmp_allocate((num_records + 1) * sizeof(unsigned *));
  unsigned i;
  for (i = 0; i <= num_records; i++) {
    threadInfo[i] =
        (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  }

#define CLEANUP_THREAD_INFO                                                    \
  for (i = 0; i <= num_records; i++) {                                         \
    __kmp_free(threadInfo[i]);                                                 \
  }                                                                            \
  __kmp_free(threadInfo);

  // A value of UINT_MAX means that we didn't find the field.
  unsigned __index;

#define INIT_PROC_INFO(p)                                                      \
  for (__index = 0; __index <= maxIndex; __index++) {                          \
    (p)[__index] = UINT_MAX;                                                   \
  }

  for (i = 0; i <= num_records; i++) {
    INIT_PROC_INFO(threadInfo[i]);
  }

  unsigned num_avail = 0;
  *line = 0;
  while (!feof(f)) {
    // Create an inner scoping level, so that all the goto targets at the end
    // of the loop appear in an outer scoping level. This avoids warnings
    // about jumping past an initialization to a target in the same block.
    {
      buf[sizeof(buf) - 1] = 1;
      bool long_line = false;
      if (!fgets(buf, sizeof(buf), f)) {
        // Read errors presumably because of EOF. If there is valid data in
        // threadInfo[num_avail], then fake a blank line to ensure that the
        // last address gets parsed.
        bool valid = false;
        for (i = 0; i <= maxIndex; i++) {
          if (threadInfo[num_avail][i] != UINT_MAX) {
            valid = true;
          }
        }
        if (!valid) {
          break;
        }
        buf[0] = 0;
      } else if (!buf[sizeof(buf) - 1]) {
        // The line is longer than the buffer. Set a flag and don't emit an
        // error if we were going to ignore it anyway.
        long_line = true;

#define CHECK_LINE                                                             \
  if (long_line) {                                                             \
    CLEANUP_THREAD_INFO;                                                       \
    *msg_id = kmp_i18n_str_LongLineCpuinfo;                                    \
    return -1;                                                                 \
  }
      }
      (*line)++;

      char s1[] = "processor";
      if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s1) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][osIdIndex] != UINT_MAX)
#if KMP_ARCH_AARCH64
          // Handle the old AArch64 /proc/cpuinfo layout differently: it lists
          // all of the 'processor' entries in a single 'Processor' section,
          // so the normal duplicate check would always fail.
          num_avail++;
#else
          goto dup_field;
#endif
        threadInfo[num_avail][osIdIndex] = val;
#if KMP_OS_LINUX && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
        char path[256];
        KMP_SNPRINTF(
            path, sizeof(path),
            "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
            threadInfo[num_avail][osIdIndex]);
        __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);

        KMP_SNPRINTF(path, sizeof(path),
                     "/sys/devices/system/cpu/cpu%u/topology/core_id",
                     threadInfo[num_avail][osIdIndex]);
        __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
        continue;
#else
      }
      char s2[] = "physical id";
      if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s2) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][pkgIdIndex] = val;
        continue;
      }
      char s3[] = "core id";
      if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s3) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][coreIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][coreIdIndex] = val;
        continue;
#endif // KMP_OS_LINUX && USE_SYSFS_INFO
      }
      char s4[] = "thread id";
      if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s4) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][threadIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][threadIdIndex] = val;
        continue;
      }
      unsigned level;
      if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s4) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        KMP_ASSERT(nodeIdIndex + level <= maxIndex);
        if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][nodeIdIndex + level] = val;
        continue;
      }
      // We didn't recognize the leading token on the line. There are lots of
      // leading tokens that we don't recognize - if the line isn't empty, go
      // on to the next line.
      if ((*buf != 0) && (*buf != '\n')) {
        // If the line is longer than the buffer, read characters
        // until we find a newline.
        if (long_line) {
          int ch;
          while (((ch = fgetc(f)) != EOF) && (ch != '\n'))
            ;
        }
        continue;
      }

      // A newline has signalled the end of the processor record.
      // Check that there aren't too many procs specified.
      if ((int)num_avail == __kmp_xproc) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_TooManyEntries;
        return -1;
      }

      // Check for missing fields. The osId field must be there, and we
      // currently require that the physical id field be specified, also.
      if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_MissingProcField;
        return -1;
      }
      if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_MissingPhysicalIDField;
        return -1;
      }

      // Skip this proc if it is not included in the machine model.
      if (!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex],
                         __kmp_affin_fullMask)) {
        INIT_PROC_INFO(threadInfo[num_avail]);
        continue;
      }

      // We have a successful parse of this proc's info.
      // Increment the counter, and prepare for the next proc.
      num_avail++;
      KMP_ASSERT(num_avail <= num_records);
      INIT_PROC_INFO(threadInfo[num_avail]);
    }
    continue;

  no_val:
    CLEANUP_THREAD_INFO;
    *msg_id = kmp_i18n_str_MissingValCpuinfo;
    return -1;

  dup_field:
    CLEANUP_THREAD_INFO;
    *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
    return -1;
  }
  *line = 0;
#if KMP_MIC && REDUCE_TEAM_SIZE
  unsigned teamSize = 0;
#endif // KMP_MIC && REDUCE_TEAM_SIZE

  // If there's only one thread context to bind to, form an Address object
  // with depth 1 and return immediately (or, if affinity is off, set
  // address2os to NULL and return).
  KMP_ASSERT(num_avail > 0);
  KMP_ASSERT(num_avail <= num_records);
  if (num_avail == 1) {
    __kmp_ncores = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
    if (__kmp_affinity_verbose) {
      if (!KMP_AFFINITY_CAPABLE()) {
        KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                  __kmp_affin_fullMask);
        KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
          KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
          KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      }
      int index;
      kmp_str_buf_t buf;
      __kmp_str_buf_init(&buf);
      __kmp_str_buf_print(&buf, "1");
      for (index = maxIndex - 1; index > pkgIdIndex; index--) {
        __kmp_str_buf_print(&buf, " x 1");
      }
      KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
      __kmp_str_buf_free(&buf);
    }

    if (__kmp_affinity_type == affinity_none) {
      CLEANUP_THREAD_INFO;
      return 0;
    }

    *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair));
    Address addr(1);
    addr.labels[0] = threadInfo[0][pkgIdIndex];
    (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
    }

    CLEANUP_THREAD_INFO;
    return 1;
  }
  // Sort the threadInfo table by physical Id.
  qsort(threadInfo, num_avail, sizeof(*threadInfo),
        __kmp_affinity_cmp_ProcCpuInfo_phys_id);

  // Determine the radix of each field, and perform a consistency check: if
  // the thread ids were specified, they must be unique.
  unsigned *counts =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *maxCt =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *totals =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *lastId =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));

  bool assign_thread_ids = false;
  unsigned threadIdCt;
  unsigned index;

restart_radix_check:
  threadIdCt = 0;
  // Initialize the counter arrays with data from threadInfo[0].
  if (assign_thread_ids) {
    if (threadInfo[0][threadIdIndex] == UINT_MAX) {
      threadInfo[0][threadIdIndex] = threadIdCt++;
    } else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
      threadIdCt = threadInfo[0][threadIdIndex] + 1;
    }
  }
  for (index = 0; index <= maxIndex; index++) {
    counts[index] = 1;
    maxCt[index] = 1;
    totals[index] = 1;
    lastId[index] = threadInfo[0][index];
  }

  // Run through the rest of the records, counting radii at each level.
  for (i = 1; i < num_avail; i++) {
    // Find the most significant index whose id differs from the id for the
    // previous record.
    for (index = maxIndex; index >= threadIdIndex; index--) {
      if (assign_thread_ids && (index == threadIdIndex)) {
        // Auto-assign the thread id field if it wasn't specified.
        if (threadInfo[i][threadIdIndex] == UINT_MAX) {
          threadInfo[i][threadIdIndex] = threadIdCt++;
        }
        // Apparently the thread id field was specified for some records and
        // not others. Start the thread id counter off at the next higher
        // thread id.
        else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
          threadIdCt = threadInfo[i][threadIdIndex] + 1;
        }
      }
      if (threadInfo[i][index] != lastId[index]) {
        // Run through all indices which are less significant, and reset the
        // counts to 1. At all levels up to and including index, we need to
        // increment the totals and record the last id.
        unsigned index2;
        for (index2 = threadIdIndex; index2 < index; index2++) {
          totals[index2]++;
          if (counts[index2] > maxCt[index2]) {
            maxCt[index2] = counts[index2];
          }
          counts[index2] = 1;
          lastId[index2] = threadInfo[i][index2];
        }
        counts[index]++;
        totals[index]++;
        lastId[index] = threadInfo[i][index];

        if (assign_thread_ids && (index > threadIdIndex)) {
#if KMP_MIC && REDUCE_TEAM_SIZE
          // The default team size is the total #threads in the machine
          // minus 1 thread for every core that has 3 or more threads.
          teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
#endif // KMP_MIC && REDUCE_TEAM_SIZE

          // Restart the thread counter, as we are on a new core.
          threadIdCt = 0;

          // Auto-assign the thread id field if it wasn't specified.
          if (threadInfo[i][threadIdIndex] == UINT_MAX) {
            threadInfo[i][threadIdIndex] = threadIdCt++;
          }

          // Apparently the thread id field was specified for some records and
          // not others. Start the thread id counter off at the next higher
          // thread id.
          else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
            threadIdCt = threadInfo[i][threadIdIndex] + 1;
          }
        }
        break;
      }
    }
    if (index < threadIdIndex) {
      // If thread ids were specified, it is an error if they are not unique.
      // Also, check that we haven't already restarted the loop (to be safe -
      // shouldn't need to).
      if ((threadInfo[i][threadIdIndex] != UINT_MAX) || assign_thread_ids) {
        __kmp_free(lastId);
        __kmp_free(totals);
        __kmp_free(maxCt);
        __kmp_free(counts);
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
        return -1;
      }

      // If the thread ids were not specified and we see entries that are
      // duplicates, start the loop over and assign the thread ids manually.
      assign_thread_ids = true;
      goto restart_radix_check;
    }
  }
#if KMP_MIC && REDUCE_TEAM_SIZE
  // The default team size is the total #threads in the machine
  // minus 1 thread for every core that has 3 or more threads.
  teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
#endif // KMP_MIC && REDUCE_TEAM_SIZE

  for (index = threadIdIndex; index <= maxIndex; index++) {
    if (counts[index] > maxCt[index]) {
      maxCt[index] = counts[index];
    }
  }

  __kmp_nThreadsPerCore = maxCt[threadIdIndex];
  nCoresPerPkg = maxCt[coreIdIndex];
  nPackages = totals[pkgIdIndex];

  // Check to see if the machine topology is uniform.
  unsigned prod = totals[maxIndex];
  for (index = threadIdIndex; index < maxIndex; index++) {
    prod *= maxCt[index];
  }
  bool uniform = (prod == totals[threadIdIndex]);
  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  __kmp_ncores = totals[coreIdIndex];

  if (__kmp_affinity_verbose) {
    if (!KMP_AFFINITY_CAPABLE()) {
      KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (uniform) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
    } else {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                __kmp_affin_fullMask);
      KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (uniform) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
    }
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);

    __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
    for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
      __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
    }
    KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
               maxCt[threadIdIndex], __kmp_ncores);

    __kmp_str_buf_free(&buf);
  }
#if KMP_MIC && REDUCE_TEAM_SIZE
  // Set the default team size.
  if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
    __kmp_dflt_team_nth = teamSize;
    KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting "
                  "__kmp_dflt_team_nth = %d\n",
                  __kmp_dflt_team_nth));
  }
#endif // KMP_MIC && REDUCE_TEAM_SIZE

  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(num_avail == (unsigned)__kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  for (i = 0; i < num_avail; ++i) { // fill the os indices
    __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
  }

  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(lastId);
    __kmp_free(totals);
    __kmp_free(maxCt);
    __kmp_free(counts);
    CLEANUP_THREAD_INFO;
    return 0;
  }
  // Count the number of levels which have more nodes at that level than at
  // the parent's level (with there being an implicit root node of the top
  // level). This is equivalent to saying that there is at least one node at
  // this level which has a sibling. These levels are in the map, and the
  // package level is always in the map.
  bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
  for (index = threadIdIndex; index < maxIndex; index++) {
    KMP_ASSERT(totals[index] >= totals[index + 1]);
    inMap[index] = (totals[index] > totals[index + 1]);
  }
  inMap[maxIndex] = (totals[maxIndex] > 1);
  inMap[pkgIdIndex] = true;

  int depth = 0;
  for (index = threadIdIndex; index <= maxIndex; index++) {
    if (inMap[index]) {
      depth++;
    }
  }
  KMP_ASSERT(depth > 0);

  // Construct the data structure that is to be returned.
  *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * num_avail);
  int pkgLevel = -1;
  int coreLevel = -1;
  int threadLevel = -1;

  for (i = 0; i < num_avail; ++i) {
    Address addr(depth);
    unsigned os = threadInfo[i][osIdIndex];
    int src_index;
    int dst_index = 0;

    for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
      if (!inMap[src_index]) {
        continue;
      }
      addr.labels[dst_index] = threadInfo[i][src_index];
      if (src_index == pkgIdIndex) {
        pkgLevel = dst_index;
      } else if (src_index == coreIdIndex) {
        coreLevel = dst_index;
      } else if (src_index == threadIdIndex) {
        threadLevel = dst_index;
      }
      dst_index++;
    }
    (*address2os)[i] = AddrUnsPair(addr, os);
  }
  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled
    // in the machine topology map.
    unsigned src_index;
    __kmp_affinity_gran_levels = 0;
    for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
      if (!inMap[src_index]) {
        continue;
      }
      switch (src_index) {
      case threadIdIndex:
        if (__kmp_affinity_gran > affinity_gran_thread) {
          __kmp_affinity_gran_levels++;
        }
        break;
      case coreIdIndex:
        if (__kmp_affinity_gran > affinity_gran_core) {
          __kmp_affinity_gran_levels++;
        }
        break;
      case pkgIdIndex:
        if (__kmp_affinity_gran > affinity_gran_package) {
          __kmp_affinity_gran_levels++;
        }
        break;
      }
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
                                  coreLevel, threadLevel);
  }

  __kmp_free(inMap);
  __kmp_free(lastId);
  __kmp_free(totals);
  __kmp_free(maxCt);
  __kmp_free(counts);
  CLEANUP_THREAD_INFO;
  return depth;
}
// Create and return a table of affinity masks, indexed by OS thread ID.
// This routine handles OR'ing together all the affinity masks of threads
// that are sufficiently close, if granularity > fine.
static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex,
                                            unsigned *numUnique,
                                            AddrUnsPair *address2os,
                                            unsigned numAddrs) {
  // First form a table of affinity masks in order of OS thread id.
  unsigned depth;
  unsigned maxOsId;
  unsigned i;

  KMP_ASSERT(numAddrs > 0);
  depth = address2os[0].first.depth;

  maxOsId = 0;
  for (i = numAddrs - 1;; --i) {
    unsigned osId = address2os[i].second;
    if (osId > maxOsId) {
      maxOsId = osId;
    }
    if (i == 0)
      break;
  }
  kmp_affin_mask_t *osId2Mask;
  KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId + 1));

  // Sort the address2os table according to physical order. Doing so will put
  // all threads on the same core/package/node in consecutive locations.
  qsort(address2os, numAddrs, sizeof(*address2os),
        __kmp_affinity_cmp_Address_labels);

  KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
  if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
    KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
  }
  if (__kmp_affinity_gran_levels >= (int)depth) {
    if (__kmp_affinity_verbose ||
        (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
      KMP_WARNING(AffThreadsMayMigrate);
    }
  }

  // Run through the table, forming the masks for all threads on each core.
  // Threads on the same core will have identical Address objects, not
  // considering the last level, which must be the thread id. All threads on a
  // core will appear consecutively.
  unsigned unique = 0;
  unsigned j = 0; // index of 1st thread on core
  unsigned leader = 0;
  Address *leaderAddr = &(address2os[0].first);
  kmp_affin_mask_t *sum;
  KMP_CPU_ALLOC_ON_STACK(sum);
  KMP_CPU_ZERO(sum);
  KMP_CPU_SET(address2os[0].second, sum);
  for (i = 1; i < numAddrs; i++) {
    // If this thread is sufficiently close to the leader (within the
    // granularity setting), then set the bit for this os thread in the
    // affinity mask for this group, and go on to the next thread.
    if (leaderAddr->isClose(address2os[i].first, __kmp_affinity_gran_levels)) {
      KMP_CPU_SET(address2os[i].second, sum);
      continue;
    }

    // For every thread in this group, copy the mask to the thread's entry in
    // the osId2Mask table. Mark the first address as a leader.
    for (; j < i; j++) {
      unsigned osId = address2os[j].second;
      KMP_DEBUG_ASSERT(osId <= maxOsId);
      kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
      KMP_CPU_COPY(mask, sum);
      address2os[j].first.leader = (j == leader);
    }
    unique++;

    // Start a new mask.
    leader = i;
    leaderAddr = &(address2os[i].first);
    KMP_CPU_ZERO(sum);
    KMP_CPU_SET(address2os[i].second, sum);
  }

  // For every thread in the last group, copy the mask to the thread's
  // entry in the osId2Mask table.
  for (; j < i; j++) {
    unsigned osId = address2os[j].second;
    KMP_DEBUG_ASSERT(osId <= maxOsId);
    kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
    KMP_CPU_COPY(mask, sum);
    address2os[j].first.leader = (j == leader);
  }
  unique++;
  KMP_CPU_FREE_FROM_STACK(sum);

  *maxIndex = maxOsId;
  *numUnique = unique;
  return osId2Mask;
}
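// Typical use (a sketch based on how the rest of the runtime consumes this
// table): after one of the create_*_map routines fills address2os,
//   unsigned maxIndex, numUnique;
//   kmp_affin_mask_t *osId2Mask =
//       __kmp_create_masks(&maxIndex, &numUnique, address2os, __kmp_avail_proc);
// then KMP_CPU_INDEX(osId2Mask, osId) is the mask a thread bound to osId
// should use at the configured granularity.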
// Stuff for the affinity proclist parsers. It's easier to declare these vars
// as file-static than to try and pass them through the calling sequence of
// the recursive-descent OMP_PLACES parser.
static kmp_affin_mask_t *newMasks;
static int numNewMasks;
static int nextNewMask;

#define ADD_MASK(_mask)                                                        \
  {                                                                            \
    if (nextNewMask >= numNewMasks) {                                          \
      int i;                                                                   \
      numNewMasks *= 2;                                                        \
      kmp_affin_mask_t *temp;                                                  \
      KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks);                         \
      for (i = 0; i < numNewMasks / 2; i++) {                                  \
        kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);                    \
        kmp_affin_mask_t *dest = KMP_CPU_INDEX(temp, i);                       \
        KMP_CPU_COPY(dest, src);                                               \
      }                                                                        \
      KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks / 2);                  \
      newMasks = temp;                                                         \
    }                                                                          \
    KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask));               \
    nextNewMask++;                                                             \
  }

#define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId)                             \
  {                                                                            \
    if (((_osId) > _maxOsId) ||                                                \
        (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) {     \
      if (__kmp_affinity_verbose ||                                            \
          (__kmp_affinity_warnings &&                                          \
           (__kmp_affinity_type != affinity_none))) {                          \
        KMP_WARNING(AffIgnoreInvalidProcID, _osId);                            \
      }                                                                        \
    } else {                                                                   \
      ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId)));                            \
    }                                                                          \
  }
2744 unsigned int *out_numMasks,
2745 const char *proclist,
2746 kmp_affin_mask_t *osId2Mask,
2749 const char *scan = proclist;
2750 const char *next = proclist;
2755 KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
2757 kmp_affin_mask_t *sumMask;
2758 KMP_CPU_ALLOC(sumMask);
2762 int start, end, stride;
2766 if (*next ==
'\0') {
2778 KMP_ASSERT2((*next >=
'0') && (*next <=
'9'),
"bad proclist");
2780 num = __kmp_str_to_int(scan, *next);
2781 KMP_ASSERT2(num >= 0,
"bad explicit proc list");
2784 if ((num > maxOsId) ||
2785 (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2786 if (__kmp_affinity_verbose ||
2787 (__kmp_affinity_warnings &&
2788 (__kmp_affinity_type != affinity_none))) {
2789 KMP_WARNING(AffIgnoreInvalidProcID, num);
2791 KMP_CPU_ZERO(sumMask);
2793 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2813 KMP_ASSERT2((*next >=
'0') && (*next <=
'9'),
"bad explicit proc list");
2816 num = __kmp_str_to_int(scan, *next);
2817 KMP_ASSERT2(num >= 0,
"bad explicit proc list");
2820 if ((num > maxOsId) ||
2821 (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2822 if (__kmp_affinity_verbose ||
2823 (__kmp_affinity_warnings &&
2824 (__kmp_affinity_type != affinity_none))) {
2825 KMP_WARNING(AffIgnoreInvalidProcID, num);
2828 KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
2845 KMP_ASSERT2((*next >=
'0') && (*next <=
'9'),
"bad explicit proc list");
2847 start = __kmp_str_to_int(scan, *next);
2848 KMP_ASSERT2(start >= 0,
"bad explicit proc list");
2853 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2867 KMP_ASSERT2((*next >=
'0') && (*next <=
'9'),
"bad explicit proc list");
2869 end = __kmp_str_to_int(scan, *next);
2870 KMP_ASSERT2(end >= 0,
"bad explicit proc list");
2887 KMP_ASSERT2((*next >=
'0') && (*next <=
'9'),
"bad explicit proc list");
2889 stride = __kmp_str_to_int(scan, *next);
2890 KMP_ASSERT2(stride >= 0,
"bad explicit proc list");
2895 KMP_ASSERT2(stride != 0,
"bad explicit proc list");
2897 KMP_ASSERT2(start <= end,
"bad explicit proc list");
2899 KMP_ASSERT2(start >= end,
"bad explicit proc list");
2901 KMP_ASSERT2((end - start) / stride <= 65536,
"bad explicit proc list");
2906 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2908 }
while (start <= end);
2911 ADD_MASK_OSID(start, osId2Mask, maxOsId);
2913 }
while (start >= end);
2924 *out_numMasks = nextNewMask;
2925 if (nextNewMask == 0) {
2927 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
2930 KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
2931 for (i = 0; i < nextNewMask; i++) {
2932 kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
2933 kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
2934 KMP_CPU_COPY(dest, src);
2936 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
2937 KMP_CPU_FREE(sumMask);
2963 static void __kmp_process_subplace_list(
const char **scan,
2964 kmp_affin_mask_t *osId2Mask,
2965 int maxOsId, kmp_affin_mask_t *tempMask,
2970 int start, count, stride, i;
2974 KMP_ASSERT2((**scan >=
'0') && (**scan <=
'9'),
"bad explicit places list");
2977 start = __kmp_str_to_int(*scan, *next);
2978 KMP_ASSERT(start >= 0);
2983 if (**scan ==
'}' || **scan ==
',') {
2984 if ((start > maxOsId) ||
2985 (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
2986 if (__kmp_affinity_verbose ||
2987 (__kmp_affinity_warnings &&
2988 (__kmp_affinity_type != affinity_none))) {
2989 KMP_WARNING(AffIgnoreInvalidProcID, start);
2992 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
2995 if (**scan ==
'}') {
3001 KMP_ASSERT2(**scan ==
':',
"bad explicit places list");
3006 KMP_ASSERT2((**scan >=
'0') && (**scan <=
'9'),
"bad explicit places list");
3009 count = __kmp_str_to_int(*scan, *next);
3010 KMP_ASSERT(count >= 0);
3015 if (**scan ==
'}' || **scan ==
',') {
3016 for (i = 0; i < count; i++) {
3017 if ((start > maxOsId) ||
3018 (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3019 if (__kmp_affinity_verbose ||
3020 (__kmp_affinity_warnings &&
3021 (__kmp_affinity_type != affinity_none))) {
3022 KMP_WARNING(AffIgnoreInvalidProcID, start);
3026 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3031 if (**scan ==
'}') {
3037 KMP_ASSERT2(**scan ==
':',
"bad explicit places list");
3044 if (**scan ==
'+') {
3048 if (**scan ==
'-') {
3056 KMP_ASSERT2((**scan >=
'0') && (**scan <=
'9'),
"bad explicit places list");
3059 stride = __kmp_str_to_int(*scan, *next);
3060 KMP_ASSERT(stride >= 0);
3066 if (**scan ==
'}' || **scan ==
',') {
3067 for (i = 0; i < count; i++) {
3068 if ((start > maxOsId) ||
3069 (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
3070 if (__kmp_affinity_verbose ||
3071 (__kmp_affinity_warnings &&
3072 (__kmp_affinity_type != affinity_none))) {
3073 KMP_WARNING(AffIgnoreInvalidProcID, start);
3077 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
3082 if (**scan ==
'}') {
3089 KMP_ASSERT2(0,
"bad explicit places list");
3093 static void __kmp_process_place(
const char **scan, kmp_affin_mask_t *osId2Mask,
3094 int maxOsId, kmp_affin_mask_t *tempMask,
3100 if (**scan ==
'{') {
3102 __kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask, setSize);
3103 KMP_ASSERT2(**scan ==
'}',
"bad explicit places list");
3105 }
else if (**scan ==
'!') {
3107 __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
3108 KMP_CPU_COMPLEMENT(maxOsId, tempMask);
3109 }
else if ((**scan >=
'0') && (**scan <=
'9')) {
3112 int num = __kmp_str_to_int(*scan, *next);
3113 KMP_ASSERT(num >= 0);
3114 if ((num > maxOsId) ||
3115 (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
3116 if (__kmp_affinity_verbose ||
3117 (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
3118 KMP_WARNING(AffIgnoreInvalidProcID, num);
3121 KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
3126 KMP_ASSERT2(0,
"bad explicit places list");
3131 void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
3132 unsigned int *out_numMasks,
3133 const char *placelist,
3134 kmp_affin_mask_t *osId2Mask,
3136 int i, j, count, stride, sign;
3137 const char *scan = placelist;
3138 const char *next = placelist;
3141 KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
3147 kmp_affin_mask_t *tempMask;
3148 kmp_affin_mask_t *previousMask;
3149 KMP_CPU_ALLOC(tempMask);
3150 KMP_CPU_ZERO(tempMask);
3151 KMP_CPU_ALLOC(previousMask);
3152 KMP_CPU_ZERO(previousMask);
3156 __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
3160 if (*scan ==
'\0' || *scan ==
',') {
3164 KMP_CPU_ZERO(tempMask);
3166 if (*scan ==
'\0') {
3173 KMP_ASSERT2(*scan ==
':',
"bad explicit places list");
3178 KMP_ASSERT2((*scan >=
'0') && (*scan <=
'9'),
"bad explicit places list");
3181 count = __kmp_str_to_int(scan, *next);
3182 KMP_ASSERT(count >= 0);
3187 if (*scan ==
'\0' || *scan ==
',') {
3190 KMP_ASSERT2(*scan ==
':',
"bad explicit places list");
3209 KMP_ASSERT2((*scan >=
'0') && (*scan <=
'9'),
"bad explicit places list");
3212 stride = __kmp_str_to_int(scan, *next);
3213 KMP_DEBUG_ASSERT(stride >= 0);
3219 for (i = 0; i < count; i++) {
3224 KMP_CPU_COPY(previousMask, tempMask);
3225 ADD_MASK(previousMask);
3226 KMP_CPU_ZERO(tempMask);
3228 KMP_CPU_SET_ITERATE(j, previousMask) {
3229 if (!KMP_CPU_ISSET(j, previousMask)) {
3232 if ((j + stride > maxOsId) || (j + stride < 0) ||
3233 (!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) ||
3234 (!KMP_CPU_ISSET(j + stride,
3235 KMP_CPU_INDEX(osId2Mask, j + stride)))) {
3236 if ((__kmp_affinity_verbose ||
3237 (__kmp_affinity_warnings &&
3238 (__kmp_affinity_type != affinity_none))) &&
3240 KMP_WARNING(AffIgnoreInvalidProcID, j + stride);
3244 KMP_CPU_SET(j + stride, tempMask);
3248 KMP_CPU_ZERO(tempMask);
3253 if (*scan ==
'\0') {
3261 KMP_ASSERT2(0,
"bad explicit places list");
3264 *out_numMasks = nextNewMask;
3265 if (nextNewMask == 0) {
3267 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
3270 KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
3271 KMP_CPU_FREE(tempMask);
3272 KMP_CPU_FREE(previousMask);
3273 for (i = 0; i < nextNewMask; i++) {
3274 kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
3275 kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
3276 KMP_CPU_COPY(dest, src);
3278 KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
3284 #undef ADD_MASK_OSID 3287 static int __kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o) {
3290 hwloc_obj_t hT = NULL;
3291 int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
3292 for (
int i = 0; i < N; ++i) {
3293 KMP_DEBUG_ASSERT(hT);
3294 unsigned idx = hT->os_index;
3295 if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3296 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
3297 KC_TRACE(200, (
"KMP_HW_SUBSET: skipped proc %d\n", idx));
3300 hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
3305 static int __kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o) {
3307 hwloc_obj_t hT = NULL;
3308 int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
3309 for (
int i = 0; i < N; ++i) {
3310 KMP_DEBUG_ASSERT(hT);
3311 unsigned idx = hT->os_index;
3312 if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask))
3314 hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
3318 #endif // KMP_USE_HWLOC 3320 static void __kmp_apply_thread_places(AddrUnsPair **pAddr,
int depth) {
3321 AddrUnsPair *newAddr;
3322 if (__kmp_hws_requested == 0)
3325 if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
3329 hwloc_topology_t tp = __kmp_hwloc_topology;
3330 int nS = 0, nN = 0, nL = 0, nC = 0,
3332 int nCr = 0, nTr = 0;
3333 int nPkg = 0, nCo = 0, n_new = 0, n_old = 0, nCpP = 0, nTpC = 0;
3334 hwloc_obj_t hT, hC, hL, hN, hS;
3338 int numa_support = 0, tile_support = 0;
3339 if (__kmp_pu_os_idx)
3340 hT = hwloc_get_pu_obj_by_os_index(tp,
3341 __kmp_pu_os_idx[__kmp_avail_proc - 1]);
3343 hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1);
3345 KMP_WARNING(AffHWSubsetUnsupported);
3349 hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
3350 hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
3351 if (hN != NULL && hN->depth > hS->depth) {
3353 }
else if (__kmp_hws_node.num > 0) {
3355 KMP_WARNING(AffHWSubsetUnsupported);
3359 L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
3360 hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT);
3362 __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1) {
3364 }
else if (__kmp_hws_tile.num > 0) {
3365 if (__kmp_hws_core.num == 0) {
3366 __kmp_hws_core = __kmp_hws_tile;
3367 __kmp_hws_tile.num = 0;
3370 KMP_WARNING(AffHWSubsetInvalid);
3377 if (__kmp_hws_socket.num == 0)
3378 __kmp_hws_socket.num = nPackages;
3379 if (__kmp_hws_socket.offset >= nPackages) {
3380 KMP_WARNING(AffHWSubsetManySockets);
3385 int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE,
3387 if (__kmp_hws_node.num == 0)
3388 __kmp_hws_node.num = NN;
3389 if (__kmp_hws_node.offset >= NN) {
3390 KMP_WARNING(AffHWSubsetManyNodes);
3395 int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
3396 if (__kmp_hws_tile.num == 0) {
3397 __kmp_hws_tile.num = NL + 1;
3399 if (__kmp_hws_tile.offset >= NL) {
3400 KMP_WARNING(AffHWSubsetManyTiles);
3403 int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
3405 if (__kmp_hws_core.num == 0)
3406 __kmp_hws_core.num = NC;
3407 if (__kmp_hws_core.offset >= NC) {
3408 KMP_WARNING(AffHWSubsetManyCores);
3412 int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE,
3414 if (__kmp_hws_core.num == 0)
3415 __kmp_hws_core.num = NC;
3416 if (__kmp_hws_core.offset >= NC) {
3417 KMP_WARNING(AffHWSubsetManyCores);
3424 int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
3425 if (__kmp_hws_tile.num == 0)
3426 __kmp_hws_tile.num = NL;
3427 if (__kmp_hws_tile.offset >= NL) {
3428 KMP_WARNING(AffHWSubsetManyTiles);
3431 int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
3433 if (__kmp_hws_core.num == 0)
3434 __kmp_hws_core.num = NC;
3435 if (__kmp_hws_core.offset >= NC) {
3436 KMP_WARNING(AffHWSubsetManyCores);
3440 int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE,
3442 if (__kmp_hws_core.num == 0)
3443 __kmp_hws_core.num = NC;
3444 if (__kmp_hws_core.offset >= NC) {
3445 KMP_WARNING(AffHWSubsetManyCores);
3450 if (__kmp_hws_proc.num == 0)
3451 __kmp_hws_proc.num = __kmp_nThreadsPerCore;
3452 if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) {
3453 KMP_WARNING(AffHWSubsetManyProcs);
3459 newAddr = (AddrUnsPair *)__kmp_allocate(
sizeof(AddrUnsPair) *
3463 int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE);
3464 for (
int s = 0; s < NP; ++s) {
3466 hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS);
3467 if (!__kmp_hwloc_obj_has_PUs(tp, hS))
3470 if (nS <= __kmp_hws_socket.offset ||
3471 nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) {
3472 n_old += __kmp_hwloc_skip_PUs_obj(tp, hS);
3483 __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE, &hN);
3484 for (
int n = 0; n < NN; ++n) {
3486 if (!__kmp_hwloc_obj_has_PUs(tp, hN)) {
3487 hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
3491 if (nN <= __kmp_hws_node.offset ||
3492 nN > __kmp_hws_node.num + __kmp_hws_node.offset) {
3494 n_old += __kmp_hwloc_skip_PUs_obj(tp, hN);
3495 hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
3502 int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
3503 for (
int l = 0; l < NL; ++l) {
3505 if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
3506 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3510 if (nL <= __kmp_hws_tile.offset ||
3511 nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
3513 n_old += __kmp_hwloc_skip_PUs_obj(tp, hL);
3514 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3521 int NC = __kmp_hwloc_count_children_by_type(tp, hL,
3522 HWLOC_OBJ_CORE, &hC);
3523 for (
int c = 0; c < NC; ++c) {
3525 if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
3526 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3530 if (nC <= __kmp_hws_core.offset ||
3531 nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
3533 n_old += __kmp_hwloc_skip_PUs_obj(tp, hC);
3534 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3542 int NT = __kmp_hwloc_count_children_by_type(tp, hC,
3544 for (
int t = 0; t < NT; ++t) {
3547 if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3548 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3552 if (nT <= __kmp_hws_proc.offset ||
3553 nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
3555 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
3557 KC_TRACE(200, (
"KMP_HW_SUBSET: skipped proc %d\n", idx));
3558 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3563 newAddr[n_new] = (*pAddr)[n_old];
3566 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3574 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3576 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3584 __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE, &hC);
3585 for (
int c = 0; c < NC; ++c) {
3587 if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
3588 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3592 if (nC <= __kmp_hws_core.offset ||
3593 nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
3595 n_old += __kmp_hwloc_skip_PUs_obj(tp, hC);
3596 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3604 __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
3605 for (
int t = 0; t < NT; ++t) {
3608 if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3609 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3613 if (nT <= __kmp_hws_proc.offset ||
3614 nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
3616 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
3618 KC_TRACE(200, (
"KMP_HW_SUBSET: skipped proc %d\n", idx));
3619 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3624 newAddr[n_new] = (*pAddr)[n_old];
3627 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3635 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3638 hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
3646 int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
3647 for (
int l = 0; l < NL; ++l) {
3649 if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
3650 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3654 if (nL <= __kmp_hws_tile.offset ||
3655 nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
3657 n_old += __kmp_hwloc_skip_PUs_obj(tp, hL);
3658 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3666 __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC);
3667 for (
int c = 0; c < NC; ++c) {
3669 if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
3670 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3674 if (nC <= __kmp_hws_core.offset ||
3675 nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
3677 n_old += __kmp_hwloc_skip_PUs_obj(tp, hC);
3678 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3687 __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
3688 for (
int t = 0; t < NT; ++t) {
3691 if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3692 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3696 if (nT <= __kmp_hws_proc.offset ||
3697 nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
3699 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
3701 KC_TRACE(200, (
"KMP_HW_SUBSET: skipped proc %d\n", idx));
3702 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3707 newAddr[n_new] = (*pAddr)[n_old];
3710 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3718 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3720 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3728 __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE, &hC);
3729 for (
int c = 0; c < NC; ++c) {
3731 if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
3732 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3736 if (nC <= __kmp_hws_core.offset ||
3737 nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
3739 n_old += __kmp_hwloc_skip_PUs_obj(tp, hC);
3740 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3749 __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
3750 for (
int t = 0; t < NT; ++t) {
3753 if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3754 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3758 if (nT <= __kmp_hws_proc.offset ||
3759 nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
3761 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
3763 KC_TRACE(200, (
"KMP_HW_SUBSET: skipped proc %d\n", idx));
3764 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3769 newAddr[n_new] = (*pAddr)[n_old];
3772 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3780 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3792 KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc);
3793 KMP_DEBUG_ASSERT(nPkg > 0);
3794 KMP_DEBUG_ASSERT(nCpP > 0);
3795 KMP_DEBUG_ASSERT(nTpC > 0);
3796 KMP_DEBUG_ASSERT(nCo > 0);
3797 KMP_DEBUG_ASSERT(nPkg <= nPackages);
3798 KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg);
3799 KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore);
3800 KMP_DEBUG_ASSERT(nCo <= __kmp_ncores);
3803 nCoresPerPkg = nCpP;
3804 __kmp_nThreadsPerCore = nTpC;
3805 __kmp_avail_proc = n_new;
3809 #endif // KMP_USE_HWLOC 3811 int n_old = 0, n_new = 0, proc_num = 0;
3812 if (__kmp_hws_node.num > 0 || __kmp_hws_tile.num > 0) {
3813 KMP_WARNING(AffHWSubsetNoHWLOC);
3816 if (__kmp_hws_socket.num == 0)
3817 __kmp_hws_socket.num = nPackages;
3818 if (__kmp_hws_core.num == 0)
3819 __kmp_hws_core.num = nCoresPerPkg;
3820 if (__kmp_hws_proc.num == 0 || __kmp_hws_proc.num > __kmp_nThreadsPerCore)
3821 __kmp_hws_proc.num = __kmp_nThreadsPerCore;
3822 if (!__kmp_affinity_uniform_topology()) {
3823 KMP_WARNING(AffHWSubsetNonUniform);
3827 KMP_WARNING(AffHWSubsetNonThreeLevel);
3830 if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) {
3831 KMP_WARNING(AffHWSubsetManySockets);
3834 if (__kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg) {
3835 KMP_WARNING(AffHWSubsetManyCores);
3840 newAddr = (AddrUnsPair *)__kmp_allocate(
3841 sizeof(AddrUnsPair) * __kmp_hws_socket.num * __kmp_hws_core.num *
3842 __kmp_hws_proc.num);
3843 for (
int i = 0; i < nPackages; ++i) {
3844 if (i < __kmp_hws_socket.offset ||
3845 i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) {
3847 n_old += nCoresPerPkg * __kmp_nThreadsPerCore;
3848 if (__kmp_pu_os_idx != NULL) {
3850 for (
int j = 0; j < nCoresPerPkg; ++j) {
3851 for (
int k = 0; k < __kmp_nThreadsPerCore; ++k) {
3852 KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
3859 for (
int j = 0; j < nCoresPerPkg; ++j) {
3860 if (j < __kmp_hws_core.offset ||
3861 j >= __kmp_hws_core.offset +
3862 __kmp_hws_core.num) {
3863 n_old += __kmp_nThreadsPerCore;
3864 if (__kmp_pu_os_idx != NULL) {
3865 for (
int k = 0; k < __kmp_nThreadsPerCore; ++k) {
3866 KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
3872 for (
int k = 0; k < __kmp_nThreadsPerCore; ++k) {
3873 if (k < __kmp_hws_proc.num) {
3875 newAddr[n_new] = (*pAddr)[n_old];
3878 if (__kmp_pu_os_idx != NULL)
3879 KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
3888 KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
3889 KMP_DEBUG_ASSERT(n_new ==
3890 __kmp_hws_socket.num * __kmp_hws_core.num *
3891 __kmp_hws_proc.num);
3892 nPackages = __kmp_hws_socket.num;
3893 nCoresPerPkg = __kmp_hws_core.num;
3894 __kmp_nThreadsPerCore = __kmp_hws_proc.num;
3895 __kmp_avail_proc = n_new;
3896 __kmp_ncores = nPackages * __kmp_hws_core.num;
3902 if (__kmp_affinity_verbose) {
3903 char m[KMP_AFFIN_MASK_PRINT_LEN];
3904 __kmp_affinity_print_mask(m, KMP_AFFIN_MASK_PRINT_LEN,
3905 __kmp_affin_fullMask);
3906 if (__kmp_affinity_respect_mask) {
3907 KMP_INFORM(InitOSProcSetRespect,
"KMP_HW_SUBSET", m);
3909 KMP_INFORM(InitOSProcSetNotRespect,
"KMP_HW_SUBSET", m);
3911 KMP_INFORM(AvailableOSProc,
"KMP_HW_SUBSET", __kmp_avail_proc);
3913 __kmp_str_buf_init(&buf);
3914 __kmp_str_buf_print(&buf,
"%d", nPackages);
3915 KMP_INFORM(TopologyExtra,
"KMP_HW_SUBSET", buf.str, nCoresPerPkg,
3916 __kmp_nThreadsPerCore, __kmp_ncores);
3917 __kmp_str_buf_free(&buf);
3920 if (__kmp_pu_os_idx != NULL) {
3921 __kmp_free(__kmp_pu_os_idx);
3922 __kmp_pu_os_idx = NULL;
3928 static int __kmp_affinity_find_core_level(
const AddrUnsPair *address2os,
3929 int nprocs,
int bottom_level) {
3932 for (
int i = 0; i < nprocs; i++) {
3933 for (
int j = bottom_level; j > 0; j--) {
3934 if (address2os[i].first.labels[j] > 0) {
3935 if (core_level < (j - 1)) {
3945 static int __kmp_affinity_compute_ncores(
const AddrUnsPair *address2os,
3946 int nprocs,
int bottom_level,
3952 for (i = 0; i < nprocs; i++) {
3953 for (j = bottom_level; j > core_level; j--) {
3954 if ((i + 1) < nprocs) {
3955 if (address2os[i + 1].first.labels[j] > 0) {
3960 if (j == core_level) {
3964 if (j > core_level) {
3973 static int __kmp_affinity_find_core(
const AddrUnsPair *address2os,
int proc,
3974 int bottom_level,
int core_level) {
3975 return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level,
3982 static int __kmp_affinity_max_proc_per_core(
const AddrUnsPair *address2os,
3983 int nprocs,
int bottom_level,
3985 int maxprocpercore = 0;
3987 if (core_level < bottom_level) {
3988 for (
int i = 0; i < nprocs; i++) {
3989 int percore = address2os[i].first.labels[core_level + 1] + 1;
3991 if (percore > maxprocpercore) {
3992 maxprocpercore = percore;
3998 return maxprocpercore;
4001 static AddrUnsPair *address2os = NULL;
4002 static int *procarr = NULL;
4003 static int __kmp_aff_depth = 0;
4005 #if KMP_USE_HIER_SCHED 4006 #define KMP_EXIT_AFF_NONE \ 4007 KMP_ASSERT(__kmp_affinity_type == affinity_none); \ 4008 KMP_ASSERT(address2os == NULL); \ 4009 __kmp_apply_thread_places(NULL, 0); \ 4010 __kmp_create_affinity_none_places(); \ 4011 __kmp_dispatch_set_hierarchy_values(); \ 4014 #define KMP_EXIT_AFF_NONE \ 4015 KMP_ASSERT(__kmp_affinity_type == affinity_none); \ 4016 KMP_ASSERT(address2os == NULL); \ 4017 __kmp_apply_thread_places(NULL, 0); \ 4018 __kmp_create_affinity_none_places(); \ 4024 static void __kmp_create_affinity_none_places() {
4025 KMP_ASSERT(__kmp_affin_fullMask != NULL);
4026 KMP_ASSERT(__kmp_affinity_type == affinity_none);
4027 __kmp_affinity_num_masks = 1;
4028 KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
4029 kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, 0);
4030 KMP_CPU_COPY(dest, __kmp_affin_fullMask);
4033 static int __kmp_affinity_cmp_Address_child_num(
const void *a,
const void *b) {
4034 const Address *aa = &(((
const AddrUnsPair *)a)->first);
4035 const Address *bb = &(((
const AddrUnsPair *)b)->first);
4036 unsigned depth = aa->depth;
4038 KMP_DEBUG_ASSERT(depth == bb->depth);
4039 KMP_DEBUG_ASSERT((
unsigned)__kmp_affinity_compact <= depth);
4040 KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
4041 for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
4042 int j = depth - i - 1;
4043 if (aa->childNums[j] < bb->childNums[j])
4045 if (aa->childNums[j] > bb->childNums[j])
4048 for (; i < depth; i++) {
4049 int j = i - __kmp_affinity_compact;
4050 if (aa->childNums[j] < bb->childNums[j])
4052 if (aa->childNums[j] > bb->childNums[j])
4058 static void __kmp_aux_affinity_initialize(
void) {
4059 if (__kmp_affinity_masks != NULL) {
4060 KMP_ASSERT(__kmp_affin_fullMask != NULL);
4068 if (__kmp_affin_fullMask == NULL) {
4069 KMP_CPU_ALLOC(__kmp_affin_fullMask);
4071 if (KMP_AFFINITY_CAPABLE()) {
4072 if (__kmp_affinity_respect_mask) {
4073 __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);
4077 __kmp_avail_proc = 0;
4078 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
4079 if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
4084 if (__kmp_avail_proc > __kmp_xproc) {
4085 if (__kmp_affinity_verbose ||
4086 (__kmp_affinity_warnings &&
4087 (__kmp_affinity_type != affinity_none))) {
4088 KMP_WARNING(ErrorInitializeAffinity);
4090 __kmp_affinity_type = affinity_none;
4091 KMP_AFFINITY_DISABLE();
4095 __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
4096 __kmp_avail_proc = __kmp_xproc;
4100 if (__kmp_affinity_gran == affinity_gran_tile &&
4102 __kmp_affinity_dispatch->get_api_type() == KMPAffinity::NATIVE_OS) {
4103 KMP_WARNING(AffTilesNoHWLOC,
"KMP_AFFINITY");
4104 __kmp_affinity_gran = affinity_gran_package;
4108 kmp_i18n_id_t msg_id = kmp_i18n_null;
4112 if ((__kmp_cpuinfo_file != NULL) &&
4113 (__kmp_affinity_top_method == affinity_top_method_all)) {
4114 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
4117 if (__kmp_affinity_top_method == affinity_top_method_all) {
4121 const char *file_name = NULL;
4125 __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
4126 if (__kmp_affinity_verbose) {
4127 KMP_INFORM(AffUsingHwloc,
"KMP_AFFINITY");
4129 if (!__kmp_hwloc_error) {
4130 depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
4133 }
else if (depth < 0 && __kmp_affinity_verbose) {
4134 KMP_INFORM(AffIgnoringHwloc,
"KMP_AFFINITY");
4136 }
else if (__kmp_affinity_verbose) {
4137 KMP_INFORM(AffIgnoringHwloc,
"KMP_AFFINITY");
4142 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 4145 if (__kmp_affinity_verbose) {
4146 KMP_INFORM(AffInfoStr,
"KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
4150 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
4156 if (__kmp_affinity_verbose) {
4157 if (msg_id != kmp_i18n_null) {
4158 KMP_INFORM(AffInfoStrStr,
"KMP_AFFINITY",
4159 __kmp_i18n_catgets(msg_id),
4160 KMP_I18N_STR(DecodingLegacyAPIC));
4162 KMP_INFORM(AffInfoStr,
"KMP_AFFINITY",
4163 KMP_I18N_STR(DecodingLegacyAPIC));
4168 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
4180 if (__kmp_affinity_verbose) {
4181 if (msg_id != kmp_i18n_null) {
4182 KMP_INFORM(AffStrParseFilename,
"KMP_AFFINITY",
4183 __kmp_i18n_catgets(msg_id),
"/proc/cpuinfo");
4185 KMP_INFORM(AffParseFilename,
"KMP_AFFINITY",
"/proc/cpuinfo");
4189 FILE *f = fopen(
"/proc/cpuinfo",
"r");
4191 msg_id = kmp_i18n_str_CantOpenCpuinfo;
4193 file_name =
"/proc/cpuinfo";
4195 __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
4205 #if KMP_GROUP_AFFINITY 4207 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
4208 if (__kmp_affinity_verbose) {
4209 KMP_INFORM(AffWindowsProcGroupMap,
"KMP_AFFINITY");
4212 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
4213 KMP_ASSERT(depth != 0);
4219 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
4220 if (file_name == NULL) {
4221 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
4222 }
else if (line == 0) {
4223 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
4225 KMP_INFORM(UsingFlatOSFileLine, file_name, line,
4226 __kmp_i18n_catgets(msg_id));
4232 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
4236 KMP_ASSERT(depth > 0);
4237 KMP_ASSERT(address2os != NULL);
4242 else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
4243 KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
4244 if (__kmp_affinity_verbose) {
4245 KMP_INFORM(AffUsingHwloc,
"KMP_AFFINITY");
4247 depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
4252 #endif // KMP_USE_HWLOC 4258 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 4260 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
4261 if (__kmp_affinity_verbose) {
4262 KMP_INFORM(AffInfoStr,
"KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
4265 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
4270 KMP_ASSERT(msg_id != kmp_i18n_null);
4271 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
4273 }
else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
4274 if (__kmp_affinity_verbose) {
4275 KMP_INFORM(AffInfoStr,
"KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
4278 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
4283 KMP_ASSERT(msg_id != kmp_i18n_null);
4284 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
4290 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
4291 const char *filename;
4292 if (__kmp_cpuinfo_file != NULL) {
4293 filename = __kmp_cpuinfo_file;
4295 filename =
"/proc/cpuinfo";
4298 if (__kmp_affinity_verbose) {
4299 KMP_INFORM(AffParseFilename,
"KMP_AFFINITY", filename);
4302 FILE *f = fopen(filename,
"r");
4305 if (__kmp_cpuinfo_file != NULL) {
4306 __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
4307 KMP_HNT(NameComesFrom_CPUINFO_FILE), __kmp_msg_null);
4309 __kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),
4314 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
4317 KMP_ASSERT(msg_id != kmp_i18n_null);
4319 KMP_FATAL(FileLineMsgExiting, filename, line,
4320 __kmp_i18n_catgets(msg_id));
4322 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
4325 if (__kmp_affinity_type == affinity_none) {
4326 KMP_ASSERT(depth == 0);
4331 #if KMP_GROUP_AFFINITY 4333 else if (__kmp_affinity_top_method == affinity_top_method_group) {
4334 if (__kmp_affinity_verbose) {
4335 KMP_INFORM(AffWindowsProcGroupMap,
"KMP_AFFINITY");
4338 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
4339 KMP_ASSERT(depth != 0);
4341 KMP_ASSERT(msg_id != kmp_i18n_null);
4342 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
4348 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
4349 if (__kmp_affinity_verbose) {
4350 KMP_INFORM(AffUsingFlatOS,
"KMP_AFFINITY");
4353 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
4358 KMP_ASSERT(depth > 0);
4359 KMP_ASSERT(address2os != NULL);
4362 #if KMP_USE_HIER_SCHED 4363 __kmp_dispatch_set_hierarchy_values();
4366 if (address2os == NULL) {
4367 if (KMP_AFFINITY_CAPABLE() &&
4368 (__kmp_affinity_verbose ||
4369 (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none)))) {
4370 KMP_WARNING(ErrorInitializeAffinity);
4372 __kmp_affinity_type = affinity_none;
4373 __kmp_create_affinity_none_places();
4374 KMP_AFFINITY_DISABLE();
4378 if (__kmp_affinity_gran == affinity_gran_tile
4380 && __kmp_tile_depth == 0
4384 KMP_WARNING(AffTilesNoTiles,
"KMP_AFFINITY");
4387 __kmp_apply_thread_places(&address2os, depth);
4392 kmp_affin_mask_t *osId2Mask =
4393 __kmp_create_masks(&maxIndex, &numUnique, address2os, __kmp_avail_proc);
4394 if (__kmp_affinity_gran_levels == 0) {
4395 KMP_DEBUG_ASSERT((
int)numUnique == __kmp_avail_proc);
4401 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
4403 switch (__kmp_affinity_type) {
4405 case affinity_explicit:
4406 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
4408 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
4411 __kmp_affinity_process_proclist(
4412 &__kmp_affinity_masks, &__kmp_affinity_num_masks,
4413 __kmp_affinity_proclist, osId2Mask, maxIndex);
4417 __kmp_affinity_process_placelist(
4418 &__kmp_affinity_masks, &__kmp_affinity_num_masks,
4419 __kmp_affinity_proclist, osId2Mask, maxIndex);
4422 if (__kmp_affinity_num_masks == 0) {
4423 if (__kmp_affinity_verbose ||
4424 (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
4425 KMP_WARNING(AffNoValidProcID);
4427 __kmp_affinity_type = affinity_none;
4437 case affinity_logical:
4438 __kmp_affinity_compact = 0;
4439 if (__kmp_affinity_offset) {
4440 __kmp_affinity_offset =
4441 __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
4445 case affinity_physical:
4446 if (__kmp_nThreadsPerCore > 1) {
4447 __kmp_affinity_compact = 1;
4448 if (__kmp_affinity_compact >= depth) {
4449 __kmp_affinity_compact = 0;
4452 __kmp_affinity_compact = 0;
4454 if (__kmp_affinity_offset) {
4455 __kmp_affinity_offset =
4456 __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
4460 case affinity_scatter:
4461 if (__kmp_affinity_compact >= depth) {
4462 __kmp_affinity_compact = 0;
4464 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
4468 case affinity_compact:
4469 if (__kmp_affinity_compact >= depth) {
4470 __kmp_affinity_compact = depth - 1;
4474 case affinity_balanced:
4476 if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
4477 KMP_WARNING(AffBalancedNotAvail,
"KMP_AFFINITY");
4479 __kmp_affinity_type = affinity_none;
4481 }
else if (__kmp_affinity_uniform_topology()) {
4486 __kmp_aff_depth = depth;
4488 int core_level = __kmp_affinity_find_core_level(
4489 address2os, __kmp_avail_proc, depth - 1);
4490 int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
4491 depth - 1, core_level);
4492 int maxprocpercore = __kmp_affinity_max_proc_per_core(
4493 address2os, __kmp_avail_proc, depth - 1, core_level);
4495 int nproc = ncores * maxprocpercore;
4496 if ((nproc < 2) || (nproc < __kmp_avail_proc)) {
4497 if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
4498 KMP_WARNING(AffBalancedNotAvail,
"KMP_AFFINITY");
4500 __kmp_affinity_type = affinity_none;
4504 procarr = (
int *)__kmp_allocate(
sizeof(
int) * nproc);
4505 for (
int i = 0; i < nproc; i++) {
4511 for (
int i = 0; i < __kmp_avail_proc; i++) {
4512 int proc = address2os[i].second;
4514 __kmp_affinity_find_core(address2os, i, depth - 1, core_level);
4516 if (core == lastcore) {
4523 procarr[core * maxprocpercore + inlastcore] = proc;
4531 if (__kmp_affinity_dups) {
4532 __kmp_affinity_num_masks = __kmp_avail_proc;
4534 __kmp_affinity_num_masks = numUnique;
4538 if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
4539 (__kmp_affinity_num_places > 0) &&
4540 ((
unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks)) {
4541 __kmp_affinity_num_masks = __kmp_affinity_num_places;
4545 KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
4549 qsort(address2os, __kmp_avail_proc,
sizeof(*address2os),
4550 __kmp_affinity_cmp_Address_child_num);
4554 for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
4555 if ((!__kmp_affinity_dups) && (!address2os[i].first.leader)) {
4558 unsigned osId = address2os[i].second;
4559 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
4560 kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j);
4561 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
4562 KMP_CPU_COPY(dest, src);
4563 if (++j >= __kmp_affinity_num_masks) {
4567 KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
4572 KMP_ASSERT2(0,
"Unexpected affinity setting");
4575 KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex + 1);
4576 machine_hierarchy.init(address2os, __kmp_avail_proc);
4578 #undef KMP_EXIT_AFF_NONE 4580 void __kmp_affinity_initialize(
void) {
4589 int disabled = (__kmp_affinity_type == affinity_disabled);
4590 if (!KMP_AFFINITY_CAPABLE()) {
4591 KMP_ASSERT(disabled);
4594 __kmp_affinity_type = affinity_none;
4596 __kmp_aux_affinity_initialize();
4598 __kmp_affinity_type = affinity_disabled;
4602 void __kmp_affinity_uninitialize(
void) {
4603 if (__kmp_affinity_masks != NULL) {
4604 KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
4605 __kmp_affinity_masks = NULL;
4607 if (__kmp_affin_fullMask != NULL) {
4608 KMP_CPU_FREE(__kmp_affin_fullMask);
4609 __kmp_affin_fullMask = NULL;
4611 __kmp_affinity_num_masks = 0;
4612 __kmp_affinity_type = affinity_default;
4614 __kmp_affinity_num_places = 0;
4616 if (__kmp_affinity_proclist != NULL) {
4617 __kmp_free(__kmp_affinity_proclist);
4618 __kmp_affinity_proclist = NULL;
4620 if (address2os != NULL) {
4621 __kmp_free(address2os);
4624 if (procarr != NULL) {
4625 __kmp_free(procarr);
4629 if (__kmp_hwloc_topology != NULL) {
4630 hwloc_topology_destroy(__kmp_hwloc_topology);
4631 __kmp_hwloc_topology = NULL;
4634 KMPAffinity::destroy_api();
4637 void __kmp_affinity_set_init_mask(
int gtid,
int isa_root) {
4638 if (!KMP_AFFINITY_CAPABLE()) {
4642 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4643 if (th->th.th_affin_mask == NULL) {
4644 KMP_CPU_ALLOC(th->th.th_affin_mask);
4646 KMP_CPU_ZERO(th->th.th_affin_mask);
4653 kmp_affin_mask_t *mask;
4657 if (KMP_AFFINITY_NON_PROC_BIND)
4660 if ((__kmp_affinity_type == affinity_none) ||
4661 (__kmp_affinity_type == affinity_balanced)) {
4662 #if KMP_GROUP_AFFINITY 4663 if (__kmp_num_proc_groups > 1) {
4667 KMP_ASSERT(__kmp_affin_fullMask != NULL);
4669 mask = __kmp_affin_fullMask;
4671 KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
4672 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4673 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4679 (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
4680 #if KMP_GROUP_AFFINITY 4681 if (__kmp_num_proc_groups > 1) {
4685 KMP_ASSERT(__kmp_affin_fullMask != NULL);
4687 mask = __kmp_affin_fullMask;
4691 KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
4692 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4693 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4699 th->th.th_current_place = i;
4701 th->th.th_new_place = i;
4702 th->th.th_first_place = 0;
4703 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4706 if (i == KMP_PLACE_ALL) {
4707 KA_TRACE(100, (
"__kmp_affinity_set_init_mask: binding T#%d to all places\n",
4710 KA_TRACE(100, (
"__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
4717 (
"__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n",
4720 KA_TRACE(100, (
"__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
4725 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4727 if (__kmp_affinity_verbose
4729 && (__kmp_affinity_type == affinity_none ||
4730 (i != KMP_PLACE_ALL && __kmp_affinity_type != affinity_balanced))) {
4731 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4732 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4733 th->th.th_affin_mask);
4734 KMP_INFORM(BoundToOSProcSet,
"KMP_AFFINITY", (kmp_int32)getpid(),
4735 __kmp_gettid(), gtid, buf);
4742 if (__kmp_affinity_type == affinity_none) {
4743 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
4746 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4751 void __kmp_affinity_set_place(
int gtid) {
4752 if (!KMP_AFFINITY_CAPABLE()) {
4756 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4758 KA_TRACE(100, (
"__kmp_affinity_set_place: binding T#%d to place %d (current " 4760 gtid, th->th.th_new_place, th->th.th_current_place));
4763 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4764 KMP_ASSERT(th->th.th_new_place >= 0);
4765 KMP_ASSERT((
unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
4766 if (th->th.th_first_place <= th->th.th_last_place) {
4767 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&
4768 (th->th.th_new_place <= th->th.th_last_place));
4770 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) ||
4771 (th->th.th_new_place >= th->th.th_last_place));
4776 kmp_affin_mask_t *mask =
4777 KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place);
4778 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4779 th->th.th_current_place = th->th.th_new_place;
4781 if (__kmp_affinity_verbose) {
4782 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4783 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4784 th->th.th_affin_mask);
4785 KMP_INFORM(BoundToOSProcSet,
"OMP_PROC_BIND", (kmp_int32)getpid(),
4786 __kmp_gettid(), gtid, buf);
4788 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4793 int __kmp_aux_set_affinity(
void **mask) {
4798 if (!KMP_AFFINITY_CAPABLE()) {
4802 gtid = __kmp_entry_gtid();
4804 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4805 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4806 (kmp_affin_mask_t *)(*mask));
4808 "kmp_set_affinity: setting affinity mask for thread %d = %s\n", gtid,
4812 if (__kmp_env_consistency_check) {
4813 if ((mask == NULL) || (*mask == NULL)) {
4814 KMP_FATAL(AffinityInvalidMask,
"kmp_set_affinity");
4819 KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t *)(*mask))) {
4820 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
4821 KMP_FATAL(AffinityInvalidMask,
"kmp_set_affinity");
4823 if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
4828 if (num_procs == 0) {
4829 KMP_FATAL(AffinityInvalidMask,
"kmp_set_affinity");
4832 #if KMP_GROUP_AFFINITY 4833 if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
4834 KMP_FATAL(AffinityInvalidMask,
"kmp_set_affinity");
4840 th = __kmp_threads[gtid];
4841 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4842 retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4844 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
4848 th->th.th_current_place = KMP_PLACE_UNDEFINED;
4849 th->th.th_new_place = KMP_PLACE_UNDEFINED;
4850 th->th.th_first_place = 0;
4851 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4854 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
4860 int __kmp_aux_get_affinity(
void **mask) {
4865 if (!KMP_AFFINITY_CAPABLE()) {
4869 gtid = __kmp_entry_gtid();
4870 th = __kmp_threads[gtid];
4871 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4874 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4875 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4876 th->th.th_affin_mask);
4877 __kmp_printf(
"kmp_get_affinity: stored affinity mask for thread %d = %s\n",
4881 if (__kmp_env_consistency_check) {
4882 if ((mask == NULL) || (*mask == NULL)) {
4883 KMP_FATAL(AffinityInvalidMask,
"kmp_get_affinity");
4889 retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4891 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4892 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4893 (kmp_affin_mask_t *)(*mask));
4894 __kmp_printf(
"kmp_get_affinity: system affinity mask for thread %d = %s\n",
4901 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
4907 int __kmp_aux_get_affinity_max_proc() {
4908 if (!KMP_AFFINITY_CAPABLE()) {
4911 #if KMP_GROUP_AFFINITY 4912 if (__kmp_num_proc_groups > 1) {
4913 return (
int)(__kmp_num_proc_groups *
sizeof(DWORD_PTR) * CHAR_BIT);
4919 int __kmp_aux_set_affinity_mask_proc(
int proc,
void **mask) {
4920 if (!KMP_AFFINITY_CAPABLE()) {
4925 int gtid = __kmp_entry_gtid();
4926 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4927 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4928 (kmp_affin_mask_t *)(*mask));
4929 __kmp_debug_printf(
"kmp_set_affinity_mask_proc: setting proc %d in " 4930 "affinity mask for thread %d = %s\n",
4934 if (__kmp_env_consistency_check) {
4935 if ((mask == NULL) || (*mask == NULL)) {
4936 KMP_FATAL(AffinityInvalidMask,
"kmp_set_affinity_mask_proc");
4940 if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
4943 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
4947 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
4951 int __kmp_aux_unset_affinity_mask_proc(
int proc,
void **mask) {
4952 if (!KMP_AFFINITY_CAPABLE()) {
4957 int gtid = __kmp_entry_gtid();
4958 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4959 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4960 (kmp_affin_mask_t *)(*mask));
4961 __kmp_debug_printf(
"kmp_unset_affinity_mask_proc: unsetting proc %d in " 4962 "affinity mask for thread %d = %s\n",
4966 if (__kmp_env_consistency_check) {
4967 if ((mask == NULL) || (*mask == NULL)) {
4968 KMP_FATAL(AffinityInvalidMask,
"kmp_unset_affinity_mask_proc");
4972 if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
4975 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
4979 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
4983 int __kmp_aux_get_affinity_mask_proc(
int proc,
void **mask) {
4984 if (!KMP_AFFINITY_CAPABLE()) {
4989 int gtid = __kmp_entry_gtid();
4990 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4991 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4992 (kmp_affin_mask_t *)(*mask));
4993 __kmp_debug_printf(
"kmp_get_affinity_mask_proc: getting proc %d in " 4994 "affinity mask for thread %d = %s\n",
4998 if (__kmp_env_consistency_check) {
4999 if ((mask == NULL) || (*mask == NULL)) {
5000 KMP_FATAL(AffinityInvalidMask,
"kmp_get_affinity_mask_proc");
5004 if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
5007 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
5011 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
5015 void __kmp_balanced_affinity(kmp_info_t *th,
int nthreads) {
5016 KMP_DEBUG_ASSERT(th);
5017 bool fine_gran =
true;
5018 int tid = th->th.th_info.ds.ds_tid;
5020 switch (__kmp_affinity_gran) {
5021 case affinity_gran_fine:
5022 case affinity_gran_thread:
5024 case affinity_gran_core:
5025 if (__kmp_nThreadsPerCore > 1) {
5029 case affinity_gran_package:
5030 if (nCoresPerPkg > 1) {
5038 if (__kmp_affinity_uniform_topology()) {
5042 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
5044 int ncores = __kmp_ncores;
5045 if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) {
5046 __kmp_nth_per_core = __kmp_avail_proc / nPackages;
5050 int chunk = nthreads / ncores;
5052 int big_cores = nthreads % ncores;
5054 int big_nth = (chunk + 1) * big_cores;
5055 if (tid < big_nth) {
5056 coreID = tid / (chunk + 1);
5057 threadID = (tid % (chunk + 1)) % __kmp_nth_per_core;
5059 coreID = (tid - big_cores) / chunk;
5060 threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core;
5063 KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
5064 "Illegal set affinity operation when not capable");
5066 kmp_affin_mask_t *mask = th->th.th_affin_mask;
5070 int osID = address2os[coreID * __kmp_nth_per_core + threadID].second;
5071 KMP_CPU_SET(osID, mask);
5073 for (
int i = 0; i < __kmp_nth_per_core; i++) {
5075 osID = address2os[coreID * __kmp_nth_per_core + i].second;
5076 KMP_CPU_SET(osID, mask);
5079 if (__kmp_affinity_verbose) {
5080 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5081 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
5082 KMP_INFORM(BoundToOSProcSet,
"KMP_AFFINITY", (kmp_int32)getpid(),
5083 __kmp_gettid(), tid, buf);
5085 __kmp_set_system_affinity(mask, TRUE);
5088 kmp_affin_mask_t *mask = th->th.th_affin_mask;
5091 int core_level = __kmp_affinity_find_core_level(
5092 address2os, __kmp_avail_proc, __kmp_aff_depth - 1);
5093 int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
5094 __kmp_aff_depth - 1, core_level);
5095 int nth_per_core = __kmp_affinity_max_proc_per_core(
5096 address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level);
5100 if (nthreads == __kmp_avail_proc) {
5102 int osID = address2os[tid].second;
5103 KMP_CPU_SET(osID, mask);
5105 int core = __kmp_affinity_find_core(address2os, tid,
5106 __kmp_aff_depth - 1, core_level);
5107 for (
int i = 0; i < __kmp_avail_proc; i++) {
5108 int osID = address2os[i].second;
5109 if (__kmp_affinity_find_core(address2os, i, __kmp_aff_depth - 1,
5110 core_level) == core) {
5111 KMP_CPU_SET(osID, mask);
5115 }
else if (nthreads <= ncores) {
5118 for (
int i = 0; i < ncores; i++) {
5121 for (
int j = 0; j < nth_per_core; j++) {
5122 if (procarr[i * nth_per_core + j] != -1) {
5129 for (
int j = 0; j < nth_per_core; j++) {
5130 int osID = procarr[i * nth_per_core + j];
5132 KMP_CPU_SET(osID, mask);
5148 int *nproc_at_core = (
int *)KMP_ALLOCA(
sizeof(
int) * ncores);
5150 int *ncores_with_x_procs =
5151 (
int *)KMP_ALLOCA(
sizeof(
int) * (nth_per_core + 1));
5153 int *ncores_with_x_to_max_procs =
5154 (
int *)KMP_ALLOCA(
sizeof(
int) * (nth_per_core + 1));
5156 for (
int i = 0; i <= nth_per_core; i++) {
5157 ncores_with_x_procs[i] = 0;
5158 ncores_with_x_to_max_procs[i] = 0;
5161 for (
int i = 0; i < ncores; i++) {
5163 for (
int j = 0; j < nth_per_core; j++) {
5164 if (procarr[i * nth_per_core + j] != -1) {
5168 nproc_at_core[i] = cnt;
5169 ncores_with_x_procs[cnt]++;
5172 for (
int i = 0; i <= nth_per_core; i++) {
5173 for (
int j = i; j <= nth_per_core; j++) {
5174 ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j];
5179 int nproc = nth_per_core * ncores;
5181 int *newarr = (
int *)__kmp_allocate(
sizeof(
int) * nproc);
5182 for (
int i = 0; i < nproc; i++) {
5189 for (
int j = 1; j <= nth_per_core; j++) {
5190 int cnt = ncores_with_x_to_max_procs[j];
5191 for (
int i = 0; i < ncores; i++) {
5193 if (nproc_at_core[i] == 0) {
5196 for (
int k = 0; k < nth_per_core; k++) {
5197 if (procarr[i * nth_per_core + k] != -1) {
5198 if (newarr[i * nth_per_core + k] == 0) {
5199 newarr[i * nth_per_core + k] = 1;
5205 newarr[i * nth_per_core + k]++;
5213 if (cnt == 0 || nth == 0) {
5224 for (
int i = 0; i < nproc; i++) {
5228 int osID = procarr[i];
5229 KMP_CPU_SET(osID, mask);
5231 int coreID = i / nth_per_core;
5232 for (
int ii = 0; ii < nth_per_core; ii++) {
5233 int osID = procarr[coreID * nth_per_core + ii];
5235 KMP_CPU_SET(osID, mask);
5245 if (__kmp_affinity_verbose) {
5246 char buf[KMP_AFFIN_MASK_PRINT_LEN];
5247 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
5248 KMP_INFORM(BoundToOSProcSet,
"KMP_AFFINITY", (kmp_int32)getpid(),
5249 __kmp_gettid(), tid, buf);
5251 __kmp_set_system_affinity(mask, TRUE);
5269 kmp_set_thread_affinity_mask_initial()
5274 int gtid = __kmp_get_gtid();
5277 KA_TRACE(30, (
"kmp_set_thread_affinity_mask_initial: " 5278 "non-omp thread, returning\n"));
5281 if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) {
5282 KA_TRACE(30, (
"kmp_set_thread_affinity_mask_initial: " 5283 "affinity not initialized, returning\n"));
5286 KA_TRACE(30, (
"kmp_set_thread_affinity_mask_initial: " 5287 "set full mask for thread %d\n",
5289 KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
5290 return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
5294 #endif // KMP_AFFINITY_SUPPORTED