#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#if KMP_USE_HIER_SCHED
#include "kmp_dispatch_hier.h"
#endif

#include "ompt-specific.h"

#define KMP_USE_PRCTL 0

#include "tsan_annotations.h"

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: ";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

kmp_info_t __kmp_monitor;
/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_unregister_library(void);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;
      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated stack size is if
           we are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: internal alg. failed to "
                  "find thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
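/* [Editor's note, not part of the original source] The lookup above relies on
   a simple containment test: a thread whose descriptor records stack base B
   and size S (stacks grow downward here) is assumed to own any address A with
   B - S <= A <= B. A minimal sketch of that predicate, under the same
   assumptions (the helper name is hypothetical):

     static inline int __kmp_addr_on_stack(char *addr, char *base, size_t size) {
       return addr <= base && (size_t)(base - addr) <= size;
     }
*/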
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);
  return gtid;
}
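/* [Editor's note] Illustrative usage sketch only, not part of the runtime:
   entry points that may be reached before the calling thread is known to the
   library typically use the registering variant, e.g.

     int gtid = __kmp_get_global_thread_id_reg(); // registers a new root if needed
     kmp_info_t *thr = __kmp_threads[gtid];

   whereas the non-registering __kmp_get_global_thread_id() above may return
   KMP_GTID_DNE for a native thread the runtime has not seen yet. */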
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking ubermaster threads since they use refinement and
     cannot overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
/* an infinite loop for debugging support */
void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(1);
  }
}
#define MAX_MESSAGE 512

void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
    if (__kmp_storage_map_verbose) {
      node = __kmp_get_host_node(p1);
      if (node < 0) /* doesn't work, so don't try this next time */
        __kmp_storage_map_verbose = FALSE;
      else {
        char *last;
        int lastNode;
        int localProc = __kmp_get_cpu_from_gtid(gtid);

        const int page_size = KMP_GET_PAGE_SIZE();

        p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
        p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
        if (localProc >= 0)
          __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, localProc >> 1);
        else
          __kmp_printf_no_lock(" GTID %d\n", gtid);
#if KMP_USE_PRCTL
        do {
          last = p1;
          lastNode = node;
          /* This loop collates adjacent pages with the same host node. */
          do {
            (char *)p1 += page_size;
          } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
          __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1,
                               lastNode);
        } while (p1 <= p2);
#else
        __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
                             (char *)p1 + (page_size - 1),
                             __kmp_get_host_node(p1));
        if (p1 < p2) {
          __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
                               (char *)p2 + (page_size - 1),
                               __kmp_get_host_node(p2));
        }
#endif
      }
    }
  } else
    __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning));
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
}
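/* [Editor's note] Typical call, taken from the team storage-map dump later in
   this file; the format string and trailing arguments are folded into the
   "OMP storage map:" line printed above:

     __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                                  &team->t.t_bar[bs_last_barrier],
                                  sizeof(kmp_balign_team_t) * bs_last_barrier,
                                  "%s_%d.t_bar", header, team_id);
*/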
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
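/* [Editor's note] Usage sketch (hypothetical message text): callers pass a
   printf-style format plus arguments, which get wrapped into an
   "OMP warning:" line; output is suppressed entirely when warnings are
   disabled via KMP_WARNINGS:

     __kmp_warn("Cannot parse value \"%s\"; using the default", value);
*/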
void __kmp_abort_process() {
  // Later threads may stall here, but that's ok because abort() will kill them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  if (KMP_OS_WINDOWS) {
    // Let other threads know of abnormal termination and prevent deadlock
    // if abort happened during library initialization or shutdown.
    __kmp_global.g.g_abort = SIGABRT;
  }

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);
} // __kmp_abort_process

void __kmp_abort_thread(void) {
  // In case of abort just call abort(); it will kill all the threads.
  __kmp_infinite_loop();
} // __kmp_abort_thread
/* Print out the storage map for the major kmp_info_t thread data structures
   that are allocated together. */
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}

/* Print out the storage map for the major kmp_team_t team data structures
   that are allocated together. */
static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_taskq, &team->t.t_copypriv_data,
                               sizeof(kmp_taskq_t), "%s_%d.t_taskq", header,
                               team_id);
}
static void __kmp_init_allocator() { __kmp_init_memkind(); }

static void __kmp_fini_allocator() { __kmp_fini_memkind(); }

static void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) {
  __kmp_init_bootstrap_lock(lck); // make the lock released
}
static void __kmp_reset_locks_on_process_detach(int gtid_req) {
  int i;
  int thread_count;

  // Check that no other live threads are still registered with the library
  // (except the one requesting the detach).
  while (1) {
    thread_count = 0;
    for (i = 0; i < __kmp_threads_capacity; ++i) {
      if (!__kmp_threads)
        continue;
      kmp_info_t *th = __kmp_threads[i];
      if (th == NULL)
        continue;
      int gtid = th->th.th_info.ds.ds_gtid;
      if (gtid == gtid_req)
        continue;
      if (gtid < 0)
        continue;
      DWORD exit_val;
      int alive = __kmp_is_thread_alive(th, &exit_val);
      if (alive)
        ++thread_count;
    }
    if (thread_count == 0)
      break; // success
  }

  // Assume that this thread is alone; it might now be safe to reset the locks
  // that terminated threads may still be holding.
  __kmp_reset_lock(&__kmp_forkjoin_lock);
#ifdef KMP_DEBUG
  __kmp_reset_lock(&__kmp_stdio_lock);
#endif
}
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved != NULL) {
      // lpReserved != NULL means the process is terminating; other threads may
      // already have been killed while holding runtime locks, so reset them.
      __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific());
    }

    __kmp_internal_end_library(__kmp_gtid_get_specific());
    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}
/* Change the library type to "status" and return the old type. Called from
   within initialization routines where __kmp_initz_lock is held. */
int __kmp_change_library(int status) {
  int old_status;

  old_status = __kmp_yield_init &
               1; // check whether KMP_LIBRARY=throughput (even init count)

  if (status) {
    __kmp_yield_init |= 1; // throughput => turnaround (odd init count)
  } else {
    __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
  }

  return old_status;
}
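/* [Editor's note] The low bit of __kmp_yield_init doubles as the library-mode
   flag here: an odd initial yield count corresponds to KMP_LIBRARY=turnaround
   and an even one to throughput, so the function can flip the mode and report
   the previous setting without a separate state variable. */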
/* __kmp_parallel_deo -- Wait until it's our turn. */
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid),
                   KMP_EQ, NULL);
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* __kmp_parallel_dxo -- Signal the next task. */
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    /* use the tid of the next thread in this team */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
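/* [Editor's note] Taken together, __kmp_parallel_deo/__kmp_parallel_dxo
   implement a simple ticket hand-off for the "ordered" construct:
   t_ordered.dt.t_value holds the tid whose turn it is, deo spins until the
   value equals the caller's tid, and dxo passes the turn on to
   (tid + 1) % t_nproc. */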
/* The BARRIER for a SINGLE process section is always explicit */
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      // Only report metadata by master of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}

void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
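/* [Editor's note] Illustrative sketch only (this mirrors how
   __kmpc_single/__kmpc_end_single drive these helpers): the thread that wins
   the construct executes the block and is the one that calls
   __kmp_exit_single:

     if (__kmp_enter_single(gtid, loc, TRUE)) {
       // body executed by exactly one thread of the team
       __kmp_exit_single(gtid);
     }
*/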
/* Determine how many threads can actually be reserved for this parallel
   region, after dynamic adjustment and the various thread limits. Returns 1
   when the region should be serialized. */
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  }

  // Respect KMP_DEVICE_THREAD_LIMIT (total threads on the device).
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT (threads in the contention group).
  if (root->r.r_cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_cg_max_nth) {
    int tl_nthreads = __kmp_cg_max_nth - root->r.r_cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
    return 1;
  }

  KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested "
                "%d threads\n",
                __kmp_get_gtid(), new_nthreads, set_nthreads));
  return new_nthreads;
}
/* Allocate threads from the thread pool and assign them to the new team,
   reusing the hot team where possible. */
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());

  /* first, let's setup the master thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
    // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the master thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
        }
      }
    }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(team);
#endif
  }
}
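/* [Editor's note] The per-thread balign[b].bb.b_arrived counters are seeded
   from the team's t_bar[b].b_arrived above so that a worker recycled into this
   team starts in step with the barriers the team has already passed;
   otherwise its first barrier arrival would be out of sync with the rest of
   the team. */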
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate any changes to the floating point control registers out to the
// team.
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get master values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    // Only write the team fields when the values actually changed, to avoid
    // needlessly dirtying a cache line read by the whole team.
    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Similarly here. Don't write to this cache line unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

// Do the opposite: restore the hardware registers to the values saved in the
// team.
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc); // forward declaration
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  /* utilize the serialized team held by this thread */
  KMP_DEBUG_ASSERT(serial_team);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
                     NULL);
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;
  }

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified, so use the current value of
    // proc-bind-var for this parallel region.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for next parallel region
  this_thr->th.th_set_proc_bind = proc_bind_default;

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *implicit_task_data;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != omp_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = 1;

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size, ompt_parallel_invoker_program,
          codeptr);
    }
  }
#endif // OMPT_SUPPORT

  if (this_thr->th.th_team != serial_team) {
    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      /* this serial team was already used; we need to allocate a new one */
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
                                     &this_thr->th.th_current_task->td_icvs,
                                     0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      /* setup new serialized team and install it */
      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    /* we have to initialize this serial team */
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }

    serial_team->t.t_pkfn = (microtask_t)(~0);
    this_thr->th.th_info.ds.ds_tid = 0;

    /* set thread cache values */
    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save

    propagateFPControl(serial_team);

    /* check if we need to allocate dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

  } else {
    /* this serialized team is already being used; that's fine, just add
       another nested level */
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;
    // Thread value exists in the nested nthreads array for the next nested
    // level
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    /* allocate/push dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;
  }

  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);

  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != omp_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = OMPT_GET_FRAME_ADDRESS(1);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
    // don't use lw_taskteam after linking; content was swapped

    /* OMPT implicit task begin */
    implicit_task_data = OMPT_CUR_TASK_DATA(this_thr);
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid));
      OMPT_CUR_TASK_INFO(this_thr)
          ->thread_num = __kmp_tid_from_gtid(global_tid);
    }

    /* OMPT state */
    this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = OMPT_GET_FRAME_ADDRESS(1);
  }
}
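/* [Editor's note] This path is what a parallel region reduced to a single
   thread ends up using, e.g. (illustrative source-level view):

     #pragma omp parallel if (0)   // or num_threads(1), or nesting disabled
     { ... }                        // runs on the encountering thread via the
                                    // serialized team set up above
*/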
/* most of the work for a fork; return true if we really went parallel,
   false if serialized */
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context, // Intel, GNU, ...
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    va_list *ap
#else
                    va_list ap
#endif
                    ) {
  void **argv;
  int i;
  int master_tid;
  int master_this_cons;
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int nthreads;
  int master_active;
  int master_set_numthreads;
  int level;
  int active_level;
  int teams_level;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;
#endif
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
    /* Some systems prefer the stack for the root thread(s) to start with some
       gap from the parent stack to prevent false sharing. */
    void *dummy = KMP_ALLOCA(__kmp_stkpadding);
    /* These two lines below are so this does not get optimized out */
    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);
  }

  /* initialize if needed */
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  /* setup current data */
  master_th = __kmp_threads[gtid];
  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *parent_task_data;
  omp_frame_t *ompt_frame;
  ompt_data_t *implicit_task_data;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
                                  NULL, NULL);
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
  }

  // Nested level will be an index in the nested nthreads array
  level = parent_team->t.t_level;
  // used to launch non-serial teams even if nested is not allowed
  active_level = parent_team->t.t_active_level;
  // needed to check nesting inside the teams
  teams_level = master_th->th.th_teams_level;
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
    // it is either actual or not needed (when active_level > 0)
    (*p_hot_teams)[0].hot_team_nth = 1;
  }
#endif

  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size,
          OMPT_INVOKER(call_context), return_address);
    }
    master_th->th.ompt_thread_info.state = omp_state_overhead;
  }
  master_th->th.th_ident = loc;

  if (master_th->th.th_teams_microtask && ap &&
      microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
    // AC: This is the start of a parallel that is nested inside a teams
    // construct. The team is actual (hot); all workers are ready at the fork
    // barrier. No lock needed to initialize the team a bit, then free workers.
    parent_team->t.t_ident = loc;
    __kmp_alloc_argv_entries(argc, parent_team, TRUE);
    parent_team->t.t_argc = argc;
    argv = (void **)parent_team->t.t_argv;
    for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
      *argv++ = va_arg(*ap, void *);
#else
      *argv++ = va_arg(ap, void *);
#endif
    /* Increment our nested depth levels, but not increase the serialization */
    if (parent_team == master_th->th.th_serial_team) {
      // AC: we are in serialized parallel
      KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
      // AC: need this in order for enquiry functions to work correctly
      parent_team->t.t_serialized--;

      void *dummy;
      void **exit_runtime_p;

      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                &ompt_parallel_data, return_address);
        exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_frame);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking; content was swapped

        /* OMPT implicit task begin */
        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
          OMPT_CUR_TASK_INFO(master_th)
              ->thread_num = __kmp_tid_from_gtid(gtid);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = omp_state_work_parallel;
      } else {
        exit_runtime_p = &dummy;
      }

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv,
                               exit_runtime_p);
      }

      *exit_runtime_p = NULL;
      if (ompt_enabled.enabled) {
        OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, implicit_task_data, 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num);
        }
        __ompt_lw_taskteam_unlink(master_th);

        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th),
              OMPT_INVOKER(call_context), return_address);
        }
        master_th->th.ompt_thread_info.state = omp_state_overhead;
      }
      return TRUE;
    }

    parent_team->t.t_pkfn = microtask;
    parent_team->t.t_invoke = invoker;
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
    parent_team->t.t_active_level++;
    parent_team->t.t_level++;
    parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save

    /* Change number of threads in the team if requested */
    if (master_set_numthreads) { // The parallel has num_threads clause
      if (master_set_numthreads < master_th->th.th_teams_size.nth) {
        // AC: can only reduce number of threads dynamically, can't increase
        kmp_info_t **other_threads = parent_team->t.t_threads;
        parent_team->t.t_nproc = master_set_numthreads;
        for (i = 0; i < master_set_numthreads; ++i) {
          other_threads[i]->th.th_team_nproc = master_set_numthreads;
        }
        // Keep extra threads hot in the team for possible next parallels
      }
      master_th->th.th_set_nproc = 0;
    }

    if (__kmp_debugging) { // Let debugger override number of threads.
      int nth = __kmp_omp_num_threads(loc);
      if (nth > 0) { // 0 means debugger doesn't want to change num threads
        master_set_numthreads = nth;
      }
    }

    KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));
    __kmp_internal_fork(loc, gtid, parent_team);
    KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));

    /* Invoke microtask for MASTER thread */
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    if (!parent_team->t.t_invoke(gtid)) {
      KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
    }
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

    return TRUE;
  } // Parallel closely nested in teams construct
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
  }

  /* determine the number of threads */
  if (parent_team->t.t_active_level >=
      master_th->th.th_current_task->td_icvs.max_active_levels) {
    nthreads = 1;
  } else {
    int enter_teams = ((ap == NULL && active_level == 0) ||
                       (ap && teams_level > 0 && teams_level == level));
    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   : get__nproc_2(parent_team, master_tid);
    // Check if we need to take the forkjoin lock (no need for a serialized
    // parallel out of a teams construct).
    if (nthreads > 1) {
      if ((!get__nested(master_th) &&
           (root->r.r_in_parallel && !enter_teams)) ||
          (__kmp_library == library_serial)) {
        KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
                      " threads\n",
                      gtid, nthreads));
        nthreads = 1;
      }
    }
    if (nthreads > 1) {
      /* determine how many new threads we can use */
      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
      nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads);
      if (nthreads == 1) {
        // Free lock for single thread execution here; for multi-thread
        // execution it will be freed later, after the team of threads has
        // been created and initialized.
        __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      }
    }
  }
  KMP_DEBUG_ASSERT(nthreads > 0);

  // If we temporarily changed the set number of threads then restore it now
  master_th->th.th_set_nproc = 0;

  /* create a serialized parallel region? */
  if (nthreads == 1) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    void *args[argc];
#else
    void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif

    KA_TRACE(20,
             ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));

    __kmpc_serialized_parallel(loc, gtid);

    if (call_context == fork_context_intel) {
      /* TODO this sucks, use the compiler itself to pass args! :) */
      master_th->th.th_serial_team->t.t_ident = loc;
      if (!ap) {
        // revert change made in __kmpc_serialized_parallel()
        master_th->th.th_serial_team->t.t_level--;

        void *dummy;
        void **exit_runtime_p;
        ompt_task_info_t *task_info;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);

          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
          // don't use lw_taskteam after linking; content was swapped

          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_runtime_p = &(task_info->frame.exit_frame);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid));
            OMPT_CUR_TASK_INFO(master_th)
                ->thread_num = __kmp_tid_from_gtid(gtid);
          }

          /* OMPT state */
          master_th->th.ompt_thread_info.state = omp_state_work_parallel;
        } else {
          exit_runtime_p = &dummy;
        }

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc,
                                 parent_team->t.t_argv, exit_runtime_p);
        }

        if (ompt_enabled.enabled) {
          exit_runtime_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num);
          }

          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                OMPT_CUR_TEAM_DATA(master_th), parent_task_data,
                OMPT_INVOKER(call_context), return_address);
          }
          master_th->th.ompt_thread_info.state = omp_state_overhead;
        }
      } else if (microtask == (microtask_t)__kmp_teams_master) {
        KMP_DEBUG_ASSERT(master_th->th.th_team ==
                         master_th->th.th_serial_team);
        team = master_th->th.th_team;
        team->t.t_invoke = invoker;
        __kmp_alloc_argv_entries(argc, team, TRUE);
        team->t.t_argc = argc;
        argv = (void **)team->t.t_argv;
        if (ap) {
          for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            *argv++ = va_arg(*ap, void *);
#else
            *argv++ = va_arg(ap, void *);
#endif
        } else {
          for (i = 0; i < argc; ++i)
            // Get args from parent team for teams construct
            argv[i] = parent_team->t.t_argv[i];
        }
        // AC: revert change made in __kmpc_serialized_parallel() because
        //     initial code in teams should have level = 0
        team->t.t_level--;
        // AC: call special invoker for outer "parallel" of teams construct
        invoker(gtid);
      } else {
        argv = args;
        for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
          *argv++ = va_arg(*ap, void *);
#else
          *argv++ = va_arg(ap, void *);
#endif

        void *dummy;
        void **exit_runtime_p;
        ompt_task_info_t *task_info;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);
          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
          // don't use lw_taskteam after linking; content was swapped
          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_runtime_p = &(task_info->frame.exit_frame);

          /* OMPT implicit task begin */
          implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
            OMPT_CUR_TASK_INFO(master_th)
                ->thread_num = __kmp_tid_from_gtid(gtid);
          }

          /* OMPT state */
          master_th->th.ompt_thread_info.state = omp_state_work_parallel;
        } else {
          exit_runtime_p = &dummy;
        }

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc, args,
                                 exit_runtime_p);
        }

        if (ompt_enabled.enabled) {
          *exit_runtime_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num);
          }

          ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context), return_address);
          }
          master_th->th.ompt_thread_info.state = omp_state_overhead;
        }
      }
    } else if (call_context == fork_context_gnu) {
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
                              return_address);

      lwt.ompt_task_info.frame.exit_frame = NULL;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);

      // we were called from GNU native code
      KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
      return FALSE;
    } else {
      KMP_ASSERT2(call_context < fork_context_last,
                  "__kmp_fork_call: unknown fork_context parameter");
    }

    KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
    return FALSE;
  } // if (nthreads == 1)
  // GEH: only modify the executing flag in the case when not serialized;
  //      the serialized case is handled in kmpc_serialized_parallel
  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));
  master_th->th.th_current_task->td_flags.executing = 0;

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    /* Increment our nested depth level */
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
  }

  // See if we need to make a copy of the ICVs.
  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  if ((level + 1 < __kmp_nested_nth.used) &&
      (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
    nthreads_icv = __kmp_nested_nth.nth[level + 1];
  } else {
    nthreads_icv = 0; // don't update
  }

  // Figure out the proc_bind_policy for the new team.
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  kmp_proc_bind_t proc_bind_icv =
      proc_bind_default; // proc_bind_default means don't update
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    if (proc_bind == proc_bind_default) {
      // No proc_bind clause specified; use current proc-bind-var for this
      // parallel region.
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    // Figure the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }

  // Reset for next parallel region
  master_th->th.th_set_proc_bind = proc_bind_default;

  if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;
    }
    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;
    }

    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               &new_icvs, argc USE_NESTED_HOT_ARG(master_th));
  } else {
    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
  }
  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));

  /* setup the new team */
  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
                        return_address);
  KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe
  if (!master_th->th.th_teams_microtask || level > teams_level) {
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  } else {
    // AC: Do not increase parallel level at start of the teams construct
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  }
  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
  // set master's schedule as new run-time schedule
  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
  KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);

  // Update the floating point rounding in the team if required.
  propagateFPControl(team);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    // Set master's task team to team's task team. Unless this is the hot team,
    // it should be NULL.
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
    KA_TRACE(20, ("__kmp_fork_call: Master T#%d pushing task_team %p / team "
                  "%p, new task_team %p / team %p\n",
                  __kmp_gtid_from_thread(master_th),
                  master_th->th.th_task_team, parent_team,
                  team->t.t_task_team[master_th->th.th_task_state], team));

    if (active_level || master_th->th.th_task_team) {
      // Take a memo of master's task_state
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      if (master_th->th.th_task_state_top >=
          master_th->th.th_task_state_stack_sz) { // increase size
        kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
        kmp_uint8 *old_stack, *new_stack;
        new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
        for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
          new_stack[i] = master_th->th.th_task_state_memo_stack[i];
        }
        for (i = master_th->th.th_task_state_stack_sz; i < new_size;
             ++i) { // zero-init rest of stack
          new_stack[i] = 0;
        }
        old_stack = master_th->th.th_task_state_memo_stack;
        master_th->th.th_task_state_memo_stack = new_stack;
        master_th->th.th_task_state_stack_sz = new_size;
        __kmp_free(old_stack);
      }
      // Store master's task_state on the stack
      master_th->th
          .th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
      if (master_th->th.th_hot_teams &&
          team == master_th->th.th_hot_teams[active_level].hot_team) {
        // Restore master's nested state if nested hot team
        master_th->th.th_task_state =
            master_th->th
                .th_task_state_memo_stack[master_th->th.th_task_state_top];
      } else {
#endif
        master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS
      }
#endif
    }
#if !KMP_NESTED_HOT_TEAMS
    KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
                     (team == root->r.r_hot_team));
#endif
  }

  KA_TRACE(
      20,
      ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
       gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
       team->t.t_nproc));
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));

  /* now, setup the arguments */
  argv = (void **)team->t.t_argv;
  if (ap) {
    for (i = argc - 1; i >= 0; --i) {
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
      void *new_argv = va_arg(*ap, void *);
#else
      void *new_argv = va_arg(ap, void *);
#endif
      KMP_CHECK_UPDATE(*argv, new_argv);
      argv++;
    }
  } else {
    for (i = 0; i < argc; ++i) {
      // Get args from parent team for teams construct
      KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
    }
  }

  /* now actually fork the threads */
  KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
  if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
    root->r.r_active = TRUE;

  __kmp_fork_team_threads(root, team, master_th, gtid);
  __kmp_setup_icv_copy(team, nthreads,
                       &master_th->th.th_current_task->td_icvs, loc);

  master_th->th.ompt_thread_info.state = omp_state_work_parallel;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  if (team->t.t_active_level == 1 // only report frames at level 1
      && !master_th->th.th_teams_microtask) { // not in teams construct
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        (__kmp_forkjoin_frames_mode == 3 ||
         __kmp_forkjoin_frames_mode == 1)) {
      kmp_uint64 tmp_time = 0;
      if (__itt_get_timestamp_ptr)
        tmp_time = __itt_get_timestamp();
      // Internal fork - report frame begin
      master_th->th.th_frame_time = tmp_time;
      if (__kmp_forkjoin_frames_mode == 3)
        team->t.t_region_time = tmp_time;
    } else if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
               __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
      // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
      __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
    }
  }

  /* now go on and do the work */
  KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));

  if (__itt_stack_caller_create_ptr) {
    team->t.t_stack_id =
        __kmp_itt_stack_caller_create(); // create new stack stitching id
    // before entering fork barrier
  }

  // AC: skip __kmp_internal_fork at teams construct; let only master threads
  // execute
  if (ap) {
    __kmp_internal_fork(loc, gtid, team);
    KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, team, master_th, gtid));
  }

  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
    return TRUE;
  }

  /* Invoke microtask for MASTER thread */
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

  if (!team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
  }
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = omp_state_overhead;
  }

  return TRUE;
}
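/* [Editor's note] Rough call sequence as driven by the compiler entry points
   (sketch, argument lists abbreviated): __kmpc_fork_call() forwards the
   compiler-outlined microtask here with fork_context_intel; once
   __kmp_fork_call() returns TRUE and the master has executed the microtask
   via t_invoke, the matching __kmp_join_call() below tears the region down. */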
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // restore state outside the region
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? omp_state_work_serial
                              : omp_state_work_parallel);
}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   fork_context_e fork_context, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context),
        codeptr);
  }

  task_info->frame.enter_frame = NULL;
  __kmp_join_restore_state(thread, team);
}
void __kmp_join_call(ident_t *loc, int gtid,
                     enum fork_context_e fork_context, int exit_teams) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int master_active;
  int i;

  KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));

  /* setup current data */
  master_th = __kmp_threads[gtid];
  root = master_th->th.th_root;
  team = master_th->th.th_team;
  parent_team = team->t.t_parent;

  master_th->th.th_ident = loc;

  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = omp_state_overhead;
  }

  if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     team->t.t_task_team[master_th->th.th_task_state]);
  }

  if (team->t.t_serialized) {
    if (master_th->th.th_teams_microtask) {
      // We are in teams construct
      int level = team->t.t_level;
      int tlevel = master_th->th.th_teams_level;
      if (level == tlevel) {
        // AC: we haven't incremented it earlier at start of teams construct,
        //     so do it here - at the end of teams construct
        team->t.t_level++;
      } else if (level == tlevel + 1) {
        // AC: we are exiting parallel inside teams; need to increment the
        // serialization in order to restore it in the next call to
        // __kmpc_end_serialized_parallel
        team->t.t_serialized++;
      }
    }
    __kmpc_end_serialized_parallel(loc, gtid);

    if (ompt_enabled.enabled) {
      __kmp_join_restore_state(master_th, parent_team);
    }
    return;
  }

  master_active = team->t.t_master_active;

  if (!exit_teams) {
    // AC: No barrier for internal teams at exit from teams construct.
    //     But there is barrier for the external team (league).
    __kmp_internal_join(loc, gtid, team);
  } else {
    master_th->th.th_task_state =
        0; // AC: no tasking in teams (out of any parallel)
  }

  ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
  void *codeptr = team->t.ompt_team_info.master_return_address;

  if (__itt_stack_caller_create_ptr) {
    // destroy the stack stitching id after the join barrier
    __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
  }

  // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
  if (team->t.t_active_level == 1 && !master_th->th.th_teams_microtask) {
    master_th->th.th_ident = loc;
    // only one notification scheme (either "submit" or "forking/joined",
    // not both)
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3)
      __kmp_itt_frame_submit(gtid, team->t.t_region_time,
                             master_th->th.th_frame_time, 0, loc,
                             master_th->th.th_team_nproc, 1);
    else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
             !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
      __kmp_itt_region_joined(gtid);
  }

  if (master_th->th.th_teams_microtask && !exit_teams &&
      team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
      team->t.t_level == master_th->th.th_teams_level + 1) {
    // AC: We need to leave the team structure intact at the end of a parallel
    // inside the teams construct, so that the next parallel reuses the same
    // (hot) team; only adjust nesting levels here.

    /* Decrement our nested depth level */
    team->t.t_active_level--;
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);

    /* Restore number of threads in the team if needed */
    if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
      int old_num = master_th->th.th_team_nproc;
      int new_num = master_th->th.th_teams_size.nth;
      kmp_info_t **other_threads = team->t.t_threads;
      team->t.t_nproc = new_num;
      for (i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      // Adjust states of non-used threads of the team
      for (i = old_num; i < new_num; ++i) {
        // Re-initialize thread's barrier data.
        int b;
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
        }
        if (__kmp_tasking_mode != tskm_immediate_exec) {
          // Synchronize thread's task state
          other_threads[i]->th.th_task_state = master_th->th.th_task_state;
        }
      }
    }

    if (ompt_enabled.enabled) {
      __kmp_join_ompt(gtid, master_th, parent_team, parallel_data,
                      fork_context, codeptr);
    }

    return;
  }

  /* do cleanup and restore the parent team */
  master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
  master_th->th.th_local.this_construct = team->t.t_master_this_cons;

  master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];

  /* jc: The following lock has instructions with REL and ACQ semantics,
     separating the parallel user code called in this parallel region from the
     serial user code called after this function returns. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  if (!master_th->th.th_teams_microtask ||
      team->t.t_level > master_th->th.th_teams_level) {
    /* Decrement our nested depth level */
    KMP_ATOMIC_DEC(&root->r.r_in_parallel);
  }
  KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);

  if (ompt_enabled.enabled) {
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_implicit_task) {
      int ompt_team_size = team->t.t_nproc;
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
          OMPT_CUR_TASK_INFO(master_th)->thread_num);
    }

    task_info->frame.exit_frame = NULL;
    task_info->task_data = ompt_data_none;
  }

  KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
                master_th, team));
  __kmp_pop_current_task_from_thread(master_th);

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
  // Restore master thread's partition.
  master_th->th.th_first_place = team->t.t_first_place;
  master_th->th.th_last_place = team->t.t_last_place;
#endif
  master_th->th.th_def_allocator = team->t.t_def_allocator;

  updateHWFPControl(team);

  if (root->r.r_active != master_active)
    root->r.r_active = master_active;

  __kmp_free_team(root, team USE_NESTED_HOT_ARG(
                            master_th)); // this will free the current team

  /* this race was fun to find. make sure the following is in the critical
     region otherwise assertions may fail occasionally on the debug builds */
  master_th->th.th_team = parent_team;
  master_th->th.th_team_nproc = parent_team->t.t_nproc;
  master_th->th.th_team_master = parent_team->t.t_threads[0];
  master_th->th.th_team_serialized = parent_team->t.t_serialized;

  /* restore serialized team, if need be */
  if (parent_team->t.t_serialized &&
      parent_team != master_th->th.th_serial_team &&
      parent_team != root->r.r_root_team) {
    __kmp_free_team(root,
                    master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
    master_th->th.th_serial_team = parent_team;
  }

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    if (master_th->th.th_task_state_top >
        0) { // Restore task state from memo stack
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      // Remember master's state if we re-use this nested hot team
      master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      --master_th->th.th_task_state_top; // pop
      // Now restore state at this level
      master_th->th.th_task_state =
          master_th->th
              .th_task_state_memo_stack[master_th->th.th_task_state_top];
    }
    // Copy the task team from the parent team to the master thread
    master_th->th.th_task_team =
        parent_team->t.t_task_team[master_th->th.th_task_state];
    KA_TRACE(20,
             ("__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
              __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
              parent_team));
  }

  master_th->th.th_current_task->td_flags.executing = 1;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  if (ompt_enabled.enabled) {
    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
                    codeptr);
  }

  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}
void __kmp_save_internal_controls(kmp_info_t *thread) {

  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;

    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else {
      if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
          thread->th.th_team->t.t_serialized) {
        push = 1;
      }
    }
    if (push) { /* push a record on the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));

      copy_icvs(control, &thread->th.th_current_task->td_icvs);

      control->serial_nesting_level = thread->th.th_team->t.t_serialized;

      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}
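// __kmp_set_num_threads: change the nproc ICV for the calling thread; if the
// root is idle and its hot team is larger than the new value, shrink the hot
// team now rather than waiting for the next parallel region.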
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this omp_set_num_threads() call will cause the hot team size to be
  // reduced (in the absence of a num_threads clause), then reduce it now,
  // rather than waiting for the next parallel region.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
      ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    // Release the extra threads we don't need any more.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // When decreasing team size, threads no longer in the team should
        // unref the task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case omp_set_num_threads() call
    hot_team->t.t_size_changed = -1;
  }
}
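// __kmp_set_max_active_levels / __kmp_get_max_active_levels: set and query
// the max-active-levels ICV stored in the current task's ICV block.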
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // We ignore this call if the user has specified a negative value.
    // The current setting won't be changed; the last valid setting is kept.
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // OK: max_active_levels is within [0; KMP_MAX_ACTIVE_LEVELS_LIMIT].
    // A zero value is allowed (implementation defined behavior).
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
    // If the input exceeds the upper limit, clamp it to the limit.
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}
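// __kmp_set_schedule: set the run-time schedule {kind, chunk} ICV, mapping the
// public kmp_sched_t kind onto the internal sched_type via __kmp_sch_map.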
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Check if the kind parameter is valid; valid parameters should fit in one
  // of two intervals - standard or extended:
  //       <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    // TODO: Hint needs attention in case we change the default schedule.
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
    chunk = 0; // ignore chunk value in case of bad kind
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // differ static chunked vs. unchunked: an invalid chunk indicates the
      // unchunked schedule (which is the default)
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore parameter chunk for schedule auto
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}
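// __kmp_get_schedule: translate the internal sched_type stored in the ICVs
// back into the public kmp_sched_t kind and chunk.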
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;

  switch (th_type) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    *chunk = 0; // chunk was not set, try to show this fact via zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_chunked:
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
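// __kmp_get_ancestor_thread_num: return the thread number of the calling
// thread's ancestor at the given nesting level, walking parent teams and
// counting serialized levels.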
int __kmp_get_ancestor_thread_num(int gtid, int level) {

  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 0;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

#if OMP_40_ENABLED
  if (thr->th.th_teams_microtask) {
    // AC: we are in teams region where multiple nested teams have same level
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    if (level <= tlevel) { // otherwise the usual algorithm works
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // As we need to pass by the teams league, artificially increase ii
      if (ii == tlevel) {
        ii += 2; // three teams have same level
      } else {
        ii++; // two teams have same level
      }
    }
  }
#endif

  if (ii == level)
    return __kmp_tid_from_gtid(gtid);

  dd = team->t.t_serialized;
  level++;
  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if ((team->t.t_serialized) && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      dd = team->t.t_serialized;
      ii--;
    }
  }

  return (dd > 1) ? (0) : (team->t.t_master_tid);
}
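// __kmp_get_team_size: return the team size at the given nesting level, using
// the same parent-team walk as __kmp_get_ancestor_thread_num.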
int __kmp_get_team_size(int gtid, int level) {

  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate level
  if (level == 0)
    return 1;
  if (level < 0)
    return -1;
  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;
  if (level > ii)
    return -1;

#if OMP_40_ENABLED
  if (thr->th.th_teams_microtask) {
    // AC: we are in teams region where multiple nested teams have same level
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    if (level <= tlevel) { // otherwise the usual algorithm works
      KMP_DEBUG_ASSERT(ii >= tlevel);
      // As we need to pass by the teams league, artificially increase ii
      if (ii == tlevel) {
        ii += 2; // three teams have same level
      } else {
        ii++; // two teams have same level
      }
    }
  }
#endif

  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if (team->t.t_serialized && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      ii--;
    }
  }

  return team->t.t_nproc;
}
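// __kmp_get_schedule_global: build a kmp_r_sched_t from the scheduling
// globals; the pairs (__kmp_sched, __kmp_chunk) and (__kmp_static,
// __kmp_guided) may change independently, so the combined value is computed
// on demand.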
kmp_r_sched_t __kmp_get_schedule_global() {
  kmp_r_sched_t r_sched;

  if (__kmp_sched == kmp_sch_static) {
    r_sched.r_sched_type = __kmp_static; // detailed static (balanced/greedy)
  } else if (__kmp_sched == kmp_sch_guided_chunked) {
    r_sched.r_sched_type = __kmp_guided; // detailed guided
  } else { // STATIC_CHUNKED, DYNAMIC_CHUNKED, etc.
    r_sched.r_sched_type = __kmp_sched;
  }

  if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
    // __kmp_chunk may be wrong here (if it was not ever set)
    r_sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    r_sched.chunk = __kmp_chunk;
  }

  return r_sched;
}
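// Allocate (or grow) the team's argv array for microtask arguments; small
// argument counts reuse the inline buffer inside the team structure.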
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {

  KMP_DEBUG_ASSERT(team);
  if (!realloc || argc > team->t.t_max_argc) {

    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    /* if previously allocated heap space for args, free them */
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);

    if (argc <= KMP_INLINE_ARGV_ENTRIES) {
      /* use unused space in the cache line for arguments */
      team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
      }
    } else {
      /* allocate space for arguments in the heap */
      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
                               : 2 * argc;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
      }
    }
  }
}
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
#if OMP_45_ENABLED
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
#endif
  }
}
static void __kmp_free_team_arrays(kmp_team_t *team) {
  /* Note: this does not free the threads in t_threads (__kmp_free_thread) */
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_free_hierarchies(team);
#endif
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}
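// __kmp_get_global_icvs: snapshot the global defaults into an internal
// control (ICV) block used to initialize new root and serial teams.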
static kmp_internal_control_t __kmp_get_global_icvs(void) {

  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // current state of scheduling globals

#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
#endif

  kmp_internal_control_t g_icvs = {
    0, // serial_nesting_level (corresponds to th_team_serialized)
    (kmp_int8)__kmp_dflt_nested, // nested parallelism ICV (per thread)
    (kmp_int8)__kmp_global.g.g_dynamic, // dynamic adjustment of threads
    (kmp_int8)__kmp_env_blocktime, // whether blocktime was explicitly set
    __kmp_dflt_blocktime, // blocktime
#if KMP_USE_MONITOR
    __kmp_bt_intervals, // blocktime intervals
#endif
    __kmp_dflt_team_nth, // nproc for the next parallel region
    __kmp_dflt_max_active_levels, // max_active_levels
    r_sched, // runtime schedule {sched,chunk} pair
#if OMP_40_ENABLED
    __kmp_nested_proc_bind.bind_types[0],
    __kmp_default_device,
#endif
    NULL // struct kmp_internal_control *next
  };

  return g_icvs;
}
static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {

  kmp_internal_control_t gx_icvs;
  gx_icvs.serial_nesting_level =
      0; // probably =team->t.t_serialized like in save_internal_controls
  copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
  gx_icvs.next = NULL;

  return gx_icvs;
}
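// __kmp_initialize_root: one-time setup of a root structure, its root team
// (serialized, one thread) and its hot team (reused by parallel regions).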
static void __kmp_initialize_root(kmp_root_t *root) {
  int f;
  kmp_team_t *root_team;
  kmp_team_t *hot_team;
  int hot_team_max_nth;
  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // get current state of scheduling globals
  kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
  KMP_DEBUG_ASSERT(root);
  KMP_ASSERT(!root->r.r_begin);

  /* setup the root state structure */
  __kmp_init_lock(&root->r.r_begin_lock);
  root->r.r_begin = FALSE;
  root->r.r_active = FALSE;
  root->r.r_in_parallel = 0;
  root->r.r_blocktime = __kmp_dflt_blocktime;
  root->r.r_nested = __kmp_dflt_nested;
  root->r.r_cg_nthreads = 1;

  /* setup the root team for this task */
  KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));

  root_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          1, // max_nproc
#if OMPT_SUPPORT
                          ompt_data_none, // root parallel id
#endif
#if OMP_40_ENABLED
                          __kmp_nested_proc_bind.bind_types[0],
#endif
                          &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // master thread is unknown
                          );
#if USE_DEBUGGER
  // Non-NULL value should be assigned to make the debugger display the root
  // team.
  TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
#endif

  KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));

  root->r.r_root_team = root_team;
  root_team->t.t_control_stack_top = NULL;

  /* initialize root team */
  root_team->t.t_threads[0] = NULL;
  root_team->t.t_nproc = 1;
  root_team->t.t_serialized = 1;
  root_team->t.t_sched.sched = r_sched.sched;
  KA_TRACE(
      20,
      ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
       root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));

  /* setup the hot team for this task */
  KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));

  hot_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          __kmp_dflt_team_nth_ub * 2, // max_nproc
#if OMPT_SUPPORT
                          ompt_data_none, // root parallel id
#endif
#if OMP_40_ENABLED
                          __kmp_nested_proc_bind.bind_types[0],
#endif
                          &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // master thread is unknown
                          );
  KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));

  root->r.r_hot_team = hot_team;
  root_team->t.t_control_stack_top = NULL;

  /* first-time initialization */
  hot_team->t.t_parent = root_team;

  /* initialize hot team */
  hot_team_max_nth = hot_team->t.t_max_nproc;
  for (f = 0; f < hot_team_max_nth; ++f) {
    hot_team->t.t_threads[f] = NULL;
  }
  hot_team->t.t_nproc = 1;
  hot_team->t.t_sched.sched = r_sched.sched;
  hot_team->t.t_size_changed = 0;
}
typedef struct kmp_team_list_item {
  kmp_team_p const *entry;
  struct kmp_team_list_item *next;
} kmp_team_list_item_t;
typedef kmp_team_list_item_t *kmp_team_list_t;
3269 static void __kmp_print_structure_team_accum(
3270 kmp_team_list_t list,
3271 kmp_team_p
const *team
3281 KMP_DEBUG_ASSERT(list != NULL);
3286 __kmp_print_structure_team_accum(list, team->t.t_parent);
3287 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3291 while (l->next != NULL && l->entry != team) {
3294 if (l->next != NULL) {
3300 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3306 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3307 sizeof(kmp_team_list_item_t));
static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
3335 void __kmp_print_structure(
void) {
3337 kmp_team_list_t list;
3341 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof(kmp_team_list_item_t));
3345 __kmp_printf(
"\n------------------------------\nGlobal Thread " 3346 "Table\n------------------------------\n");
3349 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3350 __kmp_printf(
"%2d", gtid);
3351 if (__kmp_threads != NULL) {
3352 __kmp_printf(
" %p", __kmp_threads[gtid]);
3354 if (__kmp_root != NULL) {
3355 __kmp_printf(
" %p", __kmp_root[gtid]);
3362 __kmp_printf(
"\n------------------------------\nThreads\n--------------------" 3364 if (__kmp_threads != NULL) {
3366 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3367 kmp_info_t
const *thread = __kmp_threads[gtid];
3368 if (thread != NULL) {
3369 __kmp_printf(
"GTID %2d %p:\n", gtid, thread);
3370 __kmp_printf(
" Our Root: %p\n", thread->th.th_root);
3371 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3372 __kmp_print_structure_team(
" Serial Team: ",
3373 thread->th.th_serial_team);
3374 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3375 __kmp_print_structure_thread(
" Master: ",
3376 thread->th.th_team_master);
3377 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3378 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3380 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3382 __kmp_print_structure_thread(
" Next in pool: ",
3383 thread->th.th_next_pool);
3385 __kmp_print_structure_team_accum(list, thread->th.th_team);
3386 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3390 __kmp_printf(
"Threads array is not allocated.\n");
3394 __kmp_printf(
"\n------------------------------\nUbers\n----------------------" 3396 if (__kmp_root != NULL) {
3398 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3399 kmp_root_t
const *root = __kmp_root[gtid];
3401 __kmp_printf(
"GTID %2d %p:\n", gtid, root);
3402 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3403 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3404 __kmp_print_structure_thread(
" Uber Thread: ",
3405 root->r.r_uber_thread);
3406 __kmp_printf(
" Active?: %2d\n", root->r.r_active);
3407 __kmp_printf(
" Nested?: %2d\n", root->r.r_nested);
3408 __kmp_printf(
" In Parallel: %2d\n",
3409 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3411 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3412 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3416 __kmp_printf(
"Ubers array is not allocated.\n");
3419 __kmp_printf(
"\n------------------------------\nTeams\n----------------------" 3421 while (list->next != NULL) {
3422 kmp_team_p
const *team = list->entry;
3424 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team);
3425 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent);
3426 __kmp_printf(
" Master TID: %2d\n", team->t.t_master_tid);
3427 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc);
3428 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized);
3429 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc);
3430 for (i = 0; i < team->t.t_nproc; ++i) {
3431 __kmp_printf(
" Thread %2d: ", i);
3432 __kmp_print_structure_thread(
"", team->t.t_threads[i]);
3434 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool);
3440 __kmp_printf(
"\n------------------------------\nPools\n----------------------" 3442 __kmp_print_structure_thread(
"Thread pool: ",
3443 CCAST(kmp_info_t *, __kmp_thread_pool));
3444 __kmp_print_structure_team(
"Team pool: ",
3445 CCAST(kmp_team_t *, __kmp_team_pool));
3449 while (list != NULL) {
3450 kmp_team_list_item_t *item = list;
3452 KMP_INTERNAL_FREE(item);
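// Table of primes used to seed the per-thread linear congruential random
// number generator below (one multiplier per thread, selected by its tid).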
static const unsigned __kmp_primes[] = {
    0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
    0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
    0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
    0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
    0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
    0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
    0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
    0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
    0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
    0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
    0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
//----------------------------------------------------------------------------
// __kmp_get_random: Get a random number using a linear congruential method.
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = x >> 16;

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}

//----------------------------------------------------------------------------
// __kmp_init_random: Initialize a random number generator
void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]->r.r_active) { // only reclaim roots that died non-active
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
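// __kmp_expand_threads: try to create nNeed free entries in __kmp_threads and
// __kmp_root by doubling the arrays (clipped to __kmp_sys_max_nth), keeping
// the threadprivate cache capacity in sync; returns the number of free entries
// generated. All callers hold __kmp_forkjoin_lock.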
static int __kmp_expand_threads(int nNeed) {
  int added = 0;
  int minimumRequiredCapacity;
  int newCapacity;
  kmp_info_t **newThreads;
  kmp_root_t **newRoot;

// All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so
// resizing __kmp_threads does not need additional protection if foreign
// threads are present

#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
  /* only for Windows static library */
  /* reclaim array entries for root threads that are already dead */
  added = __kmp_reclaim_dead_roots();

  if (nNeed) {
    nNeed -= added;
    if (nNeed < 0)
      nNeed = 0;
  }
#endif
  if (nNeed <= 0)
    return added;

  // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth.
  KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);

  /* compute expansion headroom to check if we can expand */
  if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
    /* possible expansion too small -- give up */
    return added;
  }
  minimumRequiredCapacity = __kmp_threads_capacity + nNeed;

  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));

  kmp_info_t **temp_threads = __kmp_threads;
  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  __kmp_free(temp_threads);
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;

  if (newCapacity > __kmp_tp_capacity) {
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      __kmp_threadprivate_resize_cache(newCapacity);
    } else { // increase __kmp_tp_capacity to correspond with kmp_threads size
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
    }
    __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
  }

  return added;
}
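// __kmp_register_root: register the calling thread as a root (initial or
// foreign): find a free gtid slot, allocate the root structure and its uber
// thread, serial team and hot team, and set up gtid TLS, barriers, affinity
// and OMPT state.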
3624 int __kmp_register_root(
int initial_thread) {
3625 kmp_info_t *root_thread;
3629 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3630 KA_TRACE(20, (
"__kmp_register_root: entered\n"));
3647 capacity = __kmp_threads_capacity;
3648 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3653 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3654 if (__kmp_tp_cached) {
3655 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3656 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3657 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3659 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3667 for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
3671 (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3672 KMP_ASSERT(gtid < __kmp_threads_capacity);
3676 TCW_4(__kmp_nth, __kmp_nth + 1);
3680 if (__kmp_adjust_gtid_mode) {
3681 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3682 if (TCR_4(__kmp_gtid_mode) != 2) {
3683 TCW_4(__kmp_gtid_mode, 2);
3686 if (TCR_4(__kmp_gtid_mode) != 1) {
3687 TCW_4(__kmp_gtid_mode, 1);
3692 #ifdef KMP_ADJUST_BLOCKTIME 3695 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3696 if (__kmp_nth > __kmp_avail_proc) {
3697 __kmp_zero_bt = TRUE;
3703 if (!(root = __kmp_root[gtid])) {
3704 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(
sizeof(kmp_root_t));
3705 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3708 #if KMP_STATS_ENABLED 3710 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3711 __kmp_stats_thread_ptr->startLife();
3712 KMP_SET_THREAD_STATE(SERIAL_REGION);
3715 __kmp_initialize_root(root);
3718 if (root->r.r_uber_thread) {
3719 root_thread = root->r.r_uber_thread;
3721 root_thread = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
3722 if (__kmp_storage_map) {
3723 __kmp_print_thread_storage_map(root_thread, gtid);
3725 root_thread->th.th_info.ds.ds_gtid = gtid;
3727 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3729 root_thread->th.th_root = root;
3730 if (__kmp_env_consistency_check) {
3731 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3734 __kmp_initialize_fast_memory(root_thread);
3738 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3739 __kmp_initialize_bget(root_thread);
3741 __kmp_init_random(root_thread);
3745 if (!root_thread->th.th_serial_team) {
3746 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3747 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3748 root_thread->th.th_serial_team =
3749 __kmp_allocate_team(root, 1, 1,
3756 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3758 KMP_ASSERT(root_thread->th.th_serial_team);
3759 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3760 root_thread->th.th_serial_team));
3763 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3765 root->r.r_root_team->t.t_threads[0] = root_thread;
3766 root->r.r_hot_team->t.t_threads[0] = root_thread;
3767 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3769 root_thread->th.th_serial_team->t.t_serialized = 0;
3770 root->r.r_uber_thread = root_thread;
3773 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3774 TCW_4(__kmp_init_gtid, TRUE);
3777 __kmp_gtid_set_specific(gtid);
3780 __kmp_itt_thread_name(gtid);
3783 #ifdef KMP_TDATA_GTID 3786 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3787 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3789 KA_TRACE(20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, " 3791 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3792 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3793 KMP_INIT_BARRIER_STATE));
3796 for (b = 0; b < bs_last_barrier; ++b) {
3797 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3799 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3803 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3804 KMP_INIT_BARRIER_STATE);
3806 #if KMP_AFFINITY_SUPPORTED 3808 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3809 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3810 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3811 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3813 if (TCR_4(__kmp_init_middle)) {
3814 __kmp_affinity_set_init_mask(gtid, TRUE);
3818 root_thread->th.th_def_allocator = __kmp_def_allocator;
3821 __kmp_root_counter++;
3824 if (!initial_thread && ompt_enabled.enabled) {
3826 kmp_info_t *root_thread = ompt_get_thread();
3828 ompt_set_thread_state(root_thread, omp_state_overhead);
3830 if (ompt_enabled.ompt_callback_thread_begin) {
3831 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3832 ompt_thread_initial, __ompt_get_thread_data_internal());
3834 ompt_data_t *task_data;
3835 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
3836 if (ompt_enabled.ompt_callback_task_create) {
3837 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
3838 NULL, NULL, task_data, ompt_task_initial, 0, NULL);
3842 ompt_set_thread_state(root_thread, omp_state_work_serial);
3847 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
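// Free a thread's nested hot teams (and, recursively, those of its workers);
// returns the number of threads released.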
#if KMP_NESTED_HOT_TEAMS
static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
                                const int max_level) {
  int i, n, nth;
  kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
  if (!hot_teams || !hot_teams[level].hot_team) {
    return 0;
  }
  KMP_DEBUG_ASSERT(level < max_level);
  kmp_team_t *team = hot_teams[level].hot_team;
  nth = hot_teams[level].hot_team_nth;
  n = nth - 1; // master is not freed
  if (level < max_level - 1) { // managed threads can be freed
    for (i = 0; i < nth; ++i) {
      kmp_info_t *th = team->t.t_threads[i];
      n += __kmp_free_hot_teams(root, th, level + 1, max_level);
      if (i > 0 && th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
  __kmp_free_team(root, team, NULL);
  return n;
}
#endif
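// Reset a root: free its root team and hot team, and reap the uber thread.
// The caller guarantees that no other thread accesses this root concurrently.
// Returns the number of threads released.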
static int __kmp_reset_root(int gtid, kmp_root_t *root) {
  kmp_team_t *root_team = root->r.r_root_team;
  kmp_team_t *hot_team = root->r.r_hot_team;
  int n = hot_team->t.t_nproc;
  int i;

  KMP_DEBUG_ASSERT(!root->r.r_active);

  root->r.r_root_team = NULL;
  root->r.r_hot_team = NULL;
  // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team
  // before calling __kmp_free_team().
  __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
#if KMP_NESTED_HOT_TEAMS
  if (__kmp_hot_teams_max_level > 0) { // free nested hot teams if any
    for (i = 0; i < hot_team->t.t_nproc; ++i) {
      kmp_info_t *th = hot_team->t.t_threads[i];
      if (__kmp_hot_teams_max_level > 1) {
        n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
      }
      if (th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
#endif
  __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));

  // Before we can reap the thread, make certain that all other threads in the
  // teams that had this root as ancestor have stopped trying to steal tasks.
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    __kmp_wait_to_unref_task_teams();
  }

#if KMP_OS_WINDOWS
  /* Close the handle of the root duplicated in __kmp_create_worker */
  KA_TRACE(
      10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
           "\n",
           (LPVOID) & (root->r.r_uber_thread->th),
           root->r.r_uber_thread->th.th_info.ds.ds_thread));
  __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
#endif /* KMP_OS_WINDOWS */

#if OMPT_SUPPORT
  if (ompt_enabled.ompt_callback_thread_end) {
    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
        &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
  }
#endif

  TCW_4(__kmp_nth,
        __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
  root->r.r_cg_nthreads--;

  __kmp_reap_thread(root->r.r_uber_thread, 1);

  // We cannot put the root thread into __kmp_thread_pool, so it is reaped
  // instead of being freed.
  root->r.r_uber_thread = NULL;
  /* mark root as no longer in use */
  root->r.r_begin = FALSE;

  return n;
}
void __kmp_unregister_root_current_thread(int gtid) {
  KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
  /* This lock should be OK, since unregister_root_current_thread is never
     called during an abort, only during a normal close. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
                  "exiting T#%d\n",
                  gtid));
    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
    return;
  }
  kmp_root_t *root = __kmp_root[gtid];

  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

#if OMP_45_ENABLED
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_task_team_t *task_team = thread->th.th_task_team;

  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
#if OMPT_SUPPORT
    // the runtime is shutting down so we won't report any events
    thread->th.ompt_thread_info.state = omp_state_undefined;
#endif
    __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
  }
#endif

  __kmp_reset_root(gtid, root);

  /* free up this thread slot */
  __kmp_gtid_set_specific(KMP_GTID_DNE);
#ifdef KMP_TDATA_GTID
  __kmp_gtid = KMP_GTID_DNE;
#endif

  KC_TRACE(10,
           ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
}
/* __kmp_forkjoin_lock must already be held. */
static int __kmp_unregister_root_other_thread(int gtid) {
  kmp_root_t *root = __kmp_root[gtid];
  int r;

  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  r = __kmp_reset_root(gtid, root);
  KC_TRACE(10,
           ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
  return r;
}
void __kmp_task_info() {

  kmp_int32 gtid = __kmp_entry_gtid();
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *steam = this_thr->th.th_serial_team;
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_printf("__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p "
               "curtask=%p ptask=%p\n",
               gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
               team->t.t_implicit_task_taskdata[tid].td_parent);
}
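// __kmp_initialize_info: (re)initialize an OpenMP worker thread for use in the
// given team and thread id: team pointers, implicit task, dispatch buffers,
// private commons and the task-state memo stack.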
4043 static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4044 int tid,
int gtid) {
4048 kmp_info_t *master = team->t.t_threads[0];
4049 KMP_DEBUG_ASSERT(this_thr != NULL);
4050 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4051 KMP_DEBUG_ASSERT(team);
4052 KMP_DEBUG_ASSERT(team->t.t_threads);
4053 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4054 KMP_DEBUG_ASSERT(master);
4055 KMP_DEBUG_ASSERT(master->th.th_root);
4059 TCW_SYNC_PTR(this_thr->th.th_team, team);
4061 this_thr->th.th_info.ds.ds_tid = tid;
4062 this_thr->th.th_set_nproc = 0;
4063 if (__kmp_tasking_mode != tskm_immediate_exec)
4066 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4068 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4070 this_thr->th.th_set_proc_bind = proc_bind_default;
4071 #if KMP_AFFINITY_SUPPORTED 4072 this_thr->th.th_new_place = this_thr->th.th_current_place;
4075 this_thr->th.th_root = master->th.th_root;
4078 this_thr->th.th_team_nproc = team->t.t_nproc;
4079 this_thr->th.th_team_master = master;
4080 this_thr->th.th_team_serialized = team->t.t_serialized;
4081 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4083 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4085 KF_TRACE(10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4086 tid, gtid, this_thr, this_thr->th.th_current_task));
4088 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4091 KF_TRACE(10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4092 tid, gtid, this_thr, this_thr->th.th_current_task));
4097 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4099 this_thr->th.th_local.this_construct = 0;
4101 if (!this_thr->th.th_pri_common) {
4102 this_thr->th.th_pri_common =
4103 (
struct common_table *)__kmp_allocate(
sizeof(
struct common_table));
4104 if (__kmp_storage_map) {
4105 __kmp_print_storage_map_gtid(
4106 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4107 sizeof(
struct common_table),
"th_%d.th_pri_common\n", gtid);
4109 this_thr->th.th_pri_head = NULL;
4114 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4117 sizeof(dispatch_private_info_t) *
4118 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4119 KD_TRACE(10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4120 team->t.t_max_nproc));
4121 KMP_ASSERT(dispatch);
4122 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4123 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4125 dispatch->th_disp_index = 0;
4127 dispatch->th_doacross_buf_idx = 0;
4129 if (!dispatch->th_disp_buffer) {
4130 dispatch->th_disp_buffer =
4131 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4133 if (__kmp_storage_map) {
4134 __kmp_print_storage_map_gtid(
4135 gtid, &dispatch->th_disp_buffer[0],
4136 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4138 : __kmp_dispatch_num_buffers],
4139 disp_size,
"th_%d.th_dispatch.th_disp_buffer " 4140 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4141 gtid, team->t.t_id, gtid);
4144 memset(&dispatch->th_disp_buffer[0],
'\0', disp_size);
4147 dispatch->th_dispatch_pr_current = 0;
4148 dispatch->th_dispatch_sh_current = 0;
4150 dispatch->th_deo_fcn = 0;
4151 dispatch->th_dxo_fcn = 0;
4154 this_thr->th.th_next_pool = NULL;
4156 if (!this_thr->th.th_task_state_memo_stack) {
4158 this_thr->th.th_task_state_memo_stack =
4159 (kmp_uint8 *)__kmp_allocate(4 *
sizeof(kmp_uint8));
4160 this_thr->th.th_task_state_top = 0;
4161 this_thr->th.th_task_state_stack_sz = 4;
4162 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4164 this_thr->th.th_task_state_memo_stack[i] = 0;
4167 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4168 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
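// __kmp_allocate_thread: hand out a worker for the given team slot, reusing a
// thread from __kmp_thread_pool when possible; otherwise allocate a new
// kmp_info_t, pick a free gtid, create its serial team and the OS thread.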
4178 kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4180 kmp_team_t *serial_team;
4181 kmp_info_t *new_thr;
4184 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4185 KMP_DEBUG_ASSERT(root && team);
4186 #if !KMP_NESTED_HOT_TEAMS 4187 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4192 if (__kmp_thread_pool) {
4194 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4195 __kmp_thread_pool = (
volatile kmp_info_t *)new_thr->th.th_next_pool;
4196 if (new_thr == __kmp_thread_pool_insert_pt) {
4197 __kmp_thread_pool_insert_pt = NULL;
4199 TCW_4(new_thr->th.th_in_pool, FALSE);
4202 __kmp_thread_pool_nth--;
4204 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4205 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4206 KMP_ASSERT(!new_thr->th.th_team);
4207 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4208 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth >= 0);
4211 __kmp_initialize_info(new_thr, team, new_tid,
4212 new_thr->th.th_info.ds.ds_gtid);
4213 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4215 TCW_4(__kmp_nth, __kmp_nth + 1);
4216 root->r.r_cg_nthreads++;
4218 new_thr->th.th_task_state = 0;
4219 new_thr->th.th_task_state_top = 0;
4220 new_thr->th.th_task_state_stack_sz = 4;
4222 #ifdef KMP_ADJUST_BLOCKTIME 4225 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4226 if (__kmp_nth > __kmp_avail_proc) {
4227 __kmp_zero_bt = TRUE;
4236 kmp_balign_t *balign = new_thr->th.th_bar;
4237 for (b = 0; b < bs_last_barrier; ++b)
4238 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4241 KF_TRACE(10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4242 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4249 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4250 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4255 if (!TCR_4(__kmp_init_monitor)) {
4256 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4257 if (!TCR_4(__kmp_init_monitor)) {
4258 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4259 TCW_4(__kmp_init_monitor, 1);
4260 __kmp_create_monitor(&__kmp_monitor);
4261 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4272 while (TCR_4(__kmp_init_monitor) < 2) {
4275 KF_TRACE(10, (
"after monitor thread has started\n"));
4278 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4283 for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
4284 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4288 new_thr = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
4290 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4292 if (__kmp_storage_map) {
4293 __kmp_print_thread_storage_map(new_thr, new_gtid);
4298 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4299 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4300 new_thr->th.th_serial_team = serial_team =
4301 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4308 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
4310 KMP_ASSERT(serial_team);
4311 serial_team->t.t_serialized = 0;
4313 serial_team->t.t_threads[0] = new_thr;
4315 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4319 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4322 __kmp_initialize_fast_memory(new_thr);
4326 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4327 __kmp_initialize_bget(new_thr);
4330 __kmp_init_random(new_thr);
4334 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4335 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4338 kmp_balign_t *balign = new_thr->th.th_bar;
4339 for (b = 0; b < bs_last_barrier; ++b) {
4340 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4341 balign[b].bb.team = NULL;
4342 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4343 balign[b].bb.use_oncore_barrier = 0;
4346 new_thr->th.th_spin_here = FALSE;
4347 new_thr->th.th_next_waiting = 0;
4349 new_thr->th.th_blocking =
false;
4352 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 4353 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4354 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4355 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4356 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4359 new_thr->th.th_def_allocator = __kmp_def_allocator;
4362 TCW_4(new_thr->th.th_in_pool, FALSE);
4363 new_thr->th.th_active_in_pool = FALSE;
4364 TCW_4(new_thr->th.th_active, TRUE);
4370 root->r.r_cg_nthreads++;
4374 if (__kmp_adjust_gtid_mode) {
4375 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4376 if (TCR_4(__kmp_gtid_mode) != 2) {
4377 TCW_4(__kmp_gtid_mode, 2);
4380 if (TCR_4(__kmp_gtid_mode) != 1) {
4381 TCW_4(__kmp_gtid_mode, 1);
4386 #ifdef KMP_ADJUST_BLOCKTIME 4389 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4390 if (__kmp_nth > __kmp_avail_proc) {
4391 __kmp_zero_bt = TRUE;
4398 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4399 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4401 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4403 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
  KMP_DEBUG_ASSERT(team && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);

  KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
  // Copy ICVs to the master thread's implicit taskdata
  __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
  copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);

  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
}
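// __kmp_initialize_team: full (re)initialization of a team structure for
// new_nproc threads; assumes t_threads and t_max_nproc are already set up.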
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));

  /* verify */
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_MB();

  team->t.t_master_tid = 0; /* not needed */
  /* team->t.t_master_bar;        not needed */
  team->t.t_serialized = new_nproc > 1 ? 0 : 1;
  team->t.t_nproc = new_nproc;

  /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */
  team->t.t_next_pool = NULL;

  TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
  team->t.t_invoke = NULL; /* not needed */

  // TODO???: team->t.t_max_active_levels = new_max_active_levels;
  team->t.t_sched.sched = new_icvs->sched.sched;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  team->t.t_fp_control_saved = FALSE; /* not needed */
  team->t.t_x87_fpu_control_word = 0; /* not needed */
  team->t.t_mxcsr = 0; /* not needed */
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  team->t.t_construct = 0;

  team->t.t_ordered.dt.t_value = 0;
  team->t.t_master_active = FALSE;

  memset(&team->t.t_taskq, '\0', sizeof(kmp_taskq_t));

#ifdef KMP_DEBUG
  team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */
#endif
#if KMP_OS_WINDOWS
  team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */
#endif

  team->t.t_control_stack_top = NULL;

  __kmp_reinitialize_team(team, new_icvs, loc);

  KMP_MB();
  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}
#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
/* Sets full mask for thread and returns old mask, no changes to structures. */
static void
__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
  if (KMP_AFFINITY_CAPABLE()) {
    int status;
    if (old_mask != NULL) {
      status = __kmp_get_system_affinity(old_mask, TRUE);
      int error = errno;
      if (status != 0) {
        __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
                    __kmp_msg_null);
      }
    }
    __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
  }
}
#endif
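// __kmp_partition_places: distribute the place partition among the team's
// threads according to the proc_bind policy (master, close or spread),
// setting th_first_place/th_last_place/th_new_place for each worker.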
4509 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 4515 static void __kmp_partition_places(kmp_team_t *team,
int update_master_only) {
4517 kmp_info_t *master_th = team->t.t_threads[0];
4518 KMP_DEBUG_ASSERT(master_th != NULL);
4519 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4520 int first_place = master_th->th.th_first_place;
4521 int last_place = master_th->th.th_last_place;
4522 int masters_place = master_th->th.th_current_place;
4523 team->t.t_first_place = first_place;
4524 team->t.t_last_place = last_place;
4526 KA_TRACE(20, (
"__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) " 4527 "bound to place %d partition = [%d,%d]\n",
4528 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4529 team->t.t_id, masters_place, first_place, last_place));
4531 switch (proc_bind) {
4533 case proc_bind_default:
4536 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4539 case proc_bind_master: {
4541 int n_th = team->t.t_nproc;
4542 for (f = 1; f < n_th; f++) {
4543 kmp_info_t *th = team->t.t_threads[f];
4544 KMP_DEBUG_ASSERT(th != NULL);
4545 th->th.th_first_place = first_place;
4546 th->th.th_last_place = last_place;
4547 th->th.th_new_place = masters_place;
4549 KA_TRACE(100, (
"__kmp_partition_places: master: T#%d(%d:%d) place %d " 4550 "partition = [%d,%d]\n",
4551 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4552 f, masters_place, first_place, last_place));
4556 case proc_bind_close: {
4558 int n_th = team->t.t_nproc;
4560 if (first_place <= last_place) {
4561 n_places = last_place - first_place + 1;
4563 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4565 if (n_th <= n_places) {
4566 int place = masters_place;
4567 for (f = 1; f < n_th; f++) {
4568 kmp_info_t *th = team->t.t_threads[f];
4569 KMP_DEBUG_ASSERT(th != NULL);
4571 if (place == last_place) {
4572 place = first_place;
4573 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4578 th->th.th_first_place = first_place;
4579 th->th.th_last_place = last_place;
4580 th->th.th_new_place = place;
4582 KA_TRACE(100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d " 4583 "partition = [%d,%d]\n",
4584 __kmp_gtid_from_thread(team->t.t_threads[f]),
4585 team->t.t_id, f, place, first_place, last_place));
4588 int S, rem, gap, s_count;
4589 S = n_th / n_places;
4591 rem = n_th - (S * n_places);
4592 gap = rem > 0 ? n_places / rem : n_places;
4593 int place = masters_place;
4595 for (f = 0; f < n_th; f++) {
4596 kmp_info_t *th = team->t.t_threads[f];
4597 KMP_DEBUG_ASSERT(th != NULL);
4599 th->th.th_first_place = first_place;
4600 th->th.th_last_place = last_place;
4601 th->th.th_new_place = place;
4604 if ((s_count == S) && rem && (gap_ct == gap)) {
4606 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4608 if (place == last_place) {
4609 place = first_place;
4610 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4618 }
else if (s_count == S) {
4619 if (place == last_place) {
4620 place = first_place;
4621 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4631 (
"__kmp_partition_places: close: T#%d(%d:%d) place %d " 4632 "partition = [%d,%d]\n",
4633 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4634 th->th.th_new_place, first_place, last_place));
4636 KMP_DEBUG_ASSERT(place == masters_place);
4640 case proc_bind_spread: {
4642 int n_th = team->t.t_nproc;
4645 if (first_place <= last_place) {
4646 n_places = last_place - first_place + 1;
4648 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4650 if (n_th <= n_places) {
4653 if (n_places != static_cast<int>(__kmp_affinity_num_masks)) {
4654 int S = n_places / n_th;
4655 int s_count, rem, gap, gap_ct;
4657 place = masters_place;
4658 rem = n_places - n_th * S;
4659 gap = rem ? n_th / rem : 1;
4662 if (update_master_only == 1)
4664 for (f = 0; f < thidx; f++) {
4665 kmp_info_t *th = team->t.t_threads[f];
4666 KMP_DEBUG_ASSERT(th != NULL);
4668 th->th.th_first_place = place;
4669 th->th.th_new_place = place;
4671 while (s_count < S) {
4672 if (place == last_place) {
4673 place = first_place;
4674 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4681 if (rem && (gap_ct == gap)) {
4682 if (place == last_place) {
4683 place = first_place;
4684 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4692 th->th.th_last_place = place;
4695 if (place == last_place) {
4696 place = first_place;
4697 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4704 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d " 4705 "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
4706 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4707 f, th->th.th_new_place, th->th.th_first_place,
4708 th->th.th_last_place, __kmp_affinity_num_masks));
4714 double current =
static_cast<double>(masters_place);
4716 (
static_cast<double>(n_places + 1) / static_cast<double>(n_th));
4721 if (update_master_only == 1)
4723 for (f = 0; f < thidx; f++) {
4724 first =
static_cast<int>(current);
4725 last =
static_cast<int>(current + spacing) - 1;
4726 KMP_DEBUG_ASSERT(last >= first);
4727 if (first >= n_places) {
4728 if (masters_place) {
4731 if (first == (masters_place + 1)) {
4732 KMP_DEBUG_ASSERT(f == n_th);
4735 if (last == masters_place) {
4736 KMP_DEBUG_ASSERT(f == (n_th - 1));
4740 KMP_DEBUG_ASSERT(f == n_th);
4745 if (last >= n_places) {
4746 last = (n_places - 1);
4751 KMP_DEBUG_ASSERT(0 <= first);
4752 KMP_DEBUG_ASSERT(n_places > first);
4753 KMP_DEBUG_ASSERT(0 <= last);
4754 KMP_DEBUG_ASSERT(n_places > last);
4755 KMP_DEBUG_ASSERT(last_place >= first_place);
4756 th = team->t.t_threads[f];
4757 KMP_DEBUG_ASSERT(th);
4758 th->th.th_first_place = first;
4759 th->th.th_new_place = place;
4760 th->th.th_last_place = last;
4763 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d " 4764 "partition = [%d,%d], spacing = %.4f\n",
4765 __kmp_gtid_from_thread(team->t.t_threads[f]),
4766 team->t.t_id, f, th->th.th_new_place,
4767 th->th.th_first_place, th->th.th_last_place, spacing));
4771 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4773 int S, rem, gap, s_count;
4774 S = n_th / n_places;
4776 rem = n_th - (S * n_places);
4777 gap = rem > 0 ? n_places / rem : n_places;
4778 int place = masters_place;
4781 if (update_master_only == 1)
4783 for (f = 0; f < thidx; f++) {
4784 kmp_info_t *th = team->t.t_threads[f];
4785 KMP_DEBUG_ASSERT(th != NULL);
4787 th->th.th_first_place = place;
4788 th->th.th_last_place = place;
4789 th->th.th_new_place = place;
4792 if ((s_count == S) && rem && (gap_ct == gap)) {
4794 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4796 if (place == last_place) {
4797 place = first_place;
4798 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4806 }
else if (s_count == S) {
4807 if (place == last_place) {
4808 place = first_place;
4809 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4818 KA_TRACE(100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d " 4819 "partition = [%d,%d]\n",
4820 __kmp_gtid_from_thread(team->t.t_threads[f]),
4821 team->t.t_id, f, th->th.th_new_place,
4822 th->th.th_first_place, th->th.th_last_place));
4824 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4832 KA_TRACE(20, (
"__kmp_partition_places: exit T#%d\n", team->t.t_id));
4840 __kmp_allocate_team(kmp_root_t *root,
int new_nproc,
int max_nproc,
4842 ompt_data_t ompt_parallel_data,
4845 kmp_proc_bind_t new_proc_bind,
4847 kmp_internal_control_t *new_icvs,
4848 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
4849 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
4852 int use_hot_team = !root->r.r_active;
4855 KA_TRACE(20, (
"__kmp_allocate_team: called\n"));
4856 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
4857 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
4860 #if KMP_NESTED_HOT_TEAMS 4861 kmp_hot_team_ptr_t *hot_teams;
4863 team = master->th.th_team;
4864 level = team->t.t_active_level;
4865 if (master->th.th_teams_microtask) {
4866 if (master->th.th_teams_size.nteams > 1 &&
4869 (microtask_t)__kmp_teams_master ||
4870 master->th.th_teams_level <
4876 hot_teams = master->th.th_hot_teams;
4877 if (level < __kmp_hot_teams_max_level && hot_teams &&
4887 if (use_hot_team && new_nproc > 1) {
4888 KMP_DEBUG_ASSERT(new_nproc == max_nproc);
4889 #if KMP_NESTED_HOT_TEAMS 4890 team = hot_teams[level].hot_team;
4892 team = root->r.r_hot_team;
4895 if (__kmp_tasking_mode != tskm_immediate_exec) {
4896 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p " 4897 "task_team[1] = %p before reinit\n",
4898 team->t.t_task_team[0], team->t.t_task_team[1]));
4905 if (team->t.t_nproc == new_nproc) {
4906 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
4909 if (team->t.t_size_changed == -1) {
4910 team->t.t_size_changed = 1;
4912 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
4916 kmp_r_sched_t new_sched = new_icvs->sched;
4918 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
4920 __kmp_reinitialize_team(team, new_icvs,
4921 root->r.r_uber_thread->th.th_ident);
4923 KF_TRACE(10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
4924 team->t.t_threads[0], team));
4925 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
4928 #if KMP_AFFINITY_SUPPORTED 4929 if ((team->t.t_size_changed == 0) &&
4930 (team->t.t_proc_bind == new_proc_bind)) {
4931 if (new_proc_bind == proc_bind_spread) {
4932 __kmp_partition_places(
4935 KA_TRACE(200, (
"__kmp_allocate_team: reusing hot team #%d bindings: " 4936 "proc_bind = %d, partition = [%d,%d]\n",
4937 team->t.t_id, new_proc_bind, team->t.t_first_place,
4938 team->t.t_last_place));
4940 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4941 __kmp_partition_places(team);
4944 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4947 }
else if (team->t.t_nproc > new_nproc) {
4949 (
"__kmp_allocate_team: decreasing hot team thread count to %d\n",
4952 team->t.t_size_changed = 1;
4953 #if KMP_NESTED_HOT_TEAMS 4954 if (__kmp_hot_teams_mode == 0) {
4957 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4958 hot_teams[level].hot_team_nth = new_nproc;
4959 #endif // KMP_NESTED_HOT_TEAMS 4961 for (f = new_nproc; f < team->t.t_nproc; f++) {
4962 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4963 if (__kmp_tasking_mode != tskm_immediate_exec) {
4966 team->t.t_threads[f]->th.th_task_team = NULL;
4968 __kmp_free_thread(team->t.t_threads[f]);
4969 team->t.t_threads[f] = NULL;
4971 #if KMP_NESTED_HOT_TEAMS 4976 for (f = new_nproc; f < team->t.t_nproc; ++f) {
4977 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4978 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
4979 for (
int b = 0; b < bs_last_barrier; ++b) {
4980 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
4981 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
4983 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
4987 #endif // KMP_NESTED_HOT_TEAMS 4988 team->t.t_nproc = new_nproc;
4990 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
4991 __kmp_reinitialize_team(team, new_icvs,
4992 root->r.r_uber_thread->th.th_ident);
4995 for (f = 0; f < new_nproc; ++f) {
4996 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5000 KF_TRACE(10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5001 team->t.t_threads[0], team));
5003 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5006 for (f = 0; f < team->t.t_nproc; f++) {
5007 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5008 team->t.t_threads[f]->th.th_team_nproc ==
5014 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5015 #if KMP_AFFINITY_SUPPORTED 5016 __kmp_partition_places(team);
#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
      kmp_affin_mask_t *old_mask;
      if (KMP_AFFINITY_CAPABLE()) {
        KMP_CPU_ALLOC(old_mask);
      }
#endif

      KA_TRACE(20,
               ("__kmp_allocate_team: increasing hot team thread count to %d\n",
                new_nproc));

      team->t.t_size_changed = 1;

#if KMP_NESTED_HOT_TEAMS
      int avail_threads = hot_teams[level].hot_team_nth;
      if (new_nproc < avail_threads)
        avail_threads = new_nproc;
      kmp_info_t **other_threads = team->t.t_threads;
      for (f = team->t.t_nproc; f < avail_threads; ++f) {
        // Adjust barrier data of reserved threads (if any) of the team.
        int b;
        kmp_balign_t *balign = other_threads[f]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
        }
      }
      if (hot_teams[level].hot_team_nth >= new_nproc) {
        // We have all needed threads in reserve, no need to allocate any.
        // This is only possible in mode 1; there are no reserved threads in mode 0.
        KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
        team->t.t_nproc = new_nproc; // just get reserved threads involved
      } else {
        hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
      }
#endif // KMP_NESTED_HOT_TEAMS
      if (team->t.t_max_nproc < new_nproc) {
        /* reallocate larger arrays */
        __kmp_reallocate_team_arrays(team, new_nproc);
        __kmp_reinitialize_team(team, new_icvs, NULL);
      }

#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
      __kmp_set_thread_affinity_mask_full_tmp(old_mask);
#endif

      /* allocate new threads for the hot team */
      for (f = team->t.t_nproc; f < new_nproc; f++) {
        kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
        KMP_DEBUG_ASSERT(new_worker);
        team->t.t_threads[f] = new_worker;

        KA_TRACE(20, ("__kmp_allocate_team: team %d init T#%d arrived: "
                      "join=%llu, plain=%llu\n",
                      team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id,
                      f, team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                      team->t.t_bar[bs_plain_barrier].b_arrived));

        { // Initialize barrier data for the new thread.
          int b;
          kmp_balign_t *balign = new_worker->th.th_bar;
          for (b = 0; b < bs_last_barrier; ++b) {
            balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
            KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
                             KMP_BARRIER_PARENT_FLAG);
            balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
          }
        }
      }

#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
      if (KMP_AFFINITY_CAPABLE()) {
        /* Restore the initial master thread's affinity mask */
        __kmp_set_system_affinity(old_mask, TRUE);
        KMP_CPU_FREE(old_mask);
      }
#endif
#if KMP_NESTED_HOT_TEAMS
      } // end of check of t_nproc vs. new_nproc vs. hot_team_nth
#endif // KMP_NESTED_HOT_TEAMS
      /* make sure everyone is syncronized */
      int old_nproc = team->t.t_nproc; // save old value, used below

      __kmp_initialize_team(team, new_nproc, new_icvs,
                            root->r.r_uber_thread->th.th_ident);

      /* reinitialize the threads */
      KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
      for (f = 0; f < team->t.t_nproc; ++f)
        __kmp_initialize_info(team->t.t_threads[f], team, f,
                              __kmp_gtid_from_tid(f, team));
      if (level) { // set th_task_state for new threads in nested hot team
        for (f = old_nproc; f < team->t.t_nproc; ++f)
          team->t.t_threads[f]->th.th_task_state =
              team->t.t_threads[0]->th.th_task_state_memo_stack[level];
      } else { // set th_task_state for new threads in non-nested hot team
        int old_state =
            team->t.t_threads[0]->th.th_task_state; // copy master's state
        for (f = old_nproc; f < team->t.t_nproc; ++f)
          team->t.t_threads[f]->th.th_task_state = old_state;
      }

      for (f = 0; f < team->t.t_nproc; ++f) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                         team->t.t_threads[f]->th.th_team_nproc ==
                             team->t.t_nproc);
      }

      KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#if KMP_AFFINITY_SUPPORTED
      __kmp_partition_places(team);
#endif
    kmp_info_t *master = team->t.t_threads[0];
    if (master->th.th_teams_microtask) {
      for (f = 1; f < new_nproc; ++f) {
        // Propagate teams-construct specific info to workers.
        kmp_info_t *thr = team->t.t_threads[f];
        thr->th.th_teams_microtask = master->th.th_teams_microtask;
        thr->th.th_teams_level = master->th.th_teams_level;
        thr->th.th_teams_size = master->th.th_teams_size;
      }
    }
#if KMP_NESTED_HOT_TEAMS
    // Sync barrier data for nested hot teams.
    for (f = 1; f < new_nproc; ++f) {
      kmp_info_t *thr = team->t.t_threads[f];
      kmp_balign_t *balign = thr->th.th_bar;
      for (b = 0; b < bs_last_barrier; ++b) {
        balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
        KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
        balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
      }
    }
#endif // KMP_NESTED_HOT_TEAMS

    /* reallocate space for arguments if necessary */
    __kmp_alloc_argv_entries(argc, team, TRUE);
    KMP_CHECK_UPDATE(team->t.t_argc, argc);

    KF_TRACE(10, (" hot_team = %p\n", team));

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
                    "task_team[1] = %p after reinit\n",
                    team->t.t_task_team[0], team->t.t_task_team[1]));
    }

    __ompt_team_assign_id(team, ompt_parallel_data);
  /* next, try to take a team from the team pool */
  for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
    if (team->t.t_max_nproc >= max_nproc) {
      /* take this team from the team pool */
      __kmp_team_pool = team->t.t_next_pool;

      /* set up the team for fresh use */
      __kmp_initialize_team(team, new_nproc, new_icvs, NULL);

      KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
                    "task_team[1] %p to NULL\n",
                    &team->t.t_task_team[0], &team->t.t_task_team[1]));
      team->t.t_task_team[0] = NULL;
      team->t.t_task_team[1] = NULL;

      /* reallocate space for arguments if necessary */
      __kmp_alloc_argv_entries(argc, team, TRUE);
      KMP_CHECK_UPDATE(team->t.t_argc, argc);

      KA_TRACE(
          20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
               team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
      for (b = 0; b < bs_last_barrier; ++b) {
        team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
        team->t.t_bar[b].b_master_arrived = 0;
        team->t.t_bar[b].b_team_arrived = 0;
      }

      team->t.t_proc_bind = new_proc_bind;

      KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
                    team->t.t_id));

      __ompt_team_assign_id(team, ompt_parallel_data);
    }

    /* reap this team if it is too small, then loop back and check the next one */
    team = __kmp_reap_team(team);
    __kmp_team_pool = team;
  }
  /* nothing available in the pool; make a new team */
  team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));

  /* and set it up */
  team->t.t_max_nproc = max_nproc;
  /* NOTE: if team arrays are allocated as part of the team struct, this may change */
  __kmp_allocate_team_arrays(team, max_nproc);

  KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
  __kmp_initialize_team(team, new_nproc, new_icvs, NULL);

  KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
                "%p to NULL\n",
                &team->t.t_task_team[0], &team->t.t_task_team[1]));
  team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes the memory
  team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes the memory

  if (__kmp_storage_map) {
    __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
  }

  /* allocate space for arguments */
  __kmp_alloc_argv_entries(argc, team, FALSE);
  team->t.t_argc = argc;

  KA_TRACE(20,
           ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
            team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
  for (b = 0; b < bs_last_barrier; ++b) {
    team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
    team->t.t_bar[b].b_master_arrived = 0;
    team->t.t_bar[b].b_team_arrived = 0;
  }

  team->t.t_proc_bind = new_proc_bind;

  __ompt_team_assign_id(team, ompt_parallel_data);
  team->t.ompt_serialized_team_info = NULL;

  KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
                team->t.t_id));
/* free the team.  return it to the team pool.  release all the threads
   associated with it */
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  int f;
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));

  /* verify state */
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);

  int use_hot_team = team == root->r.r_hot_team;
#if KMP_NESTED_HOT_TEAMS
  int level;
  kmp_hot_team_ptr_t *hot_teams;
  if (master) {
    level = team->t.t_active_level - 1;
    if (master->th.th_teams_microtask) { // in teams construct?
      if (master->th.th_teams_size.nteams > 1) {
        ++level;
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master->th.th_teams_level == team->t.t_level) {
        ++level;
      }
    }
    hot_teams = master->th.th_hot_teams;
    if (level < __kmp_hot_teams_max_level) {
      KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
      use_hot_team = 1;
    }
  }
#endif // KMP_NESTED_HOT_TEAMS

  /* team is done working */
  TCW_SYNC_PTR(team->t.t_pkfn, NULL); // important for Debugging Support Library
  team->t.t_copyin_counter = 0; // init counter for possible reuse

  /* if we are not a hot team, release our threads */
  if (!use_hot_team) {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Wait for threads to reach a reapable state.
      for (f = 1; f < team->t.t_nproc; ++f) {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]);
        kmp_info_t *th = team->t.t_threads[f];
        volatile kmp_uint32 *state = &th->th.th_reap_state;
        while (*state != KMP_SAFE_TO_REAP) {
#if KMP_OS_WINDOWS
          // On Windows a thread can be killed at any time; check that.
          DWORD ecode;
          if (!__kmp_is_thread_alive(th, &ecode)) {
            *state = KMP_SAFE_TO_REAP; // reset the flag for a dead thread
            break;
          }
#endif
          // If the thread is sleeping, awaken it so it can reach a reapable state.
          kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
          if (fl.is_sleeping())
            fl.resume(__kmp_gtid_from_thread(th));
        }
      }

      // Delete task teams.
      int tt_idx;
      for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
        kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
        if (task_team != NULL) {
          for (f = 0; f < team->t.t_nproc; ++f) { // threads unref task teams
            team->t.t_threads[f]->th.th_task_team = NULL;
          }
          KA_TRACE(20,
                   ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
                    __kmp_get_gtid(), task_team, team->t.t_id));
#if KMP_NESTED_HOT_TEAMS
          __kmp_free_task_team(master, task_team);
#endif
          team->t.t_task_team[tt_idx] = NULL;
        }
      }
    }

    // Reset pointer to parent team only for non-hot teams.
    team->t.t_parent = NULL;
    team->t.t_level = 0;
    team->t.t_active_level = 0;

    /* free the worker threads */
    for (f = 1; f < team->t.t_nproc; ++f) {
      KMP_DEBUG_ASSERT(team->t.t_threads[f]);
      __kmp_free_thread(team->t.t_threads[f]);
      team->t.t_threads[f] = NULL;
    }

    /* put the team back in the team pool */
    team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
    __kmp_team_pool = (volatile kmp_team_t *)team;
  }
/* reap the team.  destroy it, reclaim all its resources and free its memory */
kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
  kmp_team_t *next_pool = team->t.t_next_pool;

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_DEBUG_ASSERT(team->t.t_argv);

  /* TODO clean the threads that are part of this? */

  /* free stuff */
  __kmp_free_team_arrays(team);
  if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
void __kmp_free_thread(kmp_info_t *this_th) {
  // This thread is now part of a team no more; add it to the thread pool.
  int gtid;
  kmp_info_t **scan;
  kmp_root_t *root = this_th->th.th_root;

  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));

  KMP_DEBUG_ASSERT(this_th);

  // When moving a thread to the pool, switch it to wait on its own b_go flag,
  // and clear its team pointer (no team).
  int b;
  kmp_balign_t *balign = this_th->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
      balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
    balign[b].bb.team = NULL;
    balign[b].bb.leaf_kids = 0;
  }
  this_th->th.th_task_state = 0;
  this_th->th.th_reap_state = KMP_SAFE_TO_REAP;

  /* put the thread back on the free pool */
  TCW_PTR(this_th->th.th_team, NULL);
  TCW_PTR(this_th->th.th_root, NULL);
  TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */

  __kmp_free_implicit_task(this_th);
  this_th->th.th_current_task = NULL;

  // If the __kmp_thread_pool_insert_pt is already past the new insert
  // point, then we need to re-scan the entire list.
  gtid = this_th->th.th_info.ds.ds_gtid;
  if (__kmp_thread_pool_insert_pt != NULL) {
    KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
    if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
      __kmp_thread_pool_insert_pt = NULL;
    }
  }

  // Scan down the list to find the place to insert the thread.
  // scan is the address of a link in the list, possibly the address of
  // __kmp_thread_pool itself.  The list is kept sorted by gtid.
  if (__kmp_thread_pool_insert_pt != NULL) {
    scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
  } else {
    scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
  }
  for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
       scan = &((*scan)->th.th_next_pool))
    ;

  // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
  // to its address.
  TCW_PTR(this_th->th.th_next_pool, *scan);
  __kmp_thread_pool_insert_pt = *scan = this_th;
  KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
                   (this_th->th.th_info.ds.ds_gtid <
                    this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
  TCW_4(this_th->th.th_in_pool, TRUE);
  __kmp_thread_pool_nth++;

  TCW_4(__kmp_nth, __kmp_nth - 1);
  root->r.r_cg_nthreads--;

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to user setting or default if necessary */
  /* Middle initialization might never have occurred                */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */
/* ------------------------------------------------------------------------ */

void *__kmp_launch_thread(kmp_info_t *this_thr) {
  int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t *(*volatile pteam);

  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));

  if (__kmp_env_consistency_check) {
    this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
  }

  ompt_data_t *thread_data;
  if (ompt_enabled.enabled) {
    thread_data = &(this_thr->th.ompt_thread_info.thread_data);
    *thread_data = ompt_data_none;

    this_thr->th.ompt_thread_info.state = omp_state_overhead;
    this_thr->th.ompt_thread_info.wait_id = 0;
    this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_thread_begin) {
      ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
          ompt_thread_worker, thread_data);
    }
  }

  if (ompt_enabled.enabled) {
    this_thr->th.ompt_thread_info.state = omp_state_idle;
  }

  /* This is the place where worker threads wait for work. */
  while (!TCR_4(__kmp_global.g.g_done)) {
    KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);

    /* wait for work to do */
    KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));

    /* No tid yet since not part of a team */
    __kmp_fork_barrier(gtid, KMP_GTID_DNE);

    if (ompt_enabled.enabled) {
      this_thr->th.ompt_thread_info.state = omp_state_overhead;
    }

    pteam = (kmp_team_t * (*))(&this_thr->th.th_team);

    /* have we been allocated? */
    if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
      /* we were just woken up, so run our new task */
      if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
        int rc;
        KA_TRACE(20,
                 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
                  gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                  (*pteam)->t.t_pkfn));

        updateHWFPControl(*pteam);

        if (ompt_enabled.enabled) {
          this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
        }

        rc = (*pteam)->t.t_invoke(gtid);

        KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
                      gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                      (*pteam)->t.t_pkfn));
      }
      if (ompt_enabled.enabled) {
        /* no frame set while outside task */
        __ompt_get_task_info_object(0)->frame.exit_frame = NULL;

        this_thr->th.ompt_thread_info.state = omp_state_overhead;
      }
      /* join barrier after parallel region */
      __kmp_join_barrier(gtid);
    }
  }
  TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);

  if (ompt_enabled.ompt_callback_thread_end) {
    ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
  }

  this_thr->th.th_task_team = NULL;
  /* run the destructors for the threadprivate data for this thread */
  __kmp_common_destroy_gtid(gtid);

  KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
void __kmp_internal_end_dest(void *specific_gtid) {
#if KMP_COMPILER_ICC
#pragma warning(push)
#pragma warning(disable : 810) // conversion from "void *" to "int" may lose significant bits
#endif
  // Make sure no significant bits are lost.
  int gtid = (kmp_intptr_t)specific_gtid - 1;
#if KMP_COMPILER_ICC
#pragma warning(pop)
#endif

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
  /* NOTE: the gtid is stored as gtid+1 in thread-local storage because 0 is
     reserved for the nothing-stored case. */

  if (gtid >= 0 && KMP_UBER_GTID(gtid))
    __kmp_gtid_set_specific(gtid);
#ifdef KMP_TDATA_GTID
  __kmp_gtid = gtid;
#endif
  __kmp_internal_end_thread(gtid);
}

#if KMP_OS_UNIX && KMP_DYNAMIC_LIB

__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}

void __kmp_internal_end_fini(void) { __kmp_internal_end_atexit(); }

#endif

/* [Windows] when the atexit handler is called, there may still be more than
   one thread alive. */
void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
  __kmp_internal_end_library(-1);
  __kmp_close_console();
}
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
  // It is assumed __kmp_forkjoin_lock is acquired.

  int gtid;

  KMP_DEBUG_ASSERT(thread != NULL);

  gtid = thread->th.th_info.ds.ds_gtid;

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    /* Assume the threads are at the fork barrier here */
    KA_TRACE(20,
             ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
              gtid));
    /* Need release fence here to prevent seg faults for tree forkjoin barrier */
    ANNOTATE_HAPPENS_BEFORE(thread);
    kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
    __kmp_release_64(&flag);
  }

  // Terminate OS thread.
  __kmp_reap_worker(thread);

  if (thread->th.th_active_in_pool) {
    thread->th.th_active_in_pool = FALSE;
    KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
    KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
  }

  // Decrement # of [worker] threads in the pool.
  KMP_DEBUG_ASSERT(__kmp_thread_pool_nth > 0);
  --__kmp_thread_pool_nth;

  __kmp_free_implicit_task(thread);

  // Free the fast memory for tasking.
  __kmp_free_fast_memory(thread);

  __kmp_suspend_uninitialize_thread(thread);

  KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
  TCW_SYNC_PTR(__kmp_threads[gtid], NULL);

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to user setting or default if necessary */
  /* Middle initialization might never have occurred                */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* free the memory being used */
  if (__kmp_env_consistency_check) {
    if (thread->th.th_cons) {
      __kmp_free_cons_stack(thread->th.th_cons);
      thread->th.th_cons = NULL;
    }
  }

  if (thread->th.th_pri_common != NULL) {
    __kmp_free(thread->th.th_pri_common);
    thread->th.th_pri_common = NULL;
  }

  if (thread->th.th_task_state_memo_stack != NULL) {
    __kmp_free(thread->th.th_task_state_memo_stack);
    thread->th.th_task_state_memo_stack = NULL;
  }

  if (thread->th.th_local.bget_data != NULL) {
    __kmp_finalize_bget(thread);
  }

#if KMP_AFFINITY_SUPPORTED
  if (thread->th.th_affin_mask != NULL) {
    KMP_CPU_FREE(thread->th.th_affin_mask);
    thread->th.th_affin_mask = NULL;
  }
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_HIER_SCHED
  if (thread->th.th_hier_bar_data != NULL) {
    __kmp_free(thread->th.th_hier_bar_data);
    thread->th.th_hier_bar_data = NULL;
  }
#endif

  __kmp_reap_team(thread->th.th_serial_team);
  thread->th.th_serial_team = NULL;
static void __kmp_internal_end(void) {
  int i;

  /* First, unregister the library */
  __kmp_unregister_library();

  __kmp_reclaim_dead_roots();

  for (i = 0; i < __kmp_threads_capacity; i++)
    if (__kmp_root[i])
      if (__kmp_root[i]->r.r_active)
        break;
  KMP_MB(); /* Flush all pending memory write invalidates. */
  TCW_SYNC_4(__kmp_global.g.g_done, TRUE);

  if (i < __kmp_threads_capacity) {
#if KMP_USE_MONITOR
    // Need to check that the monitor was initialized before reaping it.
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (TCR_4(__kmp_init_monitor)) {
      __kmp_reap_monitor(&__kmp_monitor);
      TCW_4(__kmp_init_monitor, 0);
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
#endif // KMP_USE_MONITOR
  } else {
    /* No other alive roots: reap the worker threads and teams. */
    for (i = 0; i < __kmp_threads_capacity; i++) {
      if (__kmp_root[i]) {
        KMP_ASSERT(!__kmp_root[i]->r.r_active);
      }
    }

    // Reap the worker threads in the thread pool.
    while (__kmp_thread_pool != NULL) { // Loop thru all the threads in the pool.
      // Get the next thread from the pool.
      kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
      __kmp_thread_pool = thread->th.th_next_pool;
      // Reap it.
      KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
      thread->th.th_next_pool = NULL;
      thread->th.th_in_pool = FALSE;
      __kmp_reap_thread(thread, 0);
    }
    __kmp_thread_pool_insert_pt = NULL;

    // Reap teams in the team pool.
    while (__kmp_team_pool != NULL) { // Loop thru all the teams in the pool.
      // Get the next team from the pool.
      kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
      __kmp_team_pool = team->t.t_next_pool;
      // Reap it.
      team->t.t_next_pool = NULL;
      __kmp_reap_team(team);
    }

    __kmp_reap_task_teams();

    // Make sure no thread is still inside a blocking runtime call.
    for (i = 0; i < __kmp_threads_capacity; i++) {
      kmp_info_t *thr = __kmp_threads[i];
      while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
        KMP_CPU_PAUSE();
    }

    for (i = 0; i < __kmp_threads_capacity; ++i) {
      // TBD: add some checking here, e.g. that __kmp_threads[i] == NULL.
    }

    /* Make sure all threadprivate destructors get run by joining with all
       worker threads before resetting this flag */
    TCW_SYNC_4(__kmp_init_common, FALSE);

    KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n"));
    KMP_MB();

#if KMP_USE_MONITOR
    __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
    if (TCR_4(__kmp_init_monitor)) {
      __kmp_reap_monitor(&__kmp_monitor);
      TCW_4(__kmp_init_monitor, 0);
    }
    __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
    KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));
#endif
  }

  TCW_4(__kmp_init_gtid, FALSE);
  KMP_MB(); /* Flush all pending memory write invalidates. */
}
void __kmp_internal_end_library(int gtid_req) {
  /* if we have already cleaned up, don't try again; it wouldn't be pretty */
  /* this shouldn't be a race condition because __kmp_internal_end() is the
     only place to clear __kmp_serial_init */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));
    /* TODO abort? */
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));
    return;
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */

  /* find out who we are and what we should do */
  {
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(10,
             ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "
                    "shutdown\n"));
      /* we don't know who we are, but we may still shutdown the library */
    } else if (KMP_UBER_GTID(gtid)) {
      /* unregister ourselves as an uber thread.  gtid is no longer valid */
      if (__kmp_root[gtid]->r.r_active) {
        __kmp_global.g.g_abort = -1;
        TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
        KA_TRACE(10,
                 ("__kmp_internal_end_library: root still active, abort T#%d\n",
                  gtid));
        return;
      } else {
        KA_TRACE(10,
                 ("__kmp_internal_end_library: unregistering sibling T#%d\n",
                  gtid));
        __kmp_unregister_root_current_thread(gtid);
      }
    } else {
/* worker threads may call this function through the atexit handler, if they call exit() */
#ifdef DUMP_DEBUG_ON_EXIT
      if (__kmp_debug_buf)
        __kmp_dump_debug_buffer();
#endif
      return;
    }
  }
  /* synchronize the termination process */
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* have we already finished? */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));
    /* TODO abort? */
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* We need this lock to enforce mutex between this reading of
     __kmp_threads_capacity and the writing by __kmp_register_root. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  /* now we can safely conduct the actual termination */
  __kmp_internal_end();

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));

#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif

  __kmp_close_console();

  __kmp_fini_allocator();
void __kmp_internal_end_thread(int gtid_req) {
  int i;

  /* if we have already cleaned up, don't try again; it wouldn't be pretty */
  /* this shouldn't be a race condition because __kmp_internal_end() is the
     only place to clear __kmp_serial_init */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));
    /* TODO abort? */
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));
    return;
  }

  /* find out who we are and what we should do */
  {
    int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(10,
             ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
    if (gtid == KMP_GTID_SHUTDOWN) {
      KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "
                    "already shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_MONITOR) {
      KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "
                    "registered, or system shutdown\n"));
      return;
    } else if (gtid == KMP_GTID_DNE) {
      KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "
                    "shutdown\n"));
      return;
      /* we don't know who we are */
    } else if (KMP_UBER_GTID(gtid)) {
      /* unregister ourselves as an uber thread.  gtid is no longer valid */
      if (__kmp_root[gtid]->r.r_active) {
        __kmp_global.g.g_abort = -1;
        TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
        KA_TRACE(10,
                 ("__kmp_internal_end_thread: root still active, abort T#%d\n",
                  gtid));
        return;
      } else {
        KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
                      gtid));
        __kmp_unregister_root_current_thread(gtid);
      }
    } else {
      /* just a worker thread, let's leave */
      KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));

      if (gtid >= 0) {
        __kmp_threads[gtid]->th.th_task_team = NULL;
      }

      KA_TRACE(10,
               ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
                gtid));
      return;
    }
  }

  KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));

  /* synchronize the termination process */
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* have we already finished? */
  if (__kmp_global.g.g_abort) {
    KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));
    /* TODO abort? */
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* We need this lock to enforce mutex between this reading of
     __kmp_threads_capacity and the writing by __kmp_register_root. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i)) {
      KA_TRACE(10,
               ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      __kmp_release_bootstrap_lock(&__kmp_initz_lock);
      return;
    }
  }

  /* now we can safely conduct the actual termination */
  __kmp_internal_end();

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));

#ifdef DUMP_DEBUG_ON_EXIT
  if (__kmp_debug_buf)
    __kmp_dump_debug_buffer();
#endif
}
// -----------------------------------------------------------------------------
// Library registration stuff.

static long __kmp_registration_flag = 0;
// Random value used to indicate library initialization.
static char *__kmp_registration_str = NULL;
// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.

static inline char *__kmp_reg_status_name() {
  /* On RHEL 3u5 if linked statically, getpid() returns different values in
     each thread.  If registration and unregistration go in different threads
     (omp_misc_other_root_exit.cpp test case), the registered_lib_env variable
     cannot be found, because the name would contain a different pid. */
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
} // __kmp_reg_status_name

void __kmp_register_library_startup(void) {

  char *name = __kmp_reg_status_name(); // Name of the environment variable.
  char *value = NULL; // Actual value of the environment variable.
  union { // Random value used to indicate library initialization.
    double dtime;
    long ltime;
  } time;
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  __kmp_initialize_system_tick();
#endif
  __kmp_read_system_time(&time.dtime);
  __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
  __kmp_registration_str =
      __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
                       __kmp_registration_flag, KMP_LIBRARY_FILE);

  KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
                __kmp_registration_str));

  // Set environment variable, but do not overwrite if it already exists.
  __kmp_env_set(name, __kmp_registration_str, 0);
  // Check whether the variable was written by us.
  value = __kmp_env_get(name);
  if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
    // Ok, environment variable set successfully.
  } else {
    // Write failed: another copy of the OpenMP RTL is already registered in
    // this process.  Parse the value ("<addr>-<val>-<file>") to find out
    // whether that copy is still alive.
    char *tail = value;
    char *flag_addr_str = NULL;
    char *flag_val_str = NULL;
    char const *file_name = NULL;
    __kmp_str_split(tail, '-', &flag_addr_str, &tail);
    __kmp_str_split(tail, '-', &flag_val_str, &tail);
    file_name = tail;
    if (tail != NULL) {
      long *flag_addr = 0;
      long flag_val = 0;
      KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr));
      KMP_SSCANF(flag_val_str, "%lx", &flag_val);
      if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
        // Check whether the environment-encoded address is mapped into our
        // address space, and if so whether it still holds the right value.
        if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
          // The other copy of the library is alive.
        }
      }
    }
    // Cannot parse the value: assume the other library is alive.
    file_name = "unknown library";
    // The other library is alive: duplicate registration is only allowed if
    // explicitly permitted by the user.
    char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
    if (!__kmp_str_match_true(duplicate_ok)) {
      // Not allowed: issue a fatal error.
      __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
                  KMP_HNT(DuplicateLibrary), __kmp_msg_null);
    }
    KMP_INTERNAL_FREE(duplicate_ok);
    __kmp_duplicate_library_ok = 1;
    // The other library is dead: clear the variable so registration can be retried.
    __kmp_env_unset(name);
    default: { KMP_DEBUG_ASSERT(0); } break;
  }
  KMP_INTERNAL_FREE((void *)value);
  KMP_INTERNAL_FREE((void *)name);

} // func __kmp_register_library_startup

void __kmp_unregister_library(void) {

  char *name = __kmp_reg_status_name();
  char *value = __kmp_env_get(name);

  KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
  KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
  if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
    // Ok, this is our variable.  Delete it.
    __kmp_env_unset(name);
  }

  KMP_INTERNAL_FREE(__kmp_registration_str);
  KMP_INTERNAL_FREE(value);
  KMP_INTERNAL_FREE(name);

  __kmp_registration_flag = 0;
  __kmp_registration_str = NULL;
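
// The registration handshake above round-trips through a per-process
// environment variable.  A minimal, inactive sketch of that round-trip using
// only helpers already defined in this file (the local variable names are
// illustrative, not runtime symbols):
#if 0
{
  char *name = __kmp_reg_status_name(); // "__KMP_REGISTERED_LIB_<pid>"
  // Stored value format: "<flag address>-<flag value>-<library file>".
  __kmp_env_set(name, __kmp_registration_str, 0); // does not overwrite
  char *value = __kmp_env_get(name);
  // If the read-back value is not ours, another copy of the runtime won the
  // race and is already registered in this process.
  int another_copy_present =
      !(value != NULL && strcmp(value, __kmp_registration_str) == 0);
  KMP_INTERNAL_FREE((void *)value);
  KMP_INTERNAL_FREE((void *)name);
}
#endif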
// -----------------------------------------------------------------------------
#if KMP_MIC_SUPPORTED

static void __kmp_check_mic_type() {
  kmp_cpuid_t cpuid_state = {0};
  kmp_cpuid_t *cs_p = &cpuid_state;
  __kmp_x86_cpuid(1, 0, cs_p);
  // We don't support mic1 at the moment.
  if ((cs_p->eax & 0xff0) == 0xB10) {
    __kmp_mic_type = mic2;
  } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
    __kmp_mic_type = mic3;
  } else {
    __kmp_mic_type = non_mic;
  }
}

#endif /* KMP_MIC_SUPPORTED */
static void __kmp_do_serial_initialize(void) {
  int i, gtid;
  int size;

  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));

  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));

  __kmp_validate_locks();

  /* Initialize internal memory allocator */
  __kmp_init_allocator();

  /* Register the library startup via an environment variable and check to see
     whether another copy of the library is already registered. */
  __kmp_register_library_startup();

  /* TODO reinitialization of library */
  if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));
  }

  __kmp_global.g.g_abort = 0;
  TCW_SYNC_4(__kmp_global.g.g_done, FALSE);

/* initialize the locks */
#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_init_speculative_stats();
#endif
#endif
#if KMP_STATS_ENABLED
  __kmp_stats_init();
#endif
  __kmp_init_lock(&__kmp_global_lock);
  __kmp_init_queuing_lock(&__kmp_dispatch_lock);
  __kmp_init_lock(&__kmp_debug_lock);
  __kmp_init_atomic_lock(&__kmp_atomic_lock);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
  __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
  __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
  __kmp_init_bootstrap_lock(&__kmp_exit_lock);
#if KMP_USE_MONITOR
  __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
#endif
  __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);

  /* conduct initialization and initial setup of configuration */

  __kmp_runtime_initialize();

#if KMP_MIC_SUPPORTED
  __kmp_check_mic_type();
#endif

  __kmp_abort_delay = 0;

  // From __kmp_init_dflt_team_nth(): assume the entire machine will be used.
  __kmp_dflt_team_nth_ub = __kmp_xproc;
  if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
    __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
  }
  if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
    __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
  }
  __kmp_max_nth = __kmp_sys_max_nth;
  __kmp_cg_max_nth = __kmp_sys_max_nth;
  __kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default
  if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
    __kmp_teams_max_nth = __kmp_sys_max_nth;
  }

  // Three vars below moved here from the "KMP_BLOCKTIME" part of
  // __kmp_env_initialize().
  __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
#if KMP_USE_MONITOR
  __kmp_monitor_wakeups =
      KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
  __kmp_bt_intervals =
      KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
#endif
  // From "KMP_LIBRARY" part of __kmp_env_initialize()
  __kmp_library = library_throughput;
  // From KMP_SCHEDULE initialization
  __kmp_static = kmp_sch_static_balanced;
#if KMP_FAST_REDUCTION_BARRIER
#define kmp_reduction_barrier_gather_bb ((int)1)
#define kmp_reduction_barrier_release_bb ((int)1)
#define kmp_reduction_barrier_gather_pat bp_hyper_bar
#define kmp_reduction_barrier_release_pat bp_hyper_bar
#endif // KMP_FAST_REDUCTION_BARRIER
  for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
    __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
    __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
    __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
    __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
#if KMP_FAST_REDUCTION_BARRIER
    if (i == bs_reduction_barrier) {
      __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
      __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
      __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
      __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
    }
#endif // KMP_FAST_REDUCTION_BARRIER
  }
#if KMP_FAST_REDUCTION_BARRIER
#undef kmp_reduction_barrier_release_pat
#undef kmp_reduction_barrier_gather_pat
#undef kmp_reduction_barrier_release_bb
#undef kmp_reduction_barrier_gather_bb
#endif // KMP_FAST_REDUCTION_BARRIER
#if KMP_MIC_SUPPORTED
  if (__kmp_mic_type == mic2) { // KNC
    // plain=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
    __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather
    __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
        1; // forkjoin release
    __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
    __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
  }
#if KMP_FAST_REDUCTION_BARRIER
  if (__kmp_mic_type == mic2) { // KNC
    __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
    __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
  }
#endif // KMP_FAST_REDUCTION_BARRIER
#endif // KMP_MIC_SUPPORTED

// From KMP_CHECKS initialization
#ifdef KMP_DEBUG
  __kmp_env_checks = TRUE; /* development versions have the extra checks */
#else
  __kmp_env_checks = FALSE; /* port versions have no extra checks */
#endif

  // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
  __kmp_foreign_tp = TRUE;

  __kmp_global.g.g_dynamic = FALSE;
  __kmp_global.g.g_dynamic_mode = dynamic_default;

  __kmp_env_initialize(NULL);

  // Print all messages in the message catalog for testing purposes.
  char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
  if (__kmp_str_match_true(val)) {
    kmp_str_buf_t buffer;
    __kmp_str_buf_init(&buffer);
    __kmp_i18n_dump_catalog(&buffer);
    __kmp_printf("%s", buffer.str);
    __kmp_str_buf_free(&buffer);
  }
  __kmp_env_free(&val);

  __kmp_threads_capacity =
      __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
  // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
  __kmp_tp_capacity = __kmp_default_tp_capacity(
      __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);

  // If the library is shut down properly, both pools must be NULL.  Just in
  // case, set them to NULL -- some memory may leak, but subsequent code will
  // work even if pools are not freed.
  KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
  KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
  KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
  __kmp_thread_pool = NULL;
  __kmp_thread_pool_insert_pt = NULL;
  __kmp_team_pool = NULL;

  /* Allocate all of the variable-sized records.  Since allocation is
     cache-aligned, just add extra padding for alignment. */
  size =
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
      CACHE_LINE;
  __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
  __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
                               sizeof(kmp_info_t *) * __kmp_threads_capacity);

  /* init thread counts */
  KMP_DEBUG_ASSERT(__kmp_all_nth ==
                   0); // Asserts fail if the library is reinitializing and
  KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination.

  /* setup the uber master thread and hierarchy */
  gtid = __kmp_register_root(TRUE);
  KA_TRACE(10, ("__kmp_do_serial_initialize  T#%d\n", gtid));
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(KMP_INITIAL_GTID(gtid));

  __kmp_common_initialize();

  /* Any teardown should be handled in __kmp_internal_end(). */
  __kmp_register_atfork();

#if !KMP_DYNAMIC_LIB
  {
    /* Invoke the exit handler when the program finishes; only for the static
       library.  For the dynamic library, we already have _fini and
       DllMain. */
    int rc = atexit(__kmp_internal_end_atexit);
    if (rc != 0) {
      __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
                  __kmp_msg_null);
    }
  }
#endif

#if KMP_HANDLE_SIGNALS
  __kmp_install_signals(FALSE);
  __kmp_install_signals(TRUE);
#endif

  /* we have finished the serial initialization */
  __kmp_init_counter++;

  __kmp_init_serial = TRUE;

  if (__kmp_settings) {
    __kmp_env_print();
  }

  if (__kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print_2();
  }
#endif // OMP_40_ENABLED

  KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
void __kmp_serial_initialize(void) {
  if (__kmp_init_serial) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_serial_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
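
// __kmp_serial_initialize and the other *_initialize entry points below share
// the same double-checked pattern: an unsynchronized read of the init flag as
// the fast path, then a re-check under __kmp_initz_lock before doing the real
// work.  A minimal, inactive sketch of the idiom with hypothetical names
// (example_* is not a runtime symbol):
#if 0
static int example_initialized = FALSE;

static void example_ensure_initialized(void) {
  if (example_initialized) // fast path: no lock taken
    return;
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (!example_initialized) { // re-check while holding the lock
    /* ... one-time setup ... */
    example_initialized = TRUE;
  }
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
#endif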
static void __kmp_do_middle_initialize(void) {
  int i, j;
  int prev_dflt_team_nth;

  if (!__kmp_init_serial) {
    __kmp_do_serial_initialize();
  }

  KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));

  // Save the previous value for the __kmp_dflt_team_nth so that
  // we can avoid some reinitialization if it hasn't changed.
  prev_dflt_team_nth = __kmp_dflt_team_nth;

#if KMP_AFFINITY_SUPPORTED
  // __kmp_affinity_initialize() will try to set __kmp_ncores to the
  // number of cores on the machine.
  __kmp_affinity_initialize();

  // Run through the __kmp_threads array and set the affinity mask
  // for each root thread that is currently registered with the RTL.
  for (i = 0; i < __kmp_threads_capacity; i++) {
    if (TCR_PTR(__kmp_threads[i]) != NULL) {
      __kmp_affinity_set_init_mask(i, TRUE);
    }
  }
#endif /* KMP_AFFINITY_SUPPORTED */

  KMP_ASSERT(__kmp_xproc > 0);
  if (__kmp_avail_proc == 0) {
    __kmp_avail_proc = __kmp_xproc;
  }

  // If there were empty places in the num_threads list (OMP_NUM_THREADS=,,2,3),
  // correct them now.
  j = 0;
  while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
    __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
        __kmp_avail_proc;
    j++;
  }

  if (__kmp_dflt_team_nth == 0) {
#ifdef KMP_DFLT_NTH_CORES
    // Default #threads = #cores
    __kmp_dflt_team_nth = __kmp_ncores;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_ncores (%d)\n",
                  __kmp_dflt_team_nth));
#else
    // Default #threads = #available OS procs
    __kmp_dflt_team_nth = __kmp_avail_proc;
    KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
                  "__kmp_avail_proc(%d)\n",
                  __kmp_dflt_team_nth));
#endif /* KMP_DFLT_NTH_CORES */
  }

  if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
    __kmp_dflt_team_nth = KMP_MIN_NTH;
  }
  if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
    __kmp_dflt_team_nth = __kmp_sys_max_nth;
  }

  // There's no harm in continuing if the following assertion fails.
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);

  if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
    // Run through the __kmp_threads array and set the num-threads ICV for each
    // root thread that has not already set it explicitly.
    for (i = 0; i < __kmp_threads_capacity; i++) {
      kmp_info_t *thread = __kmp_threads[i];
      if (thread == NULL)
        continue;
      if (thread->th.th_current_task->td_icvs.nproc != 0)
        continue;

      set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
    }
  }
  KA_TRACE(20,
           ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
            __kmp_dflt_team_nth));

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime to zero if necessary, now that __kmp_avail_proc is set */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth > __kmp_avail_proc) {
      __kmp_zero_bt = TRUE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  /* we have finished middle initialization */
  TCW_SYNC_4(__kmp_init_middle, TRUE);

  KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));
}
void __kmp_middle_initialize(void) {
  if (__kmp_init_middle) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_middle) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_middle_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
void __kmp_parallel_initialize(void) {
  int gtid = __kmp_entry_gtid(); // this might be a new root

  /* synchronize parallel initialization (for sibling) */
  if (TCR_4(__kmp_init_parallel))
    return;
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_parallel)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

  /* TODO reinitialization after we have already shut down */
  if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(10,
             ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
    __kmp_infinite_loop();
  }

  /* __kmp_initz_lock is already held, so calling __kmp_serial_initialize
     would cause a deadlock.  Call __kmp_do_middle_initialize directly. */
  if (!__kmp_init_middle) {
    __kmp_do_middle_initialize();
  }

  /* begin initialization */
  KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));
  KMP_ASSERT(KMP_UBER_GTID(gtid));

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  // Save the FP control regs.
  __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
  __kmp_store_mxcsr(&__kmp_init_mxcsr);
  __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#if KMP_HANDLE_SIGNALS
  /*  must be after __kmp_serial_initialize  */
  __kmp_install_signals(TRUE);
#endif

  __kmp_suspend_initialize();

#if defined(USE_LOAD_BALANCE)
  if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
    __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
  }
#else
  if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
  }
#endif

  if (__kmp_version) {
    __kmp_print_version_2();
  }

  /* we have finished parallel initialization */
  TCW_SYNC_4(__kmp_init_parallel, TRUE);

  KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
/* ------------------------------------------------------------------------ */

void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;

  /* none of the threads have encountered any constructs, yet. */
  this_thr->th.th_local.this_construct = 0;
#if KMP_CACHE_MANAGE
  KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
  dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
  KMP_DEBUG_ASSERT(dispatch);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);

  dispatch->th_disp_index = 0;
  dispatch->th_doacross_buf_idx = 0; // reset doacross dispatch buffer counter
  if (__kmp_env_consistency_check)
    __kmp_push_parallel(gtid, team->t.t_ident);

  KMP_MB(); /* Flush all pending memory write invalidates.  */
}

void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(gtid, team->t.t_ident);

  __kmp_finish_implicit_task(this_thr);
}
int __kmp_invoke_task_func(int gtid) {
  int rc;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_run_before_invoked_task(gtid, tid, this_thr, team);

  if (__itt_stack_caller_create_ptr) {
    // Inform ittnotify about entering user's code.
    __kmp_itt_stack_callee_enter(team->t.t_stack_id);
  }
#if INCLUDE_SSC_MARKS
  SSC_MARK_INVOKING();
#endif

  void *dummy;
  void **exit_runtime_p;
  ompt_data_t *my_task_data;
  ompt_data_t *my_parallel_data;
  int ompt_team_size;

  if (ompt_enabled.enabled) {
    exit_runtime_p = &(
        team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame);
  } else {
    exit_runtime_p = &dummy;
  }

  my_task_data =
      &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
  my_parallel_data = &(team->t.ompt_team_info.parallel_data);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_team_size = team->t.t_nproc;
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
        __kmp_tid_from_gtid(gtid));
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
  }

  {
    KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
    KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
    rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
                                tid, (int)team->t.t_argc,
                                (void **)team->t.t_argv);
    *exit_runtime_p = NULL;
  }

  if (__itt_stack_caller_create_ptr) {
    // Inform ittnotify about leaving user's code.
    __kmp_itt_stack_callee_leave(team->t.t_stack_id);
  }
  __kmp_run_after_invoked_task(gtid, tid, this_thr, team);

  return rc;
void __kmp_teams_master(int gtid) {
  // This routine is called by all master threads in a teams construct.
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_team_t *team = thr->th.th_team;
  ident_t *loc = team->t.t_ident;
  thr->th.th_set_nproc = thr->th.th_teams_size.nth;
  KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
  KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
  KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
                __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));

  // Launch the league of teams now, but don't let the workers execute
  // (they hang on the fork barrier until the next parallel region).
#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
                  (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif
  __kmp_join_call(loc, gtid);
}

int __kmp_invoke_teams_master(int gtid) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
    KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                     (void *)__kmp_teams_master);
  __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
  __kmp_teams_master(gtid);
  __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
  return 1;
}
/* This sets the requested number of threads for the next parallel region
   encountered by this team.  Since this should be enclosed in the forkjoin
   critical section, it should avoid race conditions with asymmetrical nested
   parallelism. */
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}
/* This sets the requested number of teams for the teams region and/or
   the number of threads for the next parallel region encountered. */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams >= 0);
  KMP_DEBUG_ASSERT(num_threads >= 0);

  if (num_teams > __kmp_teams_max_nth) { // too many teams requested?
    if (!__kmp_reserve_warn) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    num_teams = __kmp_teams_max_nth;
  }
  // Set the number of teams (number of threads in the outer "parallel" of the
  // teams construct).
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  // Remember the number of threads for the inner parallel regions.
  if (num_threads == 0) {
    if (!TCR_4(__kmp_init_middle))
      __kmp_middle_initialize(); // get __kmp_avail_proc calculated
    num_threads = __kmp_avail_proc / num_teams;
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      // adjust num_threads without a warning, as it is not a user setting
      num_threads = __kmp_teams_max_nth / num_teams;
    }
  } else {
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      int new_threads = __kmp_teams_max_nth / num_teams;
      if (!__kmp_reserve_warn) { // user asked for too many threads
        __kmp_reserve_warn = 1;
        __kmp_msg(kmp_ms_warning,
                  KMP_MSG(CantFormThrTeam, num_threads, new_threads),
                  KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
      }
      num_threads = new_threads;
    }
  }
  thr->th.th_teams_size.nth = num_threads;
}
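
// __kmp_push_num_teams is reached from the compiler-generated entry point for
// a "#pragma omp teams" with num_teams/thread_limit clauses; the clamping
// above keeps num_teams * num_threads within __kmp_teams_max_nth.  A minimal,
// inactive user-level sketch, assuming a standard OpenMP compiler:
#if 0
#include <omp.h>
void example_teams(void) {
  // Requests 4 teams of at most 8 threads each; the runtime trims the product
  // to the teams limit if it is exceeded.
  #pragma omp teams num_teams(4) thread_limit(8)
  {
    /* ... distributed work ... */
  }
}
#endif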
// Set the proc_bind var to use in the following parallel region.
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}
/* Launch the worker threads into the microtask. */
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));

  team->t.t_construct = 0; /* no single directives seen yet */
  team->t.t_ordered.dt.t_value =
      0; /* thread 0 enters the ordered section first */

  /* Reset the identifiers on the dispatch buffer */
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  if (team->t.t_max_nproc > 1) {
    int i;
    for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
      team->t.t_disp_buffer[i].buffer_index = i;
      team->t.t_disp_buffer[i].doacross_buf_idx = i;
    }
  } else {
    team->t.t_disp_buffer[0].buffer_index = 0;
    team->t.t_disp_buffer[0].doacross_buf_idx = 0;
  }

  KMP_ASSERT(this_thr->th.th_team == team);

  for (f = 0; f < team->t.t_nproc; f++) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
  }

  /* release the worker threads so they may begin working */
  __kmp_fork_barrier(gtid, 0);
}
void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));

  /* Join barrier after fork */

  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);

  __kmp_join_barrier(gtid); /* wait for everyone */

  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state == omp_state_wait_barrier_implicit) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
    this_thr->th.ompt_thread_info.state = omp_state_overhead;

    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;

    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr);
    }

    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid);
    }
  }

  KMP_ASSERT(this_thr->th.th_team == team);
}
/* ------------------------------------------------------------------------ */

#ifdef USE_LOAD_BALANCE

// Return the worker threads actively spinning in the hot team, if we
// are at the outermost level of parallelism.  Otherwise, return 0.
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
  int i;
  int retval;
  kmp_team_t *hot_team;

  if (root->r.r_active) {
    return 0;
  }
  hot_team = root->r.r_hot_team;
  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    return hot_team->t.t_nproc - 1; // Don't count the master thread
  }

  // Skip the master thread; it is accounted for elsewhere.
  retval = 0;
  for (i = 1; i < hot_team->t.t_nproc; i++) {
    if (hot_team->t.t_threads[i]->th.th_active) {
      retval++;
    }
  }
  return retval;
}

// Perform an automatic adjustment to the number of
// threads used by the next parallel region.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
                set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }

  // Threads that are active in the thread pool, active in the hot team for
  // this particular root (if we are at the outer parallel level), and the
  // currently executing thread (to become the master) are available to add to
  // the new team, but are currently contributing to the system load, and must
  // be accounted for.
  pool_active = __kmp_thread_pool_active_nth;
  hot_team_active = __kmp_active_hot_team_nproc(root);
  team_curr_active = pool_active + hot_team_active + 1;

  // Check the system load.
  system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));

  if (system_active < 0) {
    // There was an error reading the necessary info from /proc, so use the
    // thread limit algorithm instead.  Once we set
    // __kmp_global.g.g_dynamic_mode = dynamic_thread_limit, we shouldn't wind
    // up getting back here.
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");

    // Make this call behave like the thread limit algorithm.
    retval = __kmp_avail_proc - __kmp_nth +
             (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (retval > set_nproc) {
      retval = set_nproc;
    }
    if (retval < KMP_MIN_NTH) {
      retval = KMP_MIN_NTH;
    }

    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }

  // There is a slight delay in the load balance algorithm in detecting newly
  // running procs.  The real system load at this instant should be at least as
  // large as the #active OpenMP threads that are available to add to the team.
  if (system_active < team_curr_active) {
    system_active = team_curr_active;
  }
  retval = __kmp_avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) {
    retval = set_nproc;
  }
  if (retval < KMP_MIN_NTH) {
    retval = KMP_MIN_NTH;
  }

  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
  return retval;
}

#endif /* USE_LOAD_BALANCE */
/* ------------------------------------------------------------------------ */

void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));

  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  __kmp_cleanup_threadprivate_caches();

  for (f = 0; f < __kmp_threads_capacity; f++) {
    if (__kmp_root[f] != NULL) {
      __kmp_free(__kmp_root[f]);
      __kmp_root[f] = NULL;
    }
  }
  __kmp_free(__kmp_threads);
  // __kmp_threads and __kmp_root were allocated at once, as a single block, so
  // there is no need to free __kmp_root separately.
  __kmp_threads = NULL;
  __kmp_root = NULL;
  __kmp_threads_capacity = 0;

#if KMP_USE_DYNAMIC_LOCK
  __kmp_cleanup_indirect_user_locks();
#else
  __kmp_cleanup_user_locks();
#endif

#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
  __kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_print_speculative_stats();
#endif
#endif
  KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
  __kmp_nested_nth.nth = NULL;
  __kmp_nested_nth.size = 0;
  __kmp_nested_nth.used = 0;
  KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
  __kmp_nested_proc_bind.bind_types = NULL;
  __kmp_nested_proc_bind.size = 0;
  __kmp_nested_proc_bind.used = 0;

  __kmp_i18n_catclose();

#if KMP_USE_HIER_SCHED
  __kmp_hier_scheds.deallocate();
#endif

#if KMP_STATS_ENABLED
  __kmp_stats_fini();
#endif

  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}
/* ------------------------------------------------------------------------ */

int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_begin() is no-op.
  return TRUE;
}

int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_end() is no-op.
  return TRUE;
}

void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads
     and assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;

  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}
/* ------------------------------------------------------------------------ */

void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* first, make sure we are initialized so we can get our gtid */
  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) { /* Must be called in serial section of top-level thread */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}
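
// __kmp_user_set_library backs the kmp_set_library* entry points.  A minimal,
// inactive usage sketch (assuming the kmp_* extensions declared in omp.h are
// available to the application):
#if 0
#include <omp.h>
void example_library_mode(void) {
  kmp_set_library_throughput(); // idle worker threads yield/sleep after blocktime
  #pragma omp parallel
  {
    /* ... */
  }
}
#endif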
void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

  if (arg & (0x1000 - 1)) {
    arg &= ~(0x1000 - 1);
  }

  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* only change the default stacksize before the first parallel region */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;

    __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
/* set the behaviour of the runtime library */
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
    (void)__kmp_change_library(TRUE);
  } break;
  case library_turnaround:
    (void)__kmp_change_library(TRUE);
    break;
  case library_throughput:
    (void)__kmp_change_library(FALSE);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg; /* argument is in milliseconds */
  int bt_intervals;
  int bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);

  /* Set whether blocktime has been set to "TRUE" */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);

  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));

  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
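
// __kmp_aux_set_blocktime backs kmp_set_blocktime(); the value is clamped to
// [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME] as shown above.  A minimal, inactive
// usage sketch (assuming the kmp_* extensions declared in omp.h):
#if 0
#include <omp.h>
void example_blocktime(void) {
  kmp_set_blocktime(0); // let idle worker threads go to sleep immediately
  #pragma omp parallel
  {
    /* ... */
  }
}
#endif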
void __kmp_aux_set_defaults(char const *str, int len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings
      || __kmp_display_env || __kmp_display_env_verbose
      ) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults
/* ------------------------------------------------------------------------ */
/* internal fast reduction routines */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default: critical construct as the reduction method.

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(loc); // it would be nice to test ( loc != 0 )
  KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // Another choice of getting a team size (with 1 dynamic dereference) is slower.
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64

#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS ||       \
    KMP_OS_DARWIN || KMP_OS_HURD

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS ||
       // KMP_OS_DARWIN || KMP_OS_HURD

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_HURD

    // basic tuning
    if (atomic_available) {
      if (num_vars <= 2) {
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION: if the method is forced from the environment, it
  // overrides the heuristic choice above.
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      }
#if KMP_FAST_REDUCTION_BARRIER
      forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      break;

    default:
      KMP_ASSERT(0); // "unsupported method specified"
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}
7775 #undef FAST_REDUCTION_TREE_METHOD_GENERATED 7776 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED 7782 kmp_int32 __kmp_get_reduce_method(
void) {
7783 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
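
// The packed reduction method keeps the chosen method in the bits above the
// low byte, which is why __kmp_get_reduce_method() shifts right by 8; for
// example, a packed value of 0x0200 decodes to method 0x02 (0x0200 >> 8 == 2,
// illustrative value only).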