#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#if KMP_USE_HIER_SCHED
#include "kmp_dispatch_hier.h"
#endif

#include "ompt-specific.h"

#if OMP_PROFILING_SUPPORT
#include "llvm/Support/TimeProfiler.h"
static char *ProfileTraceFile = nullptr;
#endif

#define KMP_USE_PRCTL 0

#include "tsan_annotations.h"
#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif

char const __kmp_version_omp_api[] =
    KMP_VERSION_PREFIX "API version: 5.0 (201611)";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
kmp_info_t __kmp_monitor;

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
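/* Determine the global thread id (gtid) of the calling thread. Depending on
   __kmp_gtid_mode this reads the thread-local TDATA variable, the keyed TLS
   value, or falls back to an internal search that matches the current stack
   address against the recorded stack base/size of every registered thread,
   refining the recorded stack extent for the chosen thread if needed. */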
int __kmp_get_global_thread_id() {
  kmp_info_t **other_threads;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));

  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();

  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);

  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);

  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
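/* Like __kmp_get_global_thread_id(), but if no gtid has been assigned yet
   (a new root thread), register the calling thread as a new root under the
   bootstrap initialization lock, running serial initialization first when
   this is the very first thread in the process. */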
int __kmp_get_global_thread_id_reg() {

  if (!__kmp_init_serial) {
#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
  gtid = __kmp_get_global_thread_id();

  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
      gtid = __kmp_register_root(FALSE);
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KMP_DEBUG_ASSERT(gtid >= 0);
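/* Sanity check: report the calling thread's stack when storage mapping is
   enabled and, when extensive checks are on, compare its stack range against
   every other registered thread, aborting with a StackOverlap fatal error if
   the ranges intersect. */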
void __kmp_check_stack_overlap(kmp_info_t *th) {
  char *stack_beg = NULL;
  char *stack_end = NULL;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");

  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);

  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
void __kmp_infinite_loop(void) {
  static int done = FALSE;

#define MAX_MESSAGE 512
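/* Print a storage-map line of the form "OMP storage map: p1 p2 size format"
   to the error stream under the stdio bootstrap lock; when data-placement
   printing is compiled in and verbose mode is on, also report the memory
   node(s) backing the address range page by page. */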
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
    if (__kmp_storage_map_verbose) {
      node = __kmp_get_host_node(p1);
        __kmp_storage_map_verbose = FALSE;

      int localProc = __kmp_get_cpu_from_gtid(gtid);

      const int page_size = KMP_GET_PAGE_SIZE();

      p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
      p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));

        __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
        __kmp_printf_no_lock("  GTID %d\n", gtid);

            (char *)p1 += page_size;
          } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
          __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                               lastNode);

      __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                           (char *)p1 + (page_size - 1),
                           __kmp_get_host_node(p1));
        __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                             (char *)p2 + (page_size - 1),
                             __kmp_get_host_node(p2));

      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));

  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];

  if (__kmp_generate_warnings == kmp_warnings_off) {

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

void __kmp_abort_process() {
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();

  if (KMP_OS_WINDOWS) {
    __kmp_global.g.g_abort = SIGABRT;

  __kmp_unregister_library();
  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);

void __kmp_abort_thread(void) {
  __kmp_infinite_loop();
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);

static void __kmp_init_allocator() { __kmp_init_memkind(); }
static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
static void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) {
  __kmp_init_bootstrap_lock(lck);

static void __kmp_reset_locks_on_process_detach(int gtid_req) {
  for (i = 0; i < __kmp_threads_capacity; ++i) {
    kmp_info_t *th = __kmp_threads[i];
    int gtid = th->th.th_info.ds.ds_gtid;
    if (gtid == gtid_req)
      continue;
    int alive = __kmp_is_thread_alive(th, &exit_val);

  if (thread_count == 0)

  __kmp_reset_lock(&__kmp_forkjoin_lock);
  __kmp_reset_lock(&__kmp_stdio_lock);
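/* Windows DLL entry point: on process/thread attach and detach the event is
   traced and the runtime torn down. A non-NULL lpReserved on process detach
   means the whole process is terminating; in that case locks that other (now
   dead) threads may still hold are reset before the library is shut down. */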
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved != NULL) {
      __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific());

    __kmp_internal_end_library(__kmp_gtid_get_specific());

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
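/* Enter (deo) and exit (dxo) handlers for the dependent "ordered" construct.
   With BUILD_PARALLEL_ORDERED the threads of a team enter the ordered region
   strictly in thread-id order, using t_ordered.dt.t_value as the ticket that
   the exit handler advances to the next tid. */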
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);

void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
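/* Claim the single construct for the calling thread. The first thread of the
   team to atomically advance t_construct past its local this_construct
   counter wins (status TRUE); the others lose. Consistency checking and the
   ITT single-region markers are handled here as well. */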
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;

    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      __kmp_itt_metadata_single(id_ref);

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
      __kmp_check_workshare(gtid, ct_psingle, id_ref);

    __kmp_itt_single_start(gtid);

void __kmp_exit_single(int gtid) {
  __kmp_itt_single_end(gtid);
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
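/* Decide how many threads the forking master may actually reserve for the new
   team. The request is trimmed by the dynamic-adjustment mode (load balance,
   thread limit, or random), then by KMP_DEVICE_THREAD_LIMIT and the
   contention-group limit, and finally by the capacity of the __kmp_threads
   array (expanding it if possible). Warnings are issued once when a
   non-dynamic request cannot be satisfied; the result may be 1 (serialize). */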
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
      new_nthreads = set_nthreads;
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));

  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {

    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;

  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {

    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;

  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                " %d threads\n",
                __kmp_get_gtid(), new_nthreads, set_nthreads));
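/* Bind the master to slot 0 of the new team and populate the remaining slots
   with workers (reusing the hot team when possible), syncing each worker's
   barrier arrival counters and teams-construct state with the team before the
   fork barrier is released. */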
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid) {
  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());

  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  int level = team->t.t_active_level - 1;
  if (master_th->th.th_teams_microtask) {
    if (master_th->th.th_teams_size.nteams > 1) {
    if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
        master_th->th.th_teams_level == team->t.t_level) {
  if (level < __kmp_hot_teams_max_level) {
    if (hot_teams[level].hot_team) {
      KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
      hot_teams[level].hot_team = team;
      hot_teams[level].hot_team_nth = team->t.t_nproc;
  use_hot_team = team == root->r.r_hot_team;
  if (!use_hot_team) {
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    for (i = 1; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);

      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;

      kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
      for (b = 0; b < bs_last_barrier; ++b) {
        balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
        KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
        balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;

#if KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(team);

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
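/* On x86/x86_64, propagateFPControl captures the master's x87 control word
   and MXCSR into the team so workers can adopt them, and updateHWFPControl
   restores them on the way out of the region if the hardware state drifted;
   on other architectures both collapse to no-ops. */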
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;

    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);

    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);

inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    kmp_int16 x87_fpu_control_word;

    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);

#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc);
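/* Serialized parallel: run the region on the calling thread only (team of
   one). The thread's cached serial team is reused or a fresh one allocated,
   the nesting level and ICVs are pushed, a per-level dispatch buffer is
   stacked, and OMPT parallel-begin / implicit-task-begin events are raised. */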
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  KMP_DEBUG_ASSERT(serial_team);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
                     NULL);
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  this_thr->th.th_set_proc_bind = proc_bind_default;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *implicit_task_data;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size,
          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);

  if (this_thr->th.th_team != serial_team) {
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team =
          __kmp_allocate_team(this_thr->th.th_root, 1, 1,
                              proc_bind, &this_thr->th.th_current_task->td_icvs,
                              0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));

    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];

    serial_team->t.t_pkfn = (microtask_t)(~0);
    this_thr->th.th_info.ds.ds_tid = 0;

    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator;

    propagateFPControl(serial_team);

    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    int level = this_thr->th.th_team->t.t_level;

    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    dispatch_private_info_t *disp_buffer =
        (dispatch_private_info_t *)__kmp_allocate(
            sizeof(dispatch_private_info_t));
    disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;

    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {
      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);

  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);

    implicit_task_data = OMPT_CUR_TASK_DATA(this_thr);
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit);
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);

    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
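/* Main fork entry point for starting a parallel region. It decides between
   the teams-construct path, full serialization, and creating or reusing a
   team of nthreads workers; sets up ICVs, OMPT callbacks, ITT frames and task
   teams; then releases the team through the fork barrier and, for the Intel
   entry point, invokes the microtask from the master. */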
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context,
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
                    kmp_va_list ap) {
  int master_this_cons;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  int master_set_numthreads;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;

  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
    void *dummy = KMP_ALLOCA(__kmp_stkpadding);

    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  master_th = __kmp_threads[gtid];

  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *parent_task_data;
  ompt_frame_t *ompt_frame;
  ompt_data_t *implicit_task_data;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
                                  NULL, NULL);
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);

  level = parent_team->t.t_level;
  active_level = parent_team->t.t_active_level;
  teams_level = master_th->th.th_teams_level;
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
    (*p_hot_teams)[0].hot_team_nth = 1;

  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      int flags = OMPT_INVOKER(call_context) |
                  ((microtask == (microtask_t)__kmp_teams_master)
                       ? ompt_parallel_league
                       : ompt_parallel_team);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
          return_address);
    master_th->th.ompt_thread_info.state = ompt_state_overhead;

  master_th->th.th_ident = loc;

  if (master_th->th.th_teams_microtask && ap &&
      microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
    parent_team->t.t_ident = loc;
    __kmp_alloc_argv_entries(argc, parent_team, TRUE);
    parent_team->t.t_argc = argc;
    argv = (void **)parent_team->t.t_argv;
    for (i = argc - 1; i >= 0; --i)
      *argv++ = va_arg(kmp_va_deref(ap), void *);

    if (parent_team == master_th->th.th_serial_team) {
      KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);

      if (call_context == fork_context_gnu) {
        parent_team->t.t_serialized--;

      void **exit_frame_p;

      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                &ompt_parallel_data, return_address);
        exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          OMPT_CUR_TASK_INFO(master_th)->thread_num =
              __kmp_tid_from_gtid(gtid);
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);

        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
        exit_frame_p = &dummy;

      parent_team->t.t_serialized--;

      KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
      KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
      __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv);

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, implicit_task_data, 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
              OMPT_INVOKER(call_context) | ompt_parallel_team,
              return_address);
        master_th->th.ompt_thread_info.state = ompt_state_overhead;

    parent_team->t.t_pkfn = microtask;
    parent_team->t.t_invoke = invoker;
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
    parent_team->t.t_active_level++;
    parent_team->t.t_level++;
    parent_team->t.t_def_allocator = master_th->th.th_def_allocator;

    if (ompt_enabled.enabled) {
      ompt_lw_taskteam_t lw_taskteam;
      __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                              &ompt_parallel_data, return_address);
      __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);

    if (master_set_numthreads) {
      if (master_set_numthreads < master_th->th.th_teams_size.nth) {
        kmp_info_t **other_threads = parent_team->t.t_threads;
        parent_team->t.t_nproc = master_set_numthreads;
        for (i = 0; i < master_set_numthreads; ++i) {
          other_threads[i]->th.th_team_nproc = master_set_numthreads;

      master_th->th.th_set_nproc = 0;

    if (__kmp_debugging) {
      int nth = __kmp_omp_num_threads(loc);
        master_set_numthreads = nth;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
         KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3 &&
        parent_team->t.t_active_level == 1 &&
        master_th->th.th_teams_size.nteams == 1) {
      kmp_uint64 tmp_time = __itt_get_timestamp();
      master_th->th.th_frame_time = tmp_time;
      parent_team->t.t_region_time = tmp_time;
    if (__itt_stack_caller_create_ptr) {
      parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();

    KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));
    __kmp_internal_fork(loc, gtid, parent_team);
    KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));

    if (call_context == fork_context_gnu)
      return TRUE;

    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    if (!parent_team->t.t_invoke(gtid)) {
      KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);

  if (parent_team->t.t_active_level >=
      master_th->th.th_current_task->td_icvs.max_active_levels) {
    int enter_teams = ((ap == NULL && active_level == 0) ||
                       (ap && teams_level > 0 && teams_level == level));
    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   : get__nproc_2(parent_team, master_tid);

    if ((get__max_active_levels(master_th) == 1 &&
         (root->r.r_in_parallel && !enter_teams)) ||
        (__kmp_library == library_serial)) {
      KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
                    " threads\n",
                    gtid, nthreads));

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
                                       nthreads, enter_teams);
      if (nthreads == 1) {
        __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  KMP_DEBUG_ASSERT(nthreads > 0);

  master_th->th.th_set_nproc = 0;

  if (nthreads == 1) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));

    KA_TRACE(20,
             ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));

    if (call_context == fork_context_intel) {
      master_th->th.th_serial_team->t.t_ident = loc;
        master_th->th.th_serial_team->t.t_level--;

        void **exit_frame_p;
        ompt_task_info_t *task_info;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);

          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_frame_p = &(task_info->frame.exit_frame.ptr);
          if (ompt_enabled.ompt_callback_implicit_task) {
            OMPT_CUR_TASK_INFO(master_th)->thread_num =
                __kmp_tid_from_gtid(gtid);
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num,
                ompt_task_implicit);

          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
          exit_frame_p = &dummy;

        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc,
                               parent_team->t.t_argv);

        if (ompt_enabled.enabled) {
          *exit_frame_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num,
                ompt_task_implicit);

          ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context) | ompt_parallel_team,
                return_address);
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
      } else if (microtask == (microtask_t)__kmp_teams_master) {
        KMP_DEBUG_ASSERT(master_th->th.th_team ==
                         master_th->th.th_serial_team);
        team = master_th->th.th_team;
        team->t.t_invoke = invoker;
        __kmp_alloc_argv_entries(argc, team, TRUE);
        team->t.t_argc = argc;
        argv = (void **)team->t.t_argv;
        for (i = argc - 1; i >= 0; --i)
          *argv++ = va_arg(kmp_va_deref(ap), void *);
        for (i = 0; i < argc; ++i)
          argv[i] = parent_team->t.t_argv[i];

        if (ompt_enabled.enabled) {
          ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 0,
                OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context) | ompt_parallel_league,
                return_address);
          master_th->th.ompt_thread_info.state = ompt_state_overhead;

        for (i = argc - 1; i >= 0; --i)
          *argv++ = va_arg(kmp_va_deref(ap), void *);

        void **exit_frame_p;
        ompt_task_info_t *task_info;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);
          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_frame_p = &(task_info->frame.exit_frame.ptr);

          implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
                ompt_task_implicit);
            OMPT_CUR_TASK_INFO(master_th)->thread_num =
                __kmp_tid_from_gtid(gtid);

          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
          exit_frame_p = &dummy;

        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, args);

        if (ompt_enabled.enabled) {
          *exit_frame_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num,
                ompt_task_implicit);

          ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context) | ompt_parallel_team,
                return_address);
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
    } else if (call_context == fork_context_gnu) {
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
                              return_address);

      lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);

      KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));

      KMP_ASSERT2(call_context < fork_context_last,
                  "__kmp_fork_call: unknown fork_context parameter");

    KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));

  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));

  master_th->th.th_current_task->td_flags.executing = 0;

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    KMP_ATOMIC_INC(&root->r.r_in_parallel);

  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  if ((level + 1 < __kmp_nested_nth.used) &&
      (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
    nthreads_icv = __kmp_nested_nth.nth[level + 1];

  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];

  master_th->th.th_set_proc_bind = proc_bind_default;

  if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;
    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;

    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               proc_bind, &new_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));

    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));

  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
                        return_address);
  KMP_CHECK_UPDATE(team->t.t_invoke, invoker);
  if (!master_th->th.th_teams_microtask || level > teams_level) {
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
  KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);

  propagateFPControl(team);
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
    KA_TRACE(20, ("__kmp_fork_call: Master T#%d pushing task_team %p / team "
                  "%p, new task_team %p / team %p\n",
                  __kmp_gtid_from_thread(master_th),
                  master_th->th.th_task_team, parent_team,
                  team->t.t_task_team[master_th->th.th_task_state], team));

    if (active_level || master_th->th.th_task_team) {
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      if (master_th->th.th_task_state_top >=
          master_th->th.th_task_state_stack_sz) {
        kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
        kmp_uint8 *old_stack, *new_stack;

        new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
        for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
          new_stack[i] = master_th->th.th_task_state_memo_stack[i];
        for (i = master_th->th.th_task_state_stack_sz; i < new_size; ++i) {
        old_stack = master_th->th.th_task_state_memo_stack;
        master_th->th.th_task_state_memo_stack = new_stack;
        master_th->th.th_task_state_stack_sz = new_size;
        __kmp_free(old_stack);

      master_th->th
          .th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
      if (master_th->th.th_hot_teams &&
          active_level < __kmp_hot_teams_max_level &&
          team == master_th->th.th_hot_teams[active_level].hot_team) {
        master_th->th.th_task_state =
            master_th->th
                .th_task_state_memo_stack[master_th->th.th_task_state_top];
        master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS

#if !KMP_NESTED_HOT_TEAMS
    KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
                     (team == root->r.r_hot_team));

  KA_TRACE(
      20,
      ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
       gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
       team->t.t_nproc));
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));

  argv = (void **)team->t.t_argv;
  for (i = argc - 1; i >= 0; --i) {
    void *new_argv = va_arg(kmp_va_deref(ap), void *);
    KMP_CHECK_UPDATE(*argv, new_argv);
  for (i = 0; i < argc; ++i) {
    KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);

  KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
  if (!root->r.r_active)
    root->r.r_active = TRUE;

  __kmp_fork_team_threads(root, team, master_th, gtid);
  __kmp_setup_icv_copy(team, nthreads,
                       &master_th->th.th_current_task->td_icvs, loc);

  master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  if (team->t.t_active_level == 1 &&
      !master_th->th.th_teams_microtask) {
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        (__kmp_forkjoin_frames_mode == 3 ||
         __kmp_forkjoin_frames_mode == 1)) {
      kmp_uint64 tmp_time = 0;
      if (__itt_get_timestamp_ptr)
        tmp_time = __itt_get_timestamp();
      master_th->th.th_frame_time = tmp_time;
      if (__kmp_forkjoin_frames_mode == 3)
        team->t.t_region_time = tmp_time;
    if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
      __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);

  KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);

  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));

  if (__itt_stack_caller_create_ptr) {
    team->t.t_stack_id = __kmp_itt_stack_caller_create();

  __kmp_internal_fork(loc, gtid, team);
  KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, team, master_th, gtid));

  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

#if KMP_STATS_ENABLED
    KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);

  if (!team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");

#if KMP_STATS_ENABLED
  KMP_SET_THREAD_STATE(previous_state);

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), flags, codeptr);

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
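/* Join a parallel region from the master: wait for the team in the join
   barrier, emit ITT frame and OMPT end events, pop the teams-construct level
   when leaving a nested teams team, restore the master's team pointers, ICVs
   and FP control, and release the team back to the pool. */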
void __kmp_join_call(ident_t *loc, int gtid,
                     enum fork_context_e fork_context,
                     int exit_teams) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
  kmp_team_t *parent_team;
  kmp_info_t *master_th;

  KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));

  master_th = __kmp_threads[gtid];
  root = master_th->th.th_root;
  team = master_th->th.th_team;
  parent_team = team->t.t_parent;

  master_th->th.th_ident = loc;

  void *team_microtask = (void *)team->t.t_pkfn;

  if (ompt_enabled.enabled &&
      !(team->t.t_serialized && fork_context == fork_context_gnu)) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;

  if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     team->t.t_task_team[master_th->th.th_task_state]);

  if (team->t.t_serialized) {
    if (master_th->th.th_teams_microtask) {
      int level = team->t.t_level;
      int tlevel = master_th->th.th_teams_level;
      if (level == tlevel) {
      } else if (level == tlevel + 1) {
        team->t.t_serialized++;

    if (ompt_enabled.enabled) {
      __kmp_join_restore_state(master_th, parent_team);

  master_active = team->t.t_master_active;

  __kmp_internal_join(loc, gtid, team);
    master_th->th.th_task_state = 0;

  ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
  void *codeptr = team->t.ompt_team_info.master_return_address;

  if (__itt_stack_caller_create_ptr) {
    __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);

  if (team->t.t_active_level == 1 &&
      (!master_th->th.th_teams_microtask ||
       master_th->th.th_teams_size.nteams == 1)) {
    master_th->th.th_ident = loc;

    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3)
      __kmp_itt_frame_submit(gtid, team->t.t_region_time,
                             master_th->th.th_frame_time, 0, loc,
                             master_th->th.th_team_nproc, 1);
    else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
             !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
      __kmp_itt_region_joined(gtid);
2415 if (master_th->th.th_teams_microtask && !exit_teams &&
2416 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2417 team->t.t_level == master_th->th.th_teams_level + 1) {
2422 ompt_data_t ompt_parallel_data = ompt_data_none;
2423 if (ompt_enabled.enabled) {
2424 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2425 if (ompt_enabled.ompt_callback_implicit_task) {
2426 int ompt_team_size = team->t.t_nproc;
2427 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2428 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2429 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2431 task_info->frame.exit_frame = ompt_data_none;
2432 task_info->task_data = ompt_data_none;
2433 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2434 __ompt_lw_taskteam_unlink(master_th);
2439 team->t.t_active_level--;
2440 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2446 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2447 int old_num = master_th->th.th_team_nproc;
2448 int new_num = master_th->th.th_teams_size.nth;
2449 kmp_info_t **other_threads = team->t.t_threads;
2450 team->t.t_nproc = new_num;
        for (int i = 0; i < old_num; ++i) {
2452 other_threads[i]->th.th_team_nproc = new_num;
        for (int i = old_num; i < new_num; ++i) {
2457 KMP_DEBUG_ASSERT(other_threads[i]);
2458 kmp_balign_t *balign = other_threads[i]->th.th_bar;
          for (int b = 0; b < bs_last_barrier; ++b) {
2460 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2461 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2463 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2466 if (__kmp_tasking_mode != tskm_immediate_exec) {
2468 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2474 if (ompt_enabled.enabled) {
2475 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2476 OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
2484 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2485 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2487 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2492 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2494 if (!master_th->th.th_teams_microtask ||
2495 team->t.t_level > master_th->th.th_teams_level) {
2497 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2499 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2502 if (ompt_enabled.enabled) {
2503 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2504 if (ompt_enabled.ompt_callback_implicit_task) {
      int flags = (team_microtask == (void *)__kmp_teams_master)
                      ? ompt_task_initial
                      : ompt_task_implicit;
2508 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2509 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2510 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2511 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2513 task_info->frame.exit_frame = ompt_data_none;
2514 task_info->task_data = ompt_data_none;
  KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
                master_th, team));
  __kmp_pop_current_task_from_thread(master_th);
2522 #if KMP_AFFINITY_SUPPORTED
2524 master_th->th.th_first_place = team->t.t_first_place;
2525 master_th->th.th_last_place = team->t.t_last_place;
2527 master_th->th.th_def_allocator = team->t.t_def_allocator;
2529 updateHWFPControl(team);
2531 if (root->r.r_active != master_active)
2532 root->r.r_active = master_active;
  __kmp_free_team(root, team USE_NESTED_HOT_ARG(master_th)); // free the team
2542 master_th->th.th_team = parent_team;
2543 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2544 master_th->th.th_team_master = parent_team->t.t_threads[0];
2545 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2548 if (parent_team->t.t_serialized &&
2549 parent_team != master_th->th.th_serial_team &&
2550 parent_team != root->r.r_root_team) {
2551 __kmp_free_team(root,
2552 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2553 master_th->th.th_serial_team = parent_team;
2556 if (__kmp_tasking_mode != tskm_immediate_exec) {
    if (master_th->th.th_task_state_top >
        0) { // Restore task state from memo stack
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      // Remember master's state if we re-use this same team
      master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      --master_th->th.th_task_state_top; // pop
      // Now restore state at this level
      master_th->th.th_task_state =
          master_th->th
              .th_task_state_memo_stack[master_th->th.th_task_state_top];
    }
2570 master_th->th.th_task_team =
2571 parent_team->t.t_task_team[master_th->th.th_task_state];
    KA_TRACE(20,
             ("__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
              __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
              parent_team));
  }

  master_th->th.th_current_task->td_flags.executing = 1;
2583 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  int flags =
      OMPT_INVOKER(fork_context) |
      ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
                                                      : ompt_parallel_team);
  if (ompt_enabled.enabled) {
    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
                    codeptr);
  }

  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}
void __kmp_save_internal_controls(kmp_info_t *thread) {
  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;
    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else {
      if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
          thread->th.th_team->t.t_serialized) {
        push = 1;
      }
    }
    if (push) { /* push a record on the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));
      copy_icvs(control, &thread->th.th_current_task->td_icvs);
      control->serial_nesting_level = thread->th.th_team->t.t_serialized;
      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}
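// __kmp_set_num_threads implements the runtime side of omp_set_num_threads():
// it updates the nproc ICV of the calling thread's current task and, when the
// root is idle, may shrink the hot team to the new size right away.  A
// user-level sketch (hedged; the exact entry-point chain is in the OMP API
// layer):
//
//   #include <omp.h>
//   int main(void) {
//     omp_set_num_threads(4); // ends up in __kmp_set_num_threads(4, gtid)
//   #pragma omp parallel      // the next region uses at most 4 threads
//     { }
//     return 0;
//   }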
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;
2647 thread = __kmp_threads[gtid];
  if (thread->th.th_current_task->td_icvs.nproc == new_nth)
    return; // nothing to do
2651 __kmp_save_internal_controls(thread);
2653 set__nproc(thread, new_nth);
2658 root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
      ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;
2668 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2671 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2672 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // When decreasing team size, threads no longer in the team should
        // unref their task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
2681 hot_team->t.t_nproc = new_nth;
2682 #if KMP_NESTED_HOT_TEAMS
2683 if (thread->th.th_hot_teams) {
2684 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif
    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2692 for (f = 0; f < new_nth; f++) {
2693 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case omp_set_num_threads() call
    hot_team->t.t_size_changed = -1;
  }
}
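// __kmp_set_max_active_levels / __kmp_get_max_active_levels back the
// omp_set_max_active_levels() / omp_get_max_active_levels() API: the value is
// validated against KMP_MAX_ACTIVE_LEVELS_LIMIT and then stored in (or read
// from) the max_active_levels ICV of the calling thread's current task.
// (Informal summary of the two functions below.)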
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);
2708 KMP_DEBUG_ASSERT(__kmp_init_serial);
  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // We ignore this call if the user has specified a negative value.
    // The current setting won't be changed; the last valid setting is kept.
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // OK, the value is within the valid range [0; KMP_MAX_ACTIVE_LEVELS_LIMIT]
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));
2739 thread = __kmp_threads[gtid];
2741 __kmp_save_internal_controls(thread);
  set__max_active_levels(thread, max_active_levels);
}
2747 int __kmp_get_max_active_levels(
int gtid) {
2750 KF_TRACE(10, (
"__kmp_get_max_active_levels: thread %d\n", gtid));
2751 KMP_DEBUG_ASSERT(__kmp_init_serial);
2753 thread = __kmp_threads[gtid];
2754 KMP_DEBUG_ASSERT(thread->th.th_current_task);
2755 KF_TRACE(10, (
"__kmp_get_max_active_levels: thread %d, curtask=%p, "
2756 "curtask_maxaclevel=%d\n",
2757 gtid, thread->th.th_current_task,
2758 thread->th.th_current_task->td_icvs.max_active_levels));
2759 return thread->th.th_current_task->td_icvs.max_active_levels;
KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;
  kmp_sched_t orig_kind;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
2773 KMP_DEBUG_ASSERT(__kmp_init_serial);
  orig_kind = kind;
  kind = __kmp_sched_without_mods(kind);
2782 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2783 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
    chunk = 0; // ignore chunk value in case of bad kind
  }
2792 thread = __kmp_threads[gtid];
2794 __kmp_save_internal_controls(thread);
  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // differ static chunked vs. unchunked: an invalid chunk value indicates
      // the (default) unchunked static schedule
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
2808 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2809 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2810 kmp_sched_lower - 2];
  }
  __kmp_sched_apply_mods_intkind(
      orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore parameter chunk for schedule auto
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}
2823 void __kmp_get_schedule(
int gtid, kmp_sched_t *kind,
int *chunk) {
2827 KF_TRACE(10, (
"__kmp_get_schedule: thread %d\n", gtid));
2828 KMP_DEBUG_ASSERT(__kmp_init_serial);
2830 thread = __kmp_threads[gtid];
2832 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
2833 switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
2835 case kmp_sch_static_greedy:
2836 case kmp_sch_static_balanced:
2837 *kind = kmp_sched_static;
2838 __kmp_sched_apply_mods_stdkind(kind, th_type);
2841 case kmp_sch_static_chunked:
2842 *kind = kmp_sched_static;
2844 case kmp_sch_dynamic_chunked:
2845 *kind = kmp_sched_dynamic;
2848 case kmp_sch_guided_iterative_chunked:
2849 case kmp_sch_guided_analytical_chunked:
2850 *kind = kmp_sched_guided;
2853 *kind = kmp_sched_auto;
2855 case kmp_sch_trapezoidal:
2856 *kind = kmp_sched_trapezoidal;
2858 #if KMP_STATIC_STEAL_ENABLED
2859 case kmp_sch_static_steal:
2860 *kind = kmp_sched_static_steal;
2864 KMP_FATAL(UnknownSchedulingType, th_type);
2867 __kmp_sched_apply_mods_stdkind(kind, th_type);
2868 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
2871 int __kmp_get_ancestor_thread_num(
int gtid,
int level) {
2877 KF_TRACE(10, (
"__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
2878 KMP_DEBUG_ASSERT(__kmp_init_serial);
2885 thr = __kmp_threads[gtid];
2886 team = thr->th.th_team;
2887 ii = team->t.t_level;
2891 if (thr->th.th_teams_microtask) {
2893 int tlevel = thr->th.th_teams_level;
2896 KMP_DEBUG_ASSERT(ii >= tlevel);
2908 return __kmp_tid_from_gtid(gtid);
2910 dd = team->t.t_serialized;
2912 while (ii > level) {
2913 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
2915 if ((team->t.t_serialized) && (!dd)) {
2916 team = team->t.t_parent;
2920 team = team->t.t_parent;
2921 dd = team->t.t_serialized;
2926 return (dd > 1) ? (0) : (team->t.t_master_tid);
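// __kmp_get_team_size backs omp_get_team_size(level): starting from the
// calling thread's team, walk up through parent teams (counting serialized
// nestings) until the requested nesting level is reached, then return that
// team's t_nproc.  For example, inside a nested region omp_get_team_size(1)
// reports the size of the outermost active team.  (Informal summary.)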
2929 int __kmp_get_team_size(
int gtid,
int level) {
2935 KF_TRACE(10, (
"__kmp_get_team_size: thread %d %d\n", gtid, level));
2936 KMP_DEBUG_ASSERT(__kmp_init_serial);
2943 thr = __kmp_threads[gtid];
2944 team = thr->th.th_team;
2945 ii = team->t.t_level;
2949 if (thr->th.th_teams_microtask) {
2951 int tlevel = thr->th.th_teams_level;
2954 KMP_DEBUG_ASSERT(ii >= tlevel);
2965 while (ii > level) {
2966 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
2968 if (team->t.t_serialized && (!dd)) {
2969 team = team->t.t_parent;
2973 team = team->t.t_parent;
2978 return team->t.t_nproc;
2981 kmp_r_sched_t __kmp_get_schedule_global() {
2986 kmp_r_sched_t r_sched;
2992 enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
2993 enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
2996 r_sched.r_sched_type = __kmp_static;
2999 r_sched.r_sched_type = __kmp_guided;
3001 r_sched.r_sched_type = __kmp_sched;
3003 SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
3005 if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
3007 r_sched.chunk = KMP_DEFAULT_CHUNK;
3009 r_sched.chunk = __kmp_chunk;
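// __kmp_alloc_argv_entries (below) sizes team->t.t_argv for the microtask
// arguments: small argument counts reuse the inline array embedded in the team
// structure (KMP_INLINE_ARGV_ENTRIES), larger counts get a page-allocated
// array of at least KMP_MIN_MALLOC_ARGV_ENTRIES pointers.  (Informal summary.)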
3017 static void __kmp_alloc_argv_entries(
int argc, kmp_team_t *team,
int realloc) {
3019 KMP_DEBUG_ASSERT(team);
3020 if (!realloc || argc > team->t.t_max_argc) {
3022 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: needed entries=%d, "
3023 "current entries=%d\n",
3024 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
3026 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
3027 __kmp_free((
void *)team->t.t_argv);
3029 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3031 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3032 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: inline allocate %d "
3034 team->t.t_id, team->t.t_max_argc));
3035 team->t.t_argv = &team->t.t_inline_argv[0];
3036 if (__kmp_storage_map) {
3037 __kmp_print_storage_map_gtid(
3038 -1, &team->t.t_inline_argv[0],
3039 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3040 (
sizeof(
void *) * KMP_INLINE_ARGV_ENTRIES),
"team_%d.t_inline_argv",
3045 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3046 ? KMP_MIN_MALLOC_ARGV_ENTRIES
3048 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3050 team->t.t_id, team->t.t_max_argc));
3052 (
void **)__kmp_page_allocate(
sizeof(
void *) * team->t.t_max_argc);
3053 if (__kmp_storage_map) {
3054 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3055 &team->t.t_argv[team->t.t_max_argc],
3056 sizeof(
void *) * team->t.t_max_argc,
3057 "team_%d.t_argv", team->t.t_id);
3063 static void __kmp_allocate_team_arrays(kmp_team_t *team,
int max_nth) {
3065 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
3067 (kmp_info_t **)__kmp_allocate(
sizeof(kmp_info_t *) * max_nth);
3068 team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
3069 sizeof(dispatch_shared_info_t) * num_disp_buff);
3070 team->t.t_dispatch =
3071 (kmp_disp_t *)__kmp_allocate(
sizeof(kmp_disp_t) * max_nth);
3072 team->t.t_implicit_task_taskdata =
3073 (kmp_taskdata_t *)__kmp_allocate(
sizeof(kmp_taskdata_t) * max_nth);
3074 team->t.t_max_nproc = max_nth;
3077 for (i = 0; i < num_disp_buff; ++i) {
3078 team->t.t_disp_buffer[i].buffer_index = i;
3079 team->t.t_disp_buffer[i].doacross_buf_idx = i;
3083 static void __kmp_free_team_arrays(kmp_team_t *team) {
3086 for (i = 0; i < team->t.t_max_nproc; ++i) {
3087 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3088 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3089 team->t.t_dispatch[i].th_disp_buffer = NULL;
3092 #if KMP_USE_HIER_SCHED
3093 __kmp_dispatch_free_hierarchies(team);
3095 __kmp_free(team->t.t_threads);
3096 __kmp_free(team->t.t_disp_buffer);
3097 __kmp_free(team->t.t_dispatch);
3098 __kmp_free(team->t.t_implicit_task_taskdata);
3099 team->t.t_threads = NULL;
3100 team->t.t_disp_buffer = NULL;
3101 team->t.t_dispatch = NULL;
3102 team->t.t_implicit_task_taskdata = 0;
3105 static void __kmp_reallocate_team_arrays(kmp_team_t *team,
int max_nth) {
3106 kmp_info_t **oldThreads = team->t.t_threads;
3108 __kmp_free(team->t.t_disp_buffer);
3109 __kmp_free(team->t.t_dispatch);
3110 __kmp_free(team->t.t_implicit_task_taskdata);
3111 __kmp_allocate_team_arrays(team, max_nth);
3113 KMP_MEMCPY(team->t.t_threads, oldThreads,
3114 team->t.t_nproc *
sizeof(kmp_info_t *));
3116 __kmp_free(oldThreads);
3119 static kmp_internal_control_t __kmp_get_global_icvs(
void) {
3121 kmp_r_sched_t r_sched =
3122 __kmp_get_schedule_global();
3124 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
3126 kmp_internal_control_t g_icvs = {
3128 (kmp_int8)__kmp_global.g.g_dynamic,
3130 (kmp_int8)__kmp_env_blocktime,
3132 __kmp_dflt_blocktime,
3137 __kmp_dflt_team_nth,
3141 __kmp_dflt_max_active_levels,
3145 __kmp_nested_proc_bind.bind_types[0],
3146 __kmp_default_device,
3153 static kmp_internal_control_t __kmp_get_x_global_icvs(
const kmp_team_t *team) {
3155 kmp_internal_control_t gx_icvs;
3156 gx_icvs.serial_nesting_level =
3158 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3159 gx_icvs.next = NULL;
3164 static void __kmp_initialize_root(kmp_root_t *root) {
3166 kmp_team_t *root_team;
3167 kmp_team_t *hot_team;
3168 int hot_team_max_nth;
3169 kmp_r_sched_t r_sched =
3170 __kmp_get_schedule_global();
3171 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3172 KMP_DEBUG_ASSERT(root);
3173 KMP_ASSERT(!root->r.r_begin);
3176 __kmp_init_lock(&root->r.r_begin_lock);
3177 root->r.r_begin = FALSE;
3178 root->r.r_active = FALSE;
3179 root->r.r_in_parallel = 0;
3180 root->r.r_blocktime = __kmp_dflt_blocktime;
3184 KF_TRACE(10, (
"__kmp_initialize_root: before root_team\n"));
3187 __kmp_allocate_team(root,
3193 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3195 USE_NESTED_HOT_ARG(NULL)
3200 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3203 KF_TRACE(10, (
"__kmp_initialize_root: after root_team = %p\n", root_team));
3205 root->r.r_root_team = root_team;
3206 root_team->t.t_control_stack_top = NULL;
3209 root_team->t.t_threads[0] = NULL;
3210 root_team->t.t_nproc = 1;
3211 root_team->t.t_serialized = 1;
3213 root_team->t.t_sched.sched = r_sched.sched;
3216 (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3217 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3221 KF_TRACE(10, (
"__kmp_initialize_root: before hot_team\n"));
3224 __kmp_allocate_team(root,
3226 __kmp_dflt_team_nth_ub * 2,
3230 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3232 USE_NESTED_HOT_ARG(NULL)
3234 KF_TRACE(10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team));
3236 root->r.r_hot_team = hot_team;
3237 root_team->t.t_control_stack_top = NULL;
3240 hot_team->t.t_parent = root_team;
3243 hot_team_max_nth = hot_team->t.t_max_nproc;
3244 for (f = 0; f < hot_team_max_nth; ++f) {
3245 hot_team->t.t_threads[f] = NULL;
3247 hot_team->t.t_nproc = 1;
3249 hot_team->t.t_sched.sched = r_sched.sched;
3250 hot_team->t.t_size_changed = 0;
3255 typedef struct kmp_team_list_item {
3256 kmp_team_p
const *entry;
3257 struct kmp_team_list_item *next;
3258 } kmp_team_list_item_t;
3259 typedef kmp_team_list_item_t *kmp_team_list_t;
3261 static void __kmp_print_structure_team_accum(
3262 kmp_team_list_t list,
3263 kmp_team_p
const *team
3273 KMP_DEBUG_ASSERT(list != NULL);
3278 __kmp_print_structure_team_accum(list, team->t.t_parent);
3279 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3283 while (l->next != NULL && l->entry != team) {
3286 if (l->next != NULL) {
3292 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3298 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3299 sizeof(kmp_team_list_item_t));
3306 static void __kmp_print_structure_team(
char const *title, kmp_team_p
const *team
3309 __kmp_printf(
"%s", title);
3311 __kmp_printf(
"%2x %p\n", team->t.t_id, team);
3313 __kmp_printf(
" - (nil)\n");
3317 static void __kmp_print_structure_thread(
char const *title,
3318 kmp_info_p
const *thread) {
3319 __kmp_printf(
"%s", title);
3320 if (thread != NULL) {
3321 __kmp_printf(
"%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3323 __kmp_printf(
" - (nil)\n");
3327 void __kmp_print_structure(
void) {
3329 kmp_team_list_t list;
3333 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof(kmp_team_list_item_t));
3337 __kmp_printf(
"\n------------------------------\nGlobal Thread "
3338 "Table\n------------------------------\n");
3341 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3342 __kmp_printf(
"%2d", gtid);
3343 if (__kmp_threads != NULL) {
3344 __kmp_printf(
" %p", __kmp_threads[gtid]);
3346 if (__kmp_root != NULL) {
3347 __kmp_printf(
" %p", __kmp_root[gtid]);
3354 __kmp_printf(
"\n------------------------------\nThreads\n--------------------"
3356 if (__kmp_threads != NULL) {
3358 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3359 kmp_info_t
const *thread = __kmp_threads[gtid];
3360 if (thread != NULL) {
3361 __kmp_printf(
"GTID %2d %p:\n", gtid, thread);
3362 __kmp_printf(
" Our Root: %p\n", thread->th.th_root);
3363 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3364 __kmp_print_structure_team(
" Serial Team: ",
3365 thread->th.th_serial_team);
3366 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3367 __kmp_print_structure_thread(
" Master: ",
3368 thread->th.th_team_master);
3369 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3370 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3371 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3372 __kmp_print_structure_thread(
" Next in pool: ",
3373 thread->th.th_next_pool);
3375 __kmp_print_structure_team_accum(list, thread->th.th_team);
3376 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3380 __kmp_printf(
"Threads array is not allocated.\n");
3384 __kmp_printf(
"\n------------------------------\nUbers\n----------------------"
3386 if (__kmp_root != NULL) {
3388 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3389 kmp_root_t
const *root = __kmp_root[gtid];
3391 __kmp_printf(
"GTID %2d %p:\n", gtid, root);
3392 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3393 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3394 __kmp_print_structure_thread(
" Uber Thread: ",
3395 root->r.r_uber_thread);
3396 __kmp_printf(
" Active?: %2d\n", root->r.r_active);
3397 __kmp_printf(
" In Parallel: %2d\n",
3398 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3400 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3401 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3405 __kmp_printf(
"Ubers array is not allocated.\n");
3408 __kmp_printf(
"\n------------------------------\nTeams\n----------------------"
3410 while (list->next != NULL) {
3411 kmp_team_p
const *team = list->entry;
3413 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team);
3414 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent);
3415 __kmp_printf(
" Master TID: %2d\n", team->t.t_master_tid);
3416 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc);
3417 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized);
3418 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc);
3419 for (i = 0; i < team->t.t_nproc; ++i) {
3420 __kmp_printf(
" Thread %2d: ", i);
3421 __kmp_print_structure_thread(
"", team->t.t_threads[i]);
3423 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool);
3429 __kmp_printf(
"\n------------------------------\nPools\n----------------------"
3431 __kmp_print_structure_thread(
"Thread pool: ",
3432 CCAST(kmp_info_t *, __kmp_thread_pool));
3433 __kmp_print_structure_team(
"Team pool: ",
3434 CCAST(kmp_team_t *, __kmp_team_pool));
3438 while (list != NULL) {
3439 kmp_team_list_item_t *item = list;
3441 KMP_INTERNAL_FREE(item);
3450 static const unsigned __kmp_primes[] = {
3451 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3452 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3453 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3454 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3455 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3456 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3457 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3458 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3459 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3460 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3461 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
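// Per-thread pseudo-random number generator (used, e.g., for randomized victim
// selection when stealing).  Each thread runs a linear congruential generator
//   x_{n+1} = a * x_n + 1   (mod 2^32)
// where the multiplier a is one of the __kmp_primes above, selected from the
// thread id, and __kmp_get_random() returns the top 16 bits of the state.
// (Informal summary of the two functions below.)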
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = (unsigned short)(x >> 16);

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}
void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]->r.r_active) { // reclaim only roots that died non-active
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
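// __kmp_expand_threads(nNeed): make room for nNeed more entries in the
// __kmp_threads / __kmp_root arrays.  The capacity is repeatedly doubled until
// it covers the request (capped at __kmp_sys_max_nth); both arrays share one
// allocation, the old contents are copied over, and the threadprivate cache is
// resized if necessary.  Returns the number of entries added (0 when no
// expansion was possible or needed).  (Informal summary.)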
static int __kmp_expand_threads(int nNeed) {
  int added = 0;
  int minimumRequiredCapacity;
  int newCapacity;
  kmp_info_t **newThreads;
  kmp_root_t **newRoot;
3535 #if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
3538 added = __kmp_reclaim_dead_roots();
3567 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
3570 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3574 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
3585 KMP_MEMCPY(newThreads, __kmp_threads,
3586 __kmp_threads_capacity *
sizeof(kmp_info_t *));
3587 KMP_MEMCPY(newRoot, __kmp_root,
3588 __kmp_threads_capacity *
sizeof(kmp_root_t *));
  kmp_info_t **temp_threads = __kmp_threads;
  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  __kmp_free(temp_threads);
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;
3597 if (newCapacity > __kmp_tp_capacity) {
3598 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3599 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3600 __kmp_threadprivate_resize_cache(newCapacity);
    } else { // increase __kmp_tp_capacity to correspond with kmp_threads size
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
    }
    __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
  }

  return added;
}
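// __kmp_register_root: register the calling native thread as an OpenMP root
// ("uber") thread.  It finds or creates a slot in __kmp_threads, allocates the
// kmp_root_t / kmp_info_t structures plus the root, hot and serial teams, and
// returns the new gtid.  This runs during serial initialization for the
// initial thread and whenever a previously unknown native thread first enters
// the runtime.  (Informal summary.)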
3613 int __kmp_register_root(
int initial_thread) {
3614 kmp_info_t *root_thread;
3618 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3619 KA_TRACE(20, (
"__kmp_register_root: entered\n"));
3636 capacity = __kmp_threads_capacity;
3637 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3644 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3645 capacity -= __kmp_hidden_helper_threads_num;
3649 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3650 if (__kmp_tp_cached) {
3651 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3652 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3653 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3655 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3665 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3668 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3669 gtid <= __kmp_hidden_helper_threads_num;
3672 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3673 KA_TRACE(1, (
"__kmp_register_root: found slot in threads array for "
3674 "hidden helper thread: T#%d\n",
3680 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3683 for (gtid = __kmp_hidden_helper_threads_num + 1;
3684 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3688 1, (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3689 KMP_ASSERT(gtid < __kmp_threads_capacity);
3694 TCW_4(__kmp_nth, __kmp_nth + 1);
3698 if (__kmp_adjust_gtid_mode) {
3699 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3700 if (TCR_4(__kmp_gtid_mode) != 2) {
3701 TCW_4(__kmp_gtid_mode, 2);
3704 if (TCR_4(__kmp_gtid_mode) != 1) {
3705 TCW_4(__kmp_gtid_mode, 1);
3710 #ifdef KMP_ADJUST_BLOCKTIME
3713 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3714 if (__kmp_nth > __kmp_avail_proc) {
3715 __kmp_zero_bt = TRUE;
  if (!(root = __kmp_root[gtid])) {
    root = __kmp_root[gtid] =
        (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
3723 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3726 #if KMP_STATS_ENABLED
3728 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3729 __kmp_stats_thread_ptr->startLife();
3730 KMP_SET_THREAD_STATE(SERIAL_REGION);
3733 __kmp_initialize_root(root);
3736 if (root->r.r_uber_thread) {
3737 root_thread = root->r.r_uber_thread;
3739 root_thread = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
3740 if (__kmp_storage_map) {
3741 __kmp_print_thread_storage_map(root_thread, gtid);
3743 root_thread->th.th_info.ds.ds_gtid = gtid;
3745 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3747 root_thread->th.th_root = root;
3748 if (__kmp_env_consistency_check) {
3749 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3752 __kmp_initialize_fast_memory(root_thread);
3756 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3757 __kmp_initialize_bget(root_thread);
3759 __kmp_init_random(root_thread);
3763 if (!root_thread->th.th_serial_team) {
3764 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3765 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3766 root_thread->th.th_serial_team = __kmp_allocate_team(
3771 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3773 KMP_ASSERT(root_thread->th.th_serial_team);
3774 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3775 root_thread->th.th_serial_team));
3778 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3780 root->r.r_root_team->t.t_threads[0] = root_thread;
3781 root->r.r_hot_team->t.t_threads[0] = root_thread;
3782 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3784 root_thread->th.th_serial_team->t.t_serialized = 0;
3785 root->r.r_uber_thread = root_thread;
3788 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3789 TCW_4(__kmp_init_gtid, TRUE);
3792 __kmp_gtid_set_specific(gtid);
3795 __kmp_itt_thread_name(gtid);
3798 #ifdef KMP_TDATA_GTID
3801 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3802 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3804 KA_TRACE(20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3806 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3807 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3808 KMP_INIT_BARRIER_STATE));
3811 for (b = 0; b < bs_last_barrier; ++b) {
3812 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3814 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3818 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3819 KMP_INIT_BARRIER_STATE);
3821 #if KMP_AFFINITY_SUPPORTED
3822 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3823 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3824 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3825 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3826 if (TCR_4(__kmp_init_middle)) {
3827 __kmp_affinity_set_init_mask(gtid, TRUE);
3830 root_thread->th.th_def_allocator = __kmp_def_allocator;
3831 root_thread->th.th_prev_level = 0;
3832 root_thread->th.th_prev_num_threads = 1;
3834 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(
sizeof(kmp_cg_root_t));
3835 tmp->cg_root = root_thread;
3836 tmp->cg_thread_limit = __kmp_cg_max_nth;
3837 tmp->cg_nthreads = 1;
3838 KA_TRACE(100, (
"__kmp_register_root: Thread %p created node %p with"
3839 " cg_nthreads init to 1\n",
3842 root_thread->th.th_cg_roots = tmp;
3844 __kmp_root_counter++;
3847 if (!initial_thread && ompt_enabled.enabled) {
3849 kmp_info_t *root_thread = ompt_get_thread();
3851 ompt_set_thread_state(root_thread, ompt_state_overhead);
3853 if (ompt_enabled.ompt_callback_thread_begin) {
3854 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3855 ompt_thread_initial, __ompt_get_thread_data_internal());
3857 ompt_data_t *task_data;
3858 ompt_data_t *parallel_data;
3859 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data, NULL);
3860 if (ompt_enabled.ompt_callback_implicit_task) {
3861 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
3862 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
3865 ompt_set_thread_state(root_thread, ompt_state_work_serial);
3870 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
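// Teardown path (informal summary): __kmp_reset_root frees a root's root team
// and hot team(s) and reaps the uber thread; __kmp_unregister_root_current_thread
// is the normal exit taken when a root thread leaves the runtime, while
// __kmp_unregister_root_other_thread reclaims a root that terminated without
// unregistering itself.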
3875 #if KMP_NESTED_HOT_TEAMS
3876 static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr,
int level,
3877 const int max_level) {
3879 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3880 if (!hot_teams || !hot_teams[level].hot_team) {
3883 KMP_DEBUG_ASSERT(level < max_level);
3884 kmp_team_t *team = hot_teams[level].hot_team;
3885 nth = hot_teams[level].hot_team_nth;
3887 if (level < max_level - 1) {
3888 for (i = 0; i < nth; ++i) {
3889 kmp_info_t *th = team->t.t_threads[i];
3890 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
3891 if (i > 0 && th->th.th_hot_teams) {
3892 __kmp_free(th->th.th_hot_teams);
3893 th->th.th_hot_teams = NULL;
3897 __kmp_free_team(root, team, NULL);
3904 static int __kmp_reset_root(
int gtid, kmp_root_t *root) {
3905 kmp_team_t *root_team = root->r.r_root_team;
3906 kmp_team_t *hot_team = root->r.r_hot_team;
3907 int n = hot_team->t.t_nproc;
3910 KMP_DEBUG_ASSERT(!root->r.r_active);
3912 root->r.r_root_team = NULL;
3913 root->r.r_hot_team = NULL;
3916 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
3917 #if KMP_NESTED_HOT_TEAMS
3918 if (__kmp_hot_teams_max_level >
3920 for (i = 0; i < hot_team->t.t_nproc; ++i) {
3921 kmp_info_t *th = hot_team->t.t_threads[i];
3922 if (__kmp_hot_teams_max_level > 1) {
3923 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
3925 if (th->th.th_hot_teams) {
3926 __kmp_free(th->th.th_hot_teams);
3927 th->th.th_hot_teams = NULL;
3932 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
3937 if (__kmp_tasking_mode != tskm_immediate_exec) {
3938 __kmp_wait_to_unref_task_teams();
3944 10, (
"__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
3946 (LPVOID) & (root->r.r_uber_thread->th),
3947 root->r.r_uber_thread->th.th_info.ds.ds_thread));
3948 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
3952 ompt_data_t *task_data;
3953 ompt_data_t *parallel_data;
3954 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data, NULL);
3955 if (ompt_enabled.ompt_callback_implicit_task) {
3956 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
3957 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
3959 if (ompt_enabled.ompt_callback_thread_end) {
3960 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
3961 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
3967 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
3968 KA_TRACE(100, (
"__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
3970 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
3971 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
3974 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
3975 root->r.r_uber_thread->th.th_cg_roots->cg_root);
3976 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
3977 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
3978 root->r.r_uber_thread->th.th_cg_roots = NULL;
3980 __kmp_reap_thread(root->r.r_uber_thread, 1);
3984 root->r.r_uber_thread = NULL;
3986 root->r.r_begin = FALSE;
3991 void __kmp_unregister_root_current_thread(
int gtid) {
3992 KA_TRACE(1, (
"__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
3996 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3997 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
3998 KC_TRACE(10, (
"__kmp_unregister_root_current_thread: already finished, "
4001 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4004 kmp_root_t *root = __kmp_root[gtid];
4006 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4007 KMP_ASSERT(KMP_UBER_GTID(gtid));
4008 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4009 KMP_ASSERT(root->r.r_active == FALSE);
4013 kmp_info_t *thread = __kmp_threads[gtid];
4014 kmp_team_t *team = thread->th.th_team;
4015 kmp_task_team_t *task_team = thread->th.th_task_team;
4018 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
4021 thread->th.ompt_thread_info.state = ompt_state_undefined;
4023 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4026 __kmp_reset_root(gtid, root);
4030 (
"__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4032 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4039 static int __kmp_unregister_root_other_thread(
int gtid) {
4040 kmp_root_t *root = __kmp_root[gtid];
4043 KA_TRACE(1, (
"__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4044 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4045 KMP_ASSERT(KMP_UBER_GTID(gtid));
4046 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4047 KMP_ASSERT(root->r.r_active == FALSE);
4049 r = __kmp_reset_root(gtid, root);
4051 (
"__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
4057 void __kmp_task_info() {
4059 kmp_int32 gtid = __kmp_entry_gtid();
4060 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4061 kmp_info_t *this_thr = __kmp_threads[gtid];
4062 kmp_team_t *steam = this_thr->th.th_serial_team;
4063 kmp_team_t *team = this_thr->th.th_team;
4066 "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
4068 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4069 team->t.t_implicit_task_taskdata[tid].td_parent);
4076 static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4077 int tid,
int gtid) {
4081 kmp_info_t *master = team->t.t_threads[0];
4082 KMP_DEBUG_ASSERT(this_thr != NULL);
4083 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4084 KMP_DEBUG_ASSERT(team);
4085 KMP_DEBUG_ASSERT(team->t.t_threads);
4086 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4087 KMP_DEBUG_ASSERT(master);
4088 KMP_DEBUG_ASSERT(master->th.th_root);
4092 TCW_SYNC_PTR(this_thr->th.th_team, team);
4094 this_thr->th.th_info.ds.ds_tid = tid;
4095 this_thr->th.th_set_nproc = 0;
4096 if (__kmp_tasking_mode != tskm_immediate_exec)
4099 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4101 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4102 this_thr->th.th_set_proc_bind = proc_bind_default;
4103 #if KMP_AFFINITY_SUPPORTED
4104 this_thr->th.th_new_place = this_thr->th.th_current_place;
4106 this_thr->th.th_root = master->th.th_root;
4109 this_thr->th.th_team_nproc = team->t.t_nproc;
4110 this_thr->th.th_team_master = master;
4111 this_thr->th.th_team_serialized = team->t.t_serialized;
4112 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4114 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4116 KF_TRACE(10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4117 tid, gtid, this_thr, this_thr->th.th_current_task));
4119 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4122 KF_TRACE(10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4123 tid, gtid, this_thr, this_thr->th.th_current_task));
4128 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4130 this_thr->th.th_local.this_construct = 0;
4132 if (!this_thr->th.th_pri_common) {
4133 this_thr->th.th_pri_common =
4134 (
struct common_table *)__kmp_allocate(
sizeof(
struct common_table));
4135 if (__kmp_storage_map) {
4136 __kmp_print_storage_map_gtid(
4137 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4138 sizeof(
struct common_table),
"th_%d.th_pri_common\n", gtid);
4140 this_thr->th.th_pri_head = NULL;
4143 if (this_thr != master &&
4144 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4146 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4147 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4150 int i = tmp->cg_nthreads--;
4151 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p decrement cg_nthreads"
4152 " on node %p of thread %p to %d\n",
4153 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4158 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4160 this_thr->th.th_cg_roots->cg_nthreads++;
4161 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p increment cg_nthreads on"
4162 " node %p of thread %p to %d\n",
4163 this_thr, this_thr->th.th_cg_roots,
4164 this_thr->th.th_cg_roots->cg_root,
4165 this_thr->th.th_cg_roots->cg_nthreads));
4166 this_thr->th.th_current_task->td_icvs.thread_limit =
4167 this_thr->th.th_cg_roots->cg_thread_limit;
4172 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4175 sizeof(dispatch_private_info_t) *
4176 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4177 KD_TRACE(10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4178 team->t.t_max_nproc));
4179 KMP_ASSERT(dispatch);
4180 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4181 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4183 dispatch->th_disp_index = 0;
4184 dispatch->th_doacross_buf_idx = 0;
4185 if (!dispatch->th_disp_buffer) {
4186 dispatch->th_disp_buffer =
4187 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4189 if (__kmp_storage_map) {
4190 __kmp_print_storage_map_gtid(
4191 gtid, &dispatch->th_disp_buffer[0],
4192 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4194 : __kmp_dispatch_num_buffers],
4195 disp_size,
"th_%d.th_dispatch.th_disp_buffer "
4196 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4197 gtid, team->t.t_id, gtid);
4200 memset(&dispatch->th_disp_buffer[0],
'\0', disp_size);
4203 dispatch->th_dispatch_pr_current = 0;
4204 dispatch->th_dispatch_sh_current = 0;
4206 dispatch->th_deo_fcn = 0;
4207 dispatch->th_dxo_fcn = 0;
4210 this_thr->th.th_next_pool = NULL;
4212 if (!this_thr->th.th_task_state_memo_stack) {
4214 this_thr->th.th_task_state_memo_stack =
4215 (kmp_uint8 *)__kmp_allocate(4 *
sizeof(kmp_uint8));
4216 this_thr->th.th_task_state_top = 0;
4217 this_thr->th.th_task_state_stack_sz = 4;
4218 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4220 this_thr->th.th_task_state_memo_stack[i] = 0;
4223 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4224 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
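// __kmp_allocate_thread: provide a worker for `team` at slot new_tid, either
// by recycling a thread from __kmp_thread_pool or by allocating a fresh
// kmp_info_t, picking a free gtid, creating its serial team and finally
// spawning the OS thread via __kmp_create_worker.  (Informal summary.)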
4234 kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4236 kmp_team_t *serial_team;
4237 kmp_info_t *new_thr;
4240 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4241 KMP_DEBUG_ASSERT(root && team);
4242 #if !KMP_NESTED_HOT_TEAMS
4243 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4248 if (__kmp_thread_pool) {
4249 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
    __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4251 if (new_thr == __kmp_thread_pool_insert_pt) {
4252 __kmp_thread_pool_insert_pt = NULL;
4254 TCW_4(new_thr->th.th_in_pool, FALSE);
4255 __kmp_suspend_initialize_thread(new_thr);
4256 __kmp_lock_suspend_mx(new_thr);
4257 if (new_thr->th.th_active_in_pool == TRUE) {
4258 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4259 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4260 new_thr->th.th_active_in_pool = FALSE;
4262 __kmp_unlock_suspend_mx(new_thr);
4264 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4265 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4266 KMP_ASSERT(!new_thr->th.th_team);
4267 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4270 __kmp_initialize_info(new_thr, team, new_tid,
4271 new_thr->th.th_info.ds.ds_gtid);
4272 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4274 TCW_4(__kmp_nth, __kmp_nth + 1);
4276 new_thr->th.th_task_state = 0;
4277 new_thr->th.th_task_state_top = 0;
4278 new_thr->th.th_task_state_stack_sz = 4;
4280 #ifdef KMP_ADJUST_BLOCKTIME
4283 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4284 if (__kmp_nth > __kmp_avail_proc) {
4285 __kmp_zero_bt = TRUE;
4294 kmp_balign_t *balign = new_thr->th.th_bar;
4295 for (b = 0; b < bs_last_barrier; ++b)
4296 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4299 KF_TRACE(10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4300 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4307 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4308 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4313 if (!TCR_4(__kmp_init_monitor)) {
4314 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4315 if (!TCR_4(__kmp_init_monitor)) {
4316 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4317 TCW_4(__kmp_init_monitor, 1);
4318 __kmp_create_monitor(&__kmp_monitor);
4319 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4330 while (TCR_4(__kmp_init_monitor) < 2) {
4333 KF_TRACE(10, (
"after monitor thread has started\n"));
4336 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4343 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
4345 : __kmp_hidden_helper_threads_num + 1;
4347 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4349 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4352 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4353 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
4358 new_thr = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
4360 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4362 #if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4365 __itt_suppress_mark_range(
4366 __itt_suppress_range, __itt_suppress_threading_errors,
4367 &new_thr->th.th_sleep_loc,
sizeof(new_thr->th.th_sleep_loc));
4368 __itt_suppress_mark_range(
4369 __itt_suppress_range, __itt_suppress_threading_errors,
4370 &new_thr->th.th_reap_state,
sizeof(new_thr->th.th_reap_state));
4372 __itt_suppress_mark_range(
4373 __itt_suppress_range, __itt_suppress_threading_errors,
4374 &new_thr->th.th_suspend_init,
sizeof(new_thr->th.th_suspend_init));
4376 __itt_suppress_mark_range(__itt_suppress_range,
4377 __itt_suppress_threading_errors,
4378 &new_thr->th.th_suspend_init_count,
4379 sizeof(new_thr->th.th_suspend_init_count));
4382 __itt_suppress_mark_range(__itt_suppress_range,
4383 __itt_suppress_threading_errors,
4384 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4385 sizeof(new_thr->th.th_bar[0].bb.b_go));
4386 __itt_suppress_mark_range(__itt_suppress_range,
4387 __itt_suppress_threading_errors,
4388 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4389 sizeof(new_thr->th.th_bar[1].bb.b_go));
4390 __itt_suppress_mark_range(__itt_suppress_range,
4391 __itt_suppress_threading_errors,
4392 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4393 sizeof(new_thr->th.th_bar[2].bb.b_go));
4395 if (__kmp_storage_map) {
4396 __kmp_print_thread_storage_map(new_thr, new_gtid);
4401 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4402 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4403 new_thr->th.th_serial_team = serial_team =
4404 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4408 proc_bind_default, &r_icvs,
4409 0 USE_NESTED_HOT_ARG(NULL));
4411 KMP_ASSERT(serial_team);
4412 serial_team->t.t_serialized = 0;
4414 serial_team->t.t_threads[0] = new_thr;
4416 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4420 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4423 __kmp_initialize_fast_memory(new_thr);
4427 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4428 __kmp_initialize_bget(new_thr);
4431 __kmp_init_random(new_thr);
4435 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4436 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4439 kmp_balign_t *balign = new_thr->th.th_bar;
4440 for (b = 0; b < bs_last_barrier; ++b) {
4441 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4442 balign[b].bb.team = NULL;
4443 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4444 balign[b].bb.use_oncore_barrier = 0;
4447 new_thr->th.th_spin_here = FALSE;
4448 new_thr->th.th_next_waiting = 0;
  new_thr->th.th_blocking = false;
4453 #if KMP_AFFINITY_SUPPORTED
4454 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4455 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4456 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4457 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4459 new_thr->th.th_def_allocator = __kmp_def_allocator;
4460 new_thr->th.th_prev_level = 0;
4461 new_thr->th.th_prev_num_threads = 1;
4463 TCW_4(new_thr->th.th_in_pool, FALSE);
4464 new_thr->th.th_active_in_pool = FALSE;
4465 TCW_4(new_thr->th.th_active, TRUE);
4473 if (__kmp_adjust_gtid_mode) {
4474 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4475 if (TCR_4(__kmp_gtid_mode) != 2) {
4476 TCW_4(__kmp_gtid_mode, 2);
4479 if (TCR_4(__kmp_gtid_mode) != 1) {
4480 TCW_4(__kmp_gtid_mode, 1);
4485 #ifdef KMP_ADJUST_BLOCKTIME
4488 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4489 if (__kmp_nth > __kmp_avail_proc) {
4490 __kmp_zero_bt = TRUE;
4497 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4498 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4500 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4502 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
4513 static void __kmp_reinitialize_team(kmp_team_t *team,
4514 kmp_internal_control_t *new_icvs,
4516 KF_TRACE(10, (
"__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4517 team->t.t_threads[0], team));
4518 KMP_DEBUG_ASSERT(team && new_icvs);
4519 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4520 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4522 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4524 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4525 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4527 KF_TRACE(10, (
"__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4528 team->t.t_threads[0], team));
4534 static void __kmp_initialize_team(kmp_team_t *team,
int new_nproc,
4535 kmp_internal_control_t *new_icvs,
4537 KF_TRACE(10, (
"__kmp_initialize_team: enter: team=%p\n", team));
4540 KMP_DEBUG_ASSERT(team);
4541 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4542 KMP_DEBUG_ASSERT(team->t.t_threads);
4545 team->t.t_master_tid = 0;
4547 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4548 team->t.t_nproc = new_nproc;
4551 team->t.t_next_pool = NULL;
4555 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4556 team->t.t_invoke = NULL;
4559 team->t.t_sched.sched = new_icvs->sched.sched;
4561 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4562 team->t.t_fp_control_saved = FALSE;
4563 team->t.t_x87_fpu_control_word = 0;
4564 team->t.t_mxcsr = 0;
4567 team->t.t_construct = 0;
4569 team->t.t_ordered.dt.t_value = 0;
4570 team->t.t_master_active = FALSE;
4573 team->t.t_copypriv_data = NULL;
4576 team->t.t_copyin_counter = 0;
4579 team->t.t_control_stack_top = NULL;
4581 __kmp_reinitialize_team(team, new_icvs, loc);
4584 KF_TRACE(10, (
"__kmp_initialize_team: exit: team=%p\n", team));
4587 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
4590 __kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
4591 if (KMP_AFFINITY_CAPABLE()) {
4593 if (old_mask != NULL) {
4594 status = __kmp_get_system_affinity(old_mask, TRUE);
4597 __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
4601 __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
4606 #if KMP_AFFINITY_SUPPORTED
4612 static void __kmp_partition_places(kmp_team_t *team,
int update_master_only) {
4614 kmp_info_t *master_th = team->t.t_threads[0];
4615 KMP_DEBUG_ASSERT(master_th != NULL);
4616 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4617 int first_place = master_th->th.th_first_place;
4618 int last_place = master_th->th.th_last_place;
4619 int masters_place = master_th->th.th_current_place;
4620 team->t.t_first_place = first_place;
4621 team->t.t_last_place = last_place;
4623 KA_TRACE(20, (
"__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4624 "bound to place %d partition = [%d,%d]\n",
4625 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4626 team->t.t_id, masters_place, first_place, last_place));
4628 switch (proc_bind) {
4630 case proc_bind_default:
4633 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4636 case proc_bind_master: {
4638 int n_th = team->t.t_nproc;
4639 for (f = 1; f < n_th; f++) {
4640 kmp_info_t *th = team->t.t_threads[f];
4641 KMP_DEBUG_ASSERT(th != NULL);
4642 th->th.th_first_place = first_place;
4643 th->th.th_last_place = last_place;
4644 th->th.th_new_place = masters_place;
4645 if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
4646 team->t.t_display_affinity != 1) {
4647 team->t.t_display_affinity = 1;
4650 KA_TRACE(100, (
"__kmp_partition_places: master: T#%d(%d:%d) place %d "
4651 "partition = [%d,%d]\n",
4652 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4653 f, masters_place, first_place, last_place));
4657 case proc_bind_close: {
4659 int n_th = team->t.t_nproc;
4661 if (first_place <= last_place) {
4662 n_places = last_place - first_place + 1;
4664 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4666 if (n_th <= n_places) {
4667 int place = masters_place;
4668 for (f = 1; f < n_th; f++) {
4669 kmp_info_t *th = team->t.t_threads[f];
4670 KMP_DEBUG_ASSERT(th != NULL);
4672 if (place == last_place) {
4673 place = first_place;
4674 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4679 th->th.th_first_place = first_place;
4680 th->th.th_last_place = last_place;
4681 th->th.th_new_place = place;
4682 if (__kmp_display_affinity && place != th->th.th_current_place &&
4683 team->t.t_display_affinity != 1) {
4684 team->t.t_display_affinity = 1;
4687 KA_TRACE(100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4688 "partition = [%d,%d]\n",
4689 __kmp_gtid_from_thread(team->t.t_threads[f]),
4690 team->t.t_id, f, place, first_place, last_place));
4693 int S, rem, gap, s_count;
4694 S = n_th / n_places;
4696 rem = n_th - (S * n_places);
4697 gap = rem > 0 ? n_places / rem : n_places;
4698 int place = masters_place;
4700 for (f = 0; f < n_th; f++) {
4701 kmp_info_t *th = team->t.t_threads[f];
4702 KMP_DEBUG_ASSERT(th != NULL);
4704 th->th.th_first_place = first_place;
4705 th->th.th_last_place = last_place;
4706 th->th.th_new_place = place;
4707 if (__kmp_display_affinity && place != th->th.th_current_place &&
4708 team->t.t_display_affinity != 1) {
4709 team->t.t_display_affinity = 1;
4713 if ((s_count == S) && rem && (gap_ct == gap)) {
4715 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4717 if (place == last_place) {
4718 place = first_place;
4719 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4727 }
else if (s_count == S) {
4728 if (place == last_place) {
4729 place = first_place;
4730 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4740 (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4741 "partition = [%d,%d]\n",
4742 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4743 th->th.th_new_place, first_place, last_place));
4745 KMP_DEBUG_ASSERT(place == masters_place);
4749 case proc_bind_spread: {
4751 int n_th = team->t.t_nproc;
4754 if (first_place <= last_place) {
4755 n_places = last_place - first_place + 1;
4757 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4759 if (n_th <= n_places) {
4762 if (n_places !=
static_cast<int>(__kmp_affinity_num_masks)) {
4763 int S = n_places / n_th;
4764 int s_count, rem, gap, gap_ct;
4766 place = masters_place;
4767 rem = n_places - n_th * S;
4768 gap = rem ? n_th / rem : 1;
4771 if (update_master_only == 1)
4773 for (f = 0; f < thidx; f++) {
4774 kmp_info_t *th = team->t.t_threads[f];
4775 KMP_DEBUG_ASSERT(th != NULL);
4777 th->th.th_first_place = place;
4778 th->th.th_new_place = place;
4779 if (__kmp_display_affinity && place != th->th.th_current_place &&
4780 team->t.t_display_affinity != 1) {
4781 team->t.t_display_affinity = 1;
4784 while (s_count < S) {
4785 if (place == last_place) {
4786 place = first_place;
4787 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4794 if (rem && (gap_ct == gap)) {
4795 if (place == last_place) {
4796 place = first_place;
4797 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4805 th->th.th_last_place = place;
4808 if (place == last_place) {
4809 place = first_place;
4810 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4817 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4818 "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
4819 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4820 f, th->th.th_new_place, th->th.th_first_place,
4821 th->th.th_last_place, __kmp_affinity_num_masks));
4827 double current = static_cast<double>(masters_place);
4829 double spacing = (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
4834 if (update_master_only == 1)
4836 for (f = 0; f < thidx; f++) {
4837 first = static_cast<int>(current);
4838 last = static_cast<int>(current + spacing) - 1;
4839 KMP_DEBUG_ASSERT(last >= first);
4840 if (first >= n_places) {
4841 if (masters_place) {
4844 if (first == (masters_place + 1)) {
4845 KMP_DEBUG_ASSERT(f == n_th);
4848 if (last == masters_place) {
4849 KMP_DEBUG_ASSERT(f == (n_th - 1));
4853 KMP_DEBUG_ASSERT(f == n_th);
4858 if (last >= n_places) {
4859 last = (n_places - 1);
4864 KMP_DEBUG_ASSERT(0 <= first);
4865 KMP_DEBUG_ASSERT(n_places > first);
4866 KMP_DEBUG_ASSERT(0 <= last);
4867 KMP_DEBUG_ASSERT(n_places > last);
4868 KMP_DEBUG_ASSERT(last_place >= first_place);
4869 th = team->t.t_threads[f];
4870 KMP_DEBUG_ASSERT(th);
4871 th->th.th_first_place = first;
4872 th->th.th_new_place = place;
4873 th->th.th_last_place = last;
4874 if (__kmp_display_affinity && place != th->th.th_current_place &&
4875 team->t.t_display_affinity != 1) {
4876 team->t.t_display_affinity = 1;
4879 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4880 "partition = [%d,%d], spacing = %.4f\n",
4881 __kmp_gtid_from_thread(team->t.t_threads[f]),
4882 team->t.t_id, f, th->th.th_new_place,
4883 th->th.th_first_place, th->th.th_last_place, spacing));
4887 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4889 int S, rem, gap, s_count;
4890 S = n_th / n_places;
4892 rem = n_th - (S * n_places);
4893 gap = rem > 0 ? n_places / rem : n_places;
4894 int place = masters_place;
4897 if (update_master_only == 1)
4899 for (f = 0; f < thidx; f++) {
4900 kmp_info_t *th = team->t.t_threads[f];
4901 KMP_DEBUG_ASSERT(th != NULL);
4903 th->th.th_first_place = place;
4904 th->th.th_last_place = place;
4905 th->th.th_new_place = place;
4906 if (__kmp_display_affinity && place != th->th.th_current_place &&
4907 team->t.t_display_affinity != 1) {
4908 team->t.t_display_affinity = 1;
4912 if ((s_count == S) && rem && (gap_ct == gap)) {
4914 } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4916 if (place == last_place) {
4917 place = first_place;
4918 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4926 } else if (s_count == S) {
4927 if (place == last_place) {
4928 place = first_place;
4929 } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
4938 KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4939 "partition = [%d,%d]\n",
4940 __kmp_gtid_from_thread(team->t.t_threads[f]),
4941 team->t.t_id, f, th->th.th_new_place,
4942 th->th.th_first_place, th->th.th_last_place));
4944 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4952 KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
4960 kmp_team_t *__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
4962 ompt_data_t ompt_parallel_data,
4964 kmp_proc_bind_t new_proc_bind,
4965 kmp_internal_control_t *new_icvs,
4966 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
4967 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
4970 int use_hot_team = !root->r.r_active;
4973 KA_TRACE(20, ("__kmp_allocate_team: called\n"));
4974 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
4975 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
4978 #if KMP_NESTED_HOT_TEAMS
4979 kmp_hot_team_ptr_t *hot_teams;
4981 team = master->th.th_team;
4982 level = team->t.t_active_level;
4983 if (master->th.th_teams_microtask) {
4984 if (master->th.th_teams_size.nteams > 1 &&
4987 (microtask_t)__kmp_teams_master ||
4988 master->th.th_teams_level <
4994 hot_teams = master->th.th_hot_teams;
4995 if (level < __kmp_hot_teams_max_level && hot_teams &&
4996 hot_teams[level].hot_team) {
5004 KMP_DEBUG_ASSERT(new_nproc == 1);
5008 if (use_hot_team && new_nproc > 1) {
5009 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5010 #if KMP_NESTED_HOT_TEAMS
5011 team = hot_teams[level].hot_team;
5013 team = root->r.r_hot_team;
5016 if (__kmp_tasking_mode != tskm_immediate_exec) {
5017 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5018 "task_team[1] = %p before reinit\n",
5019 team->t.t_task_team[0], team->t.t_task_team[1]));
5026 if (team->t.t_nproc == new_nproc) {
5027 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
5030 if (team->t.t_size_changed == -1) {
5031 team->t.t_size_changed = 1;
5033 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5037 kmp_r_sched_t new_sched = new_icvs->sched;
5039 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5041 __kmp_reinitialize_team(team, new_icvs,
5042 root->r.r_uber_thread->th.th_ident);
5044 KF_TRACE(10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
5045 team->t.t_threads[0], team));
5046 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5048 #if KMP_AFFINITY_SUPPORTED
5049 if ((team->t.t_size_changed == 0) &&
5050 (team->t.t_proc_bind == new_proc_bind)) {
5051 if (new_proc_bind == proc_bind_spread) {
5052 __kmp_partition_places(team, 1);
5055 KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
5056 "proc_bind = %d, partition = [%d,%d]\n",
5057 team->t.t_id, new_proc_bind, team->t.t_first_place,
5058 team->t.t_last_place));
5060 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5061 __kmp_partition_places(team);
5064 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5066 } else if (team->t.t_nproc > new_nproc) {
5068 (
"__kmp_allocate_team: decreasing hot team thread count to %d\n",
5071 team->t.t_size_changed = 1;
5072 #if KMP_NESTED_HOT_TEAMS
5073 if (__kmp_hot_teams_mode == 0) {
5076 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5077 hot_teams[level].hot_team_nth = new_nproc;
5080 for (f = new_nproc; f < team->t.t_nproc; f++) {
5081 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5082 if (__kmp_tasking_mode != tskm_immediate_exec) {
5085 team->t.t_threads[f]->th.th_task_team = NULL;
5087 __kmp_free_thread(team->t.t_threads[f]);
5088 team->t.t_threads[f] = NULL;
5090 #if KMP_NESTED_HOT_TEAMS
5095 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5096 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5097 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
5098 for (int b = 0; b < bs_last_barrier; ++b) {
5099 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5100 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5102 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5107 team->t.t_nproc = new_nproc;
5109 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5110 __kmp_reinitialize_team(team, new_icvs,
5111 root->r.r_uber_thread->th.th_ident);
5114 for (f = 0; f < new_nproc; ++f) {
5115 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5120 KF_TRACE(10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5121 team->t.t_threads[0], team));
5123 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5126 for (f = 0; f < team->t.t_nproc; f++) {
5127 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5128 team->t.t_threads[f]->th.th_team_nproc ==
5133 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5134 #if KMP_AFFINITY_SUPPORTED
5135 __kmp_partition_places(team);
5138 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5139 kmp_affin_mask_t *old_mask;
5140 if (KMP_AFFINITY_CAPABLE()) {
5141 KMP_CPU_ALLOC(old_mask);
5146 (
"__kmp_allocate_team: increasing hot team thread count to %d\n",
5149 team->t.t_size_changed = 1;
5151 #if KMP_NESTED_HOT_TEAMS
5152 int avail_threads = hot_teams[level].hot_team_nth;
5153 if (new_nproc < avail_threads)
5154 avail_threads = new_nproc;
5155 kmp_info_t **other_threads = team->t.t_threads;
5156 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5160 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5161 for (b = 0; b < bs_last_barrier; ++b) {
5162 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5163 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5165 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5169 if (hot_teams[level].hot_team_nth >= new_nproc) {
5172 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5173 team->t.t_nproc = new_nproc;
5179 hot_teams[level].hot_team_nth = new_nproc;
5181 if (team->t.t_max_nproc < new_nproc) {
5183 __kmp_reallocate_team_arrays(team, new_nproc);
5184 __kmp_reinitialize_team(team, new_icvs, NULL);
5187 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5192 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5196 for (f = team->t.t_nproc; f < new_nproc; f++) {
5197 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5198 KMP_DEBUG_ASSERT(new_worker);
5199 team->t.t_threads[f] = new_worker;
5202 (
"__kmp_allocate_team: team %d init T#%d arrived: "
5203 "join=%llu, plain=%llu\n",
5204 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5205 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5206 team->t.t_bar[bs_plain_barrier].b_arrived));
5210 kmp_balign_t *balign = new_worker->th.th_bar;
5211 for (b = 0; b < bs_last_barrier; ++b) {
5212 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5213 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5214 KMP_BARRIER_PARENT_FLAG);
5216 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5222 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5223 if (KMP_AFFINITY_CAPABLE()) {
5225 __kmp_set_system_affinity(old_mask, TRUE);
5226 KMP_CPU_FREE(old_mask);
5229 #if KMP_NESTED_HOT_TEAMS
5233 int old_nproc = team->t.t_nproc;
5235 __kmp_initialize_team(team, new_nproc, new_icvs,
5236 root->r.r_uber_thread->th.th_ident);
5239 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5240 for (f = 0; f < team->t.t_nproc; ++f)
5241 __kmp_initialize_info(team->t.t_threads[f], team, f,
5242 __kmp_gtid_from_tid(f, team));
5250 for (f = old_nproc; f < team->t.t_nproc; ++f)
5251 team->t.t_threads[f]->th.th_task_state =
5252 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5254 kmp_uint8 old_state =
5255 team->t.t_threads[0]->th.th_task_state;
5256 for (f = old_nproc; f < team->t.t_nproc; ++f)
5257 team->t.t_threads[f]->th.th_task_state = old_state;
5261 for (f = 0; f < team->t.t_nproc; ++f) {
5262 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5263 team->t.t_threads[f]->th.th_team_nproc ==
5268 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5269 #if KMP_AFFINITY_SUPPORTED
5270 __kmp_partition_places(team);
5274 kmp_info_t *master = team->t.t_threads[0];
5275 if (master->th.th_teams_microtask) {
5276 for (f = 1; f < new_nproc; ++f) {
5278 kmp_info_t *thr = team->t.t_threads[f];
5279 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5280 thr->th.th_teams_level = master->th.th_teams_level;
5281 thr->th.th_teams_size = master->th.th_teams_size;
5284 #if KMP_NESTED_HOT_TEAMS
5288 for (f = 1; f < new_nproc; ++f) {
5289 kmp_info_t *thr = team->t.t_threads[f];
5291 kmp_balign_t *balign = thr->th.th_bar;
5292 for (b = 0; b < bs_last_barrier; ++b) {
5293 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5294 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5296 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5304 __kmp_alloc_argv_entries(argc, team, TRUE);
5305 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5309 KF_TRACE(10, (
" hot_team = %p\n", team));
5312 if (__kmp_tasking_mode != tskm_immediate_exec) {
5313 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5314 "task_team[1] = %p after reinit\n",
5315 team->t.t_task_team[0], team->t.t_task_team[1]));
5320 __ompt_team_assign_id(team, ompt_parallel_data);
5330 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5333 if (team->t.t_max_nproc >= max_nproc) {
5335 __kmp_team_pool = team->t.t_next_pool;
5338 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5340 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and "
5341 "task_team[1] %p to NULL\n",
5342 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5343 team->t.t_task_team[0] = NULL;
5344 team->t.t_task_team[1] = NULL;
5347 __kmp_alloc_argv_entries(argc, team, TRUE);
5348 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5351 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5352 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5355 for (b = 0; b < bs_last_barrier; ++b) {
5356 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5358 team->t.t_bar[b].b_master_arrived = 0;
5359 team->t.t_bar[b].b_team_arrived = 0;
5364 team->t.t_proc_bind = new_proc_bind;
5366 KA_TRACE(20, (
"__kmp_allocate_team: using team from pool %d.\n",
5370 __ompt_team_assign_id(team, ompt_parallel_data);
5382 team = __kmp_reap_team(team);
5383 __kmp_team_pool = team;
5388 team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5391 team->t.t_max_nproc = max_nproc;
5394 __kmp_allocate_team_arrays(team, max_nproc);
5396 KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
5397 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5399 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5401 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5402 team->t.t_task_team[0] = NULL;
5404 team->t.t_task_team[1] = NULL;
5407 if (__kmp_storage_map) {
5408 __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);
5412 __kmp_alloc_argv_entries(argc, team, FALSE);
5413 team->t.t_argc = argc;
5416 (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5417 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5420 for (b = 0; b < bs_last_barrier; ++b) {
5421 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5423 team->t.t_bar[b].b_master_arrived = 0;
5424 team->t.t_bar[b].b_team_arrived = 0;
5429 team->t.t_proc_bind = new_proc_bind;
5432 __ompt_team_assign_id(team, ompt_parallel_data);
5433 team->t.ompt_serialized_team_info = NULL;
5438 KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
5449 void __kmp_free_team(kmp_root_t *root,
5450 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5452 KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
5456 KMP_DEBUG_ASSERT(root);
5457 KMP_DEBUG_ASSERT(team);
5458 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5459 KMP_DEBUG_ASSERT(team->t.t_threads);
5461 int use_hot_team = team == root->r.r_hot_team;
5462 #if KMP_NESTED_HOT_TEAMS
5464 kmp_hot_team_ptr_t *hot_teams;
5466 level = team->t.t_active_level - 1;
5467 if (master->th.th_teams_microtask) {
5468 if (master->th.th_teams_size.nteams > 1) {
5472 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5473 master->th.th_teams_level == team->t.t_level) {
5478 hot_teams = master->th.th_hot_teams;
5479 if (level < __kmp_hot_teams_max_level) {
5480 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5487 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
5490 team->t.t_copyin_counter = 0;
5495 if (!use_hot_team) {
5496 if (__kmp_tasking_mode != tskm_immediate_exec) {
5498 for (f = 1; f < team->t.t_nproc; ++f) {
5499 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5500 kmp_info_t *th = team->t.t_threads[f];
5501 volatile kmp_uint32 *state = &th->th.th_reap_state;
5502 while (*state != KMP_SAFE_TO_REAP) {
5506 if (!__kmp_is_thread_alive(th, &ecode)) {
5507 *state = KMP_SAFE_TO_REAP;
5512 kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5513 if (fl.is_sleeping())
5514 fl.resume(__kmp_gtid_from_thread(th));
5521 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5522 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5523 if (task_team != NULL) {
5524 for (f = 0; f < team->t.t_nproc; ++f) {
5525 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5526 team->t.t_threads[f]->th.th_task_team = NULL;
5530 (
"__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5531 __kmp_get_gtid(), task_team, team->t.t_id));
5532 #if KMP_NESTED_HOT_TEAMS
5533 __kmp_free_task_team(master, task_team);
5535 team->t.t_task_team[tt_idx] = NULL;
5541 team->t.t_parent = NULL;
5542 team->t.t_level = 0;
5543 team->t.t_active_level = 0;
5546 for (f = 1; f < team->t.t_nproc; ++f) {
5547 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5548 __kmp_free_thread(team->t.t_threads[f]);
5549 team->t.t_threads[f] = NULL;
5554 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5555 __kmp_team_pool = (volatile kmp_team_t *)team;
5558 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5559 team->t.t_threads[1]->th.th_cg_roots);
5560 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5562 for (f = 1; f < team->t.t_nproc; ++f) {
5563 kmp_info_t *thr = team->t.t_threads[f];
5564 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5565 thr->th.th_cg_roots->cg_root == thr);
5567 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5568 thr->th.th_cg_roots = tmp->up;
5569 KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"
5570 " up to node %p. cg_nthreads was %d\n",
5571 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5572 int i = tmp->cg_nthreads--;
5577 if (thr->th.th_cg_roots)
5578 thr->th.th_current_task->td_icvs.thread_limit =
5579 thr->th.th_cg_roots->cg_thread_limit;
5588 kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5589 kmp_team_t *next_pool = team->t.t_next_pool;
5591 KMP_DEBUG_ASSERT(team);
5592 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5593 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5594 KMP_DEBUG_ASSERT(team->t.t_threads);
5595 KMP_DEBUG_ASSERT(team->t.t_argv);
5600 __kmp_free_team_arrays(team);
5601 if (team->t.t_argv != &team->t.t_inline_argv[0])
5602 __kmp_free((void *)team->t.t_argv);
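// Return a worker to the global thread pool. The pool is kept sorted by
// gtid: the scan below finds the insertion point (cached in
// __kmp_thread_pool_insert_pt), barrier wait flags are reset, and the
// thread's cg_roots chain is unwound before the thread goes idle in the pool.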
5634 void __kmp_free_thread(kmp_info_t *this_th) {
5638 KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5639 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5641 KMP_DEBUG_ASSERT(this_th);
5646 kmp_balign_t *balign = this_th->th.th_bar;
5647 for (b = 0; b < bs_last_barrier; ++b) {
5648 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5649 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5650 balign[b].bb.team = NULL;
5651 balign[b].bb.leaf_kids = 0;
5653 this_th->th.th_task_state = 0;
5654 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5657 TCW_PTR(this_th->th.th_team, NULL);
5658 TCW_PTR(this_th->th.th_root, NULL);
5659 TCW_PTR(this_th->th.th_dispatch, NULL);
5661 while (this_th->th.th_cg_roots) {
5662 this_th->th.th_cg_roots->cg_nthreads--;
5663 KA_TRACE(100, (
"__kmp_free_thread: Thread %p decrement cg_nthreads on node"
5664 " %p of thread %p to %d\n",
5665 this_th, this_th->th.th_cg_roots,
5666 this_th->th.th_cg_roots->cg_root,
5667 this_th->th.th_cg_roots->cg_nthreads));
5668 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5669 if (tmp->cg_root == this_th) {
5670 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
5672 5, (
"__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5673 this_th->th.th_cg_roots = tmp->up;
5676 if (tmp->cg_nthreads == 0) {
5679 this_th->th.th_cg_roots = NULL;
5689 __kmp_free_implicit_task(this_th);
5690 this_th->th.th_current_task = NULL;
5694 gtid = this_th->th.th_info.ds.ds_gtid;
5695 if (__kmp_thread_pool_insert_pt != NULL) {
5696 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5697 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5698 __kmp_thread_pool_insert_pt = NULL;
5707 if (__kmp_thread_pool_insert_pt != NULL) {
5708 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5710 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5712 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5713 scan = &((*scan)->th.th_next_pool))
5718 TCW_PTR(this_th->th.th_next_pool, *scan);
5719 __kmp_thread_pool_insert_pt = *scan = this_th;
5720 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5721 (this_th->th.th_info.ds.ds_gtid <
5722 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5723 TCW_4(this_th->th.th_in_pool, TRUE);
5724 __kmp_suspend_initialize_thread(this_th);
5725 __kmp_lock_suspend_mx(this_th);
5726 if (this_th->th.th_active == TRUE) {
5727 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5728 this_th->th.th_active_in_pool = TRUE;
5732 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5735 __kmp_unlock_suspend_mx(this_th);
5737 TCW_4(__kmp_nth, __kmp_nth - 1);
5739 #ifdef KMP_ADJUST_BLOCKTIME
5742 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5743 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5744 if (__kmp_nth <= __kmp_avail_proc) {
5745 __kmp_zero_bt = FALSE;
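// Main loop of a worker thread: wait at the fork barrier for work, invoke the
// team's microtask through t_invoke, then pass the join barrier, repeating
// until global shutdown is signalled via __kmp_global.g.g_done. OMPT
// thread-begin/thread-end callbacks and optional LLVM time-trace profiling
// are hooked in around this loop.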
5755 void *__kmp_launch_thread(kmp_info_t *this_thr) {
5756 #if OMP_PROFILING_SUPPORT
5757 ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
5759 if (ProfileTraceFile)
5760 llvm::timeTraceProfilerInitialize(500, "libomptarget");
5763 int gtid = this_thr->th.th_info.ds.ds_gtid;
5765 kmp_team_t **volatile pteam;
5768 KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
5770 if (__kmp_env_consistency_check) {
5771 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
5775 ompt_data_t *thread_data;
5776 if (ompt_enabled.enabled) {
5777 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
5778 *thread_data = ompt_data_none;
5780 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5781 this_thr->th.ompt_thread_info.wait_id = 0;
5782 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
5783 this_thr->th.ompt_thread_info.parallel_flags = 0;
5784 if (ompt_enabled.ompt_callback_thread_begin) {
5785 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
5786 ompt_thread_worker, thread_data);
5788 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5793 while (!TCR_4(__kmp_global.g.g_done)) {
5794 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
5798 KA_TRACE(20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid));
5801 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
5804 if (ompt_enabled.enabled) {
5805 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5809 pteam = &this_thr->th.th_team;
5812 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
5814 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
5817 (
"__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5818 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5819 (*pteam)->t.t_pkfn));
5821 updateHWFPControl(*pteam);
5824 if (ompt_enabled.enabled) {
5825 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
5829 rc = (*pteam)->t.t_invoke(gtid);
5833 KA_TRACE(20, (
"__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5834 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5835 (*pteam)->t.t_pkfn));
5838 if (ompt_enabled.enabled) {
5840 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
5842 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5846 __kmp_join_barrier(gtid);
5849 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
5852 if (ompt_enabled.ompt_callback_thread_end) {
5853 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
5857 this_thr->th.th_task_team = NULL;
5859 __kmp_common_destroy_gtid(gtid);
5861 KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
5864 #if OMP_PROFILING_SUPPORT
5865 llvm::timeTraceProfilerFinishThread();
5872 void __kmp_internal_end_dest(void *specific_gtid) {
     int gtid;
5875 __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);
5877 KA_TRACE(30, (
"__kmp_internal_end_dest: T#%d\n", gtid));
5881 __kmp_internal_end_thread(gtid);
5884 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
5886 __attribute__((destructor))
void __kmp_internal_end_dtor(void) {
5887 __kmp_internal_end_atexit();
5894 void __kmp_internal_end_atexit(void) {
5895 KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
5919 __kmp_internal_end_library(-1);
5921 __kmp_close_console();
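// Reap a single worker: if it may still be waiting at the fork barrier,
// release it so the OS thread can exit, join it via __kmp_reap_worker(), and
// free the per-thread resources (implicit task, fast memory, affinity mask,
// hierarchical barrier data, serialized team) before clearing its
// __kmp_threads[] slot.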
5925 static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
5930 KMP_DEBUG_ASSERT(thread != NULL);
5932 gtid = thread->th.th_info.ds.ds_gtid;
5935 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5938 20, (
"__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
5942 ANNOTATE_HAPPENS_BEFORE(thread);
5943 kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
5945 __kmp_release_64(&flag);
5949 __kmp_reap_worker(thread);
5961 if (thread->th.th_active_in_pool) {
5962 thread->th.th_active_in_pool = FALSE;
5963 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
5964 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
5968 __kmp_free_implicit_task(thread);
5972 __kmp_free_fast_memory(thread);
5975 __kmp_suspend_uninitialize_thread(thread);
5977 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
5978 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5983 #ifdef KMP_ADJUST_BLOCKTIME
5986 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5987 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5988 if (__kmp_nth <= __kmp_avail_proc) {
5989 __kmp_zero_bt = FALSE;
5995 if (__kmp_env_consistency_check) {
5996 if (thread->th.th_cons) {
5997 __kmp_free_cons_stack(thread->th.th_cons);
5998 thread->th.th_cons = NULL;
6002 if (thread->th.th_pri_common != NULL) {
6003 __kmp_free(thread->th.th_pri_common);
6004 thread->th.th_pri_common = NULL;
6007 if (thread->th.th_task_state_memo_stack != NULL) {
6008 __kmp_free(thread->th.th_task_state_memo_stack);
6009 thread->th.th_task_state_memo_stack = NULL;
6013 if (thread->th.th_local.bget_data != NULL) {
6014 __kmp_finalize_bget(thread);
6018 #if KMP_AFFINITY_SUPPORTED
6019 if (thread->th.th_affin_mask != NULL) {
6020 KMP_CPU_FREE(thread->th.th_affin_mask);
6021 thread->th.th_affin_mask = NULL;
6025 #if KMP_USE_HIER_SCHED
6026 if (thread->th.th_hier_bar_data != NULL) {
6027 __kmp_free(thread->th.th_hier_bar_data);
6028 thread->th.th_hier_bar_data = NULL;
6032 __kmp_reap_team(thread->th.th_serial_team);
6033 thread->th.th_serial_team = NULL;
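// Common shutdown path shared by __kmp_internal_end_library() and
// __kmp_internal_end_thread(): unregister the library, reclaim dead roots,
// and, if no root is still active, reap the monitor thread, every pooled
// worker thread and team, and the task teams; the callers then tear down the
// remaining TLS/gtid bookkeeping.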
6040 static void __kmp_internal_end(void) {
6044 __kmp_unregister_library();
6051 __kmp_reclaim_dead_roots();
6055 for (i = 0; i < __kmp_threads_capacity; i++)
6057 if (__kmp_root[i]->r.r_active)
6060 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6062 if (i < __kmp_threads_capacity) {
6074 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6075 if (TCR_4(__kmp_init_monitor)) {
6076 __kmp_reap_monitor(&__kmp_monitor);
6077 TCW_4(__kmp_init_monitor, 0);
6079 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6080 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6086 for (i = 0; i < __kmp_threads_capacity; i++) {
6087 if (__kmp_root[i]) {
6090 KMP_ASSERT(!__kmp_root[i]->r.r_active);
6099 while (__kmp_thread_pool != NULL) {
6101 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
6102 __kmp_thread_pool = thread->th.th_next_pool;
6104 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6105 thread->th.th_next_pool = NULL;
6106 thread->th.th_in_pool = FALSE;
6107 __kmp_reap_thread(thread, 0);
6109 __kmp_thread_pool_insert_pt = NULL;
6112 while (__kmp_team_pool != NULL) {
6114 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
6115 __kmp_team_pool = team->t.t_next_pool;
6117 team->t.t_next_pool = NULL;
6118 __kmp_reap_team(team);
6121 __kmp_reap_task_teams();
6128 for (i = 0; i < __kmp_threads_capacity; i++) {
6129 kmp_info_t *thr = __kmp_threads[i];
6130 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6135 for (i = 0; i < __kmp_threads_capacity; ++i) {
6142 TCW_SYNC_4(__kmp_init_common, FALSE);
6144 KA_TRACE(10, (
"__kmp_internal_end: all workers reaped\n"));
6152 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6153 if (TCR_4(__kmp_init_monitor)) {
6154 __kmp_reap_monitor(&__kmp_monitor);
6155 TCW_4(__kmp_init_monitor, 0);
6157 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6158 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6161 TCW_4(__kmp_init_gtid, FALSE);
6170 void __kmp_internal_end_library(int gtid_req) {
6177 if (__kmp_global.g.g_abort) {
6178 KA_TRACE(11, (
"__kmp_internal_end_library: abort, exiting\n"));
6182 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6183 KA_TRACE(10, (
"__kmp_internal_end_library: already finished\n"));
6190 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6192 10, (
"__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6193 if (gtid == KMP_GTID_SHUTDOWN) {
6194 KA_TRACE(10, (
"__kmp_internal_end_library: !__kmp_init_runtime, system "
6195 "already shutdown\n"));
6197 } else if (gtid == KMP_GTID_MONITOR) {
6198 KA_TRACE(10, (
"__kmp_internal_end_library: monitor thread, gtid not "
6199 "registered, or system shutdown\n"));
6201 } else if (gtid == KMP_GTID_DNE) {
6202 KA_TRACE(10, (
"__kmp_internal_end_library: gtid not registered or system "
6205 } else if (KMP_UBER_GTID(gtid)) {
6207 if (__kmp_root[gtid]->r.r_active) {
6208 __kmp_global.g.g_abort = -1;
6209 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6210 __kmp_unregister_library();
6212 (
"__kmp_internal_end_library: root still active, abort T#%d\n",
6218 (
"__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6219 __kmp_unregister_root_current_thread(gtid);
6226 #ifdef DUMP_DEBUG_ON_EXIT
6227 if (__kmp_debug_buf)
6228 __kmp_dump_debug_buffer();
6233 __kmp_unregister_library();
6238 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6241 if (__kmp_global.g.g_abort) {
6242 KA_TRACE(10, (
"__kmp_internal_end_library: abort, exiting\n"));
6244 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6247 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6248 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6257 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6260 __kmp_internal_end();
6262 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6263 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6265 KA_TRACE(10, (
"__kmp_internal_end_library: exit\n"));
6267 #ifdef DUMP_DEBUG_ON_EXIT
6268 if (__kmp_debug_buf)
6269 __kmp_dump_debug_buffer();
6273 __kmp_close_console();
6276 __kmp_fini_allocator();
6280 void __kmp_internal_end_thread(int gtid_req) {
6289 if (__kmp_global.g.g_abort) {
6290 KA_TRACE(11, (
"__kmp_internal_end_thread: abort, exiting\n"));
6294 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6295 KA_TRACE(10, (
"__kmp_internal_end_thread: already finished\n"));
6300 if (TCR_4(__kmp_init_hidden_helper)) {
6301 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6303 __kmp_hidden_helper_main_thread_release();
6305 __kmp_hidden_helper_threads_deinitz_wait();
6312 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6314 (
"__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6315 if (gtid == KMP_GTID_SHUTDOWN) {
6316 KA_TRACE(10, (
"__kmp_internal_end_thread: !__kmp_init_runtime, system "
6317 "already shutdown\n"));
6319 } else if (gtid == KMP_GTID_MONITOR) {
6320 KA_TRACE(10, (
"__kmp_internal_end_thread: monitor thread, gtid not "
6321 "registered, or system shutdown\n"));
6323 } else if (gtid == KMP_GTID_DNE) {
6324 KA_TRACE(10, (
"__kmp_internal_end_thread: gtid not registered or system "
6328 } else if (KMP_UBER_GTID(gtid)) {
6330 if (__kmp_root[gtid]->r.r_active) {
6331 __kmp_global.g.g_abort = -1;
6332 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6334 (
"__kmp_internal_end_thread: root still active, abort T#%d\n",
6338 KA_TRACE(10, (
"__kmp_internal_end_thread: unregistering sibling T#%d\n",
6340 __kmp_unregister_root_current_thread(gtid);
6344 KA_TRACE(10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6347 __kmp_threads[gtid]->th.th_task_team = NULL;
6351 (
"__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6357 if (__kmp_pause_status != kmp_hard_paused)
6361 KA_TRACE(10, (
"__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6366 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6369 if (__kmp_global.g.g_abort) {
6370 KA_TRACE(10, (
"__kmp_internal_end_thread: abort, exiting\n"));
6372 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6375 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6376 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6387 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6389 for (i = 0; i < __kmp_threads_capacity; ++i) {
6390 if (KMP_UBER_GTID(i)) {
6393 (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6394 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6395 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6402 __kmp_internal_end();
6404 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6405 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6407 KA_TRACE(10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6409 #ifdef DUMP_DEBUG_ON_EXIT
6410 if (__kmp_debug_buf)
6411 __kmp_dump_debug_buffer();
6418 static long __kmp_registration_flag = 0;
6420 static char *__kmp_registration_str = NULL;
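// Library registration: each copy of the runtime publishes a record of the
// form "<flag address>-<flag value>-<library file>" (the "%p-%lx-%s" format
// below), either in a POSIX shared-memory segment (dynamic UNIX builds) or
// in the environment variable named by __kmp_reg_status_name(). An
// illustrative value (not a real one) would look like
// "0x7f...-cafe1234-libomp.so". A second runtime loaded into the same
// process finds this record and, unless KMP_DUPLICATE_LIB_OK is set, aborts
// with the DuplicateLibrary diagnostic.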
6423 static inline char *__kmp_reg_status_name() {
6429 #if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
6430 return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
6433 return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
6437 void __kmp_register_library_startup(void) {
6439 char *name = __kmp_reg_status_name();
6445 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6446 __kmp_initialize_system_tick();
6448 __kmp_read_system_time(&time.dtime);
6449 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6450 __kmp_registration_str =
6451 __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
6452 __kmp_registration_flag, KMP_LIBRARY_FILE);
6454 KA_TRACE(50, (
"__kmp_register_library_startup: %s=\"%s\"\n", name,
6455 __kmp_registration_str));
6461 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
6462 char *shm_name = __kmp_str_format("/%s", name);
6463 int shm_preexist = 0;
6465 int fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666);
6466 if ((fd1 == -1) && (errno == EEXIST)) {
6469 fd1 = shm_open(shm_name, O_RDWR, 0666);
6472 __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM"), KMP_ERR(0),
6478 } else if (fd1 == -1) {
6481 __kmp_fatal(KMP_MSG(FunctionError, "Can't open SHM2"), KMP_ERR(errno),
6484 if (shm_preexist == 0) {
6486 if (ftruncate(fd1, SHM_SIZE) == -1) {
6488 __kmp_fatal(KMP_MSG(FunctionError, "Can't set size of SHM"),
6489 KMP_ERR(errno), __kmp_msg_null);
6493 char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd1, 0);
6494 if (data1 == MAP_FAILED) {
6496 __kmp_fatal(KMP_MSG(FunctionError, "Can't map SHM"), KMP_ERR(errno),
6499 if (shm_preexist == 0) {
6500 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6503 value = __kmp_str_format("%s", data1);
6504 munmap(data1, SHM_SIZE);
6508 __kmp_env_set(name, __kmp_registration_str, 0);
6510 value = __kmp_env_get(name);
6513 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6520 char *flag_addr_str = NULL;
6521 char *flag_val_str = NULL;
6522 char const *file_name = NULL;
6523 __kmp_str_split(tail, '-', &flag_addr_str, &tail);
6524 __kmp_str_split(tail, '-', &flag_val_str, &tail);
6527 long *flag_addr = 0;
6529 KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr));
6530 KMP_SSCANF(flag_val_str, "%lx", &flag_val);
6531 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {
6535 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6549 file_name = "unknown library";
6554 char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");
6555 if (!__kmp_str_match_true(duplicate_ok)) {
6557 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6558 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6560 KMP_INTERNAL_FREE(duplicate_ok);
6561 __kmp_duplicate_library_ok = 1;
6566 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
6568 shm_unlink(shm_name);
6571 __kmp_env_unset(name);
6574 default: { KMP_DEBUG_ASSERT(0); }
break;
6577 KMP_INTERNAL_FREE((void *)value);
6578 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
6579 KMP_INTERNAL_FREE((void *)shm_name);
6582 KMP_INTERNAL_FREE((void *)name);
6586 void __kmp_unregister_library(void) {
6588 char *name = __kmp_reg_status_name();
6591 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
6592 char *shm_name = __kmp_str_format("/%s", name);
6593 int fd1 = shm_open(shm_name, O_RDONLY, 0666);
6598 char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6599 if (data1 != MAP_FAILED) {
6600 value = __kmp_str_format("%s", data1);
6601 munmap(data1, SHM_SIZE);
6605 value = __kmp_env_get(name);
6608 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6609 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6610 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6612 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
6613 shm_unlink(shm_name);
6615 __kmp_env_unset(name);
6619 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
6620 KMP_INTERNAL_FREE(shm_name);
6623 KMP_INTERNAL_FREE(__kmp_registration_str);
6624 KMP_INTERNAL_FREE(value);
6625 KMP_INTERNAL_FREE(name);
6627 __kmp_registration_flag = 0;
6628 __kmp_registration_str = NULL;
6635 #if KMP_MIC_SUPPORTED
6637 static void __kmp_check_mic_type() {
6638 kmp_cpuid_t cpuid_state = {0};
6639 kmp_cpuid_t *cs_p = &cpuid_state;
6640 __kmp_x86_cpuid(1, 0, cs_p);
6642 if ((cs_p->eax & 0xff0) == 0xB10) {
6643 __kmp_mic_type = mic2;
6644 } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6645 __kmp_mic_type = mic3;
6647 __kmp_mic_type = non_mic;
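// User-level mwait support: with UMWAIT, CPUID leaf 7 is queried and ECX
// bit 5 (the WAITPKG feature) is combined with the __kmp_user_level_mwait
// setting; on KNL-class hardware (mic3) the ring-3 MONITOR/MWAIT capability
// is instead probed through getauxval(AT_INTELPHIUSERMWAIT).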
6654 static void __kmp_user_level_mwait_init() {
6655 struct kmp_cpuid buf;
6656 __kmp_x86_cpuid(7, 0, &buf);
6657 __kmp_umwait_enabled = ((buf.ecx >> 5) & 1) && __kmp_user_level_mwait;
6658 KF_TRACE(30, (
"__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
6659 __kmp_umwait_enabled));
6661 #elif KMP_HAVE_MWAIT
6662 #ifndef AT_INTELPHIUSERMWAIT
6665 #define AT_INTELPHIUSERMWAIT 10000
6670 unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
6671 unsigned long getauxval(unsigned long) { return 0; }
6673 static void __kmp_user_level_mwait_init() {
6678 if (__kmp_mic_type == mic3) {
6679 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
6680 if ((res & 0x1) || __kmp_user_level_mwait) {
6681 __kmp_mwait_enabled = TRUE;
6682 if (__kmp_user_level_mwait) {
6683 KMP_INFORM(EnvMwaitWarn);
6686 __kmp_mwait_enabled = FALSE;
6689 KF_TRACE(30, (
"__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
6690 "__kmp_mwait_enabled = %d\n",
6691 __kmp_mic_type, __kmp_mwait_enabled));
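// First (serial) stage of library initialization: sanity-check fixed type
// sizes, initialize the global/atomic/bootstrap locks, register the library,
// read the environment, allocate the __kmp_threads / __kmp_root arrays, and
// register the initial (uber) root thread. It runs once, under
// __kmp_initz_lock, via __kmp_serial_initialize().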
6695 static void __kmp_do_serial_initialize(void) {
6699 KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));
6701 KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
6702 KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
6703 KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
6704 KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
6705 KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
6711 __kmp_validate_locks();
6714 __kmp_init_allocator();
6719 __kmp_register_library_startup();
6722 if (TCR_4(__kmp_global.g.g_done)) {
6723 KA_TRACE(10, (
"__kmp_do_serial_initialize: reinitialization of library\n"));
6726 __kmp_global.g.g_abort = 0;
6727 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6730 #if KMP_USE_ADAPTIVE_LOCKS
6731 #if KMP_DEBUG_ADAPTIVE_LOCKS
6732 __kmp_init_speculative_stats();
6735 #if KMP_STATS_ENABLED
6738 __kmp_init_lock(&__kmp_global_lock);
6739 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
6740 __kmp_init_lock(&__kmp_debug_lock);
6741 __kmp_init_atomic_lock(&__kmp_atomic_lock);
6742 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
6743 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
6744 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
6745 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
6746 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
6747 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
6748 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
6749 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
6750 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
6751 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
6752 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
6753 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
6754 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
6755 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
6757 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
6759 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
6763 __kmp_runtime_initialize();
6765 #if KMP_MIC_SUPPORTED
6766 __kmp_check_mic_type();
6773 __kmp_abort_delay = 0;
6777 __kmp_dflt_team_nth_ub = __kmp_xproc;
6778 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
6779 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6781 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
6782 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6784 __kmp_max_nth = __kmp_sys_max_nth;
6785 __kmp_cg_max_nth = __kmp_sys_max_nth;
6786 __kmp_teams_max_nth = __kmp_xproc;
6787 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
6788 __kmp_teams_max_nth = __kmp_sys_max_nth;
6793 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
6795 __kmp_monitor_wakeups =
6796 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6797 __kmp_bt_intervals =
6798 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6801 __kmp_library = library_throughput;
6803 __kmp_static = kmp_sch_static_balanced;
6810 #if KMP_FAST_REDUCTION_BARRIER
6811 #define kmp_reduction_barrier_gather_bb ((int)1)
6812 #define kmp_reduction_barrier_release_bb ((int)1)
6813 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
6814 #define kmp_reduction_barrier_release_pat bp_hyper_bar
6816 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
6817 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
6818 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
6819 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
6820 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
6821 #if KMP_FAST_REDUCTION_BARRIER
6822 if (i == bs_reduction_barrier) {
6824 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
6825 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
6826 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
6827 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
6831 #if KMP_FAST_REDUCTION_BARRIER
6832 #undef kmp_reduction_barrier_release_pat
6833 #undef kmp_reduction_barrier_gather_pat
6834 #undef kmp_reduction_barrier_release_bb
6835 #undef kmp_reduction_barrier_gather_bb
6837 #if KMP_MIC_SUPPORTED
6838 if (__kmp_mic_type == mic2) {
6840 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
6841 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
6843 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6844 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6846 #if KMP_FAST_REDUCTION_BARRIER
6847 if (__kmp_mic_type == mic2) {
6848 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6849 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6856 __kmp_env_checks = TRUE;
6858 __kmp_env_checks = FALSE;
6862 __kmp_foreign_tp = TRUE;
6864 __kmp_global.g.g_dynamic = FALSE;
6865 __kmp_global.g.g_dynamic_mode = dynamic_default;
6867 __kmp_env_initialize(NULL);
6869 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
6870 __kmp_user_level_mwait_init();
6874 char const *val = __kmp_env_get("KMP_DUMP_CATALOG");
6875 if (__kmp_str_match_true(val)) {
6876 kmp_str_buf_t buffer;
6877 __kmp_str_buf_init(&buffer);
6878 __kmp_i18n_dump_catalog(&buffer);
6879 __kmp_printf(
"%s", buffer.str);
6880 __kmp_str_buf_free(&buffer);
6882 __kmp_env_free(&val);
6885 __kmp_threads_capacity =
6886 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
6888 __kmp_tp_capacity = __kmp_default_tp_capacity(
6889 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6894 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
6895 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
6896 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
6897 __kmp_thread_pool = NULL;
6898 __kmp_thread_pool_insert_pt = NULL;
6899 __kmp_team_pool = NULL;
6906 size = (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
     CACHE_LINE;
6908 __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
6909 __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
6910 sizeof(kmp_info_t *) * __kmp_threads_capacity);
6913 KMP_DEBUG_ASSERT(__kmp_all_nth ==
6915 KMP_DEBUG_ASSERT(__kmp_nth == 0);
6920 gtid = __kmp_register_root(TRUE);
6921 KA_TRACE(10, (
"__kmp_do_serial_initialize T#%d\n", gtid));
6922 KMP_ASSERT(KMP_UBER_GTID(gtid));
6923 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
6927 __kmp_common_initialize();
6931 __kmp_register_atfork();
6934 #if !KMP_DYNAMIC_LIB
6938 int rc = atexit(__kmp_internal_end_atexit);
6940 __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
6946 #if KMP_HANDLE_SIGNALS
6952 __kmp_install_signals(FALSE);
6955 __kmp_install_signals(TRUE);
6960 __kmp_init_counter++;
6962 __kmp_init_serial = TRUE;
6964 if (__kmp_settings) {
6968 if (__kmp_display_env || __kmp_display_env_verbose) {
6969 __kmp_env_print_2();
6978 KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
6981 void __kmp_serial_initialize(void) {
6982 if (__kmp_init_serial) {
6985 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6986 if (__kmp_init_serial) {
6987 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6990 __kmp_do_serial_initialize();
6991 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
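// Middle initialization runs after the serial stage and before the first
// parallel region: it initializes affinity, fixes up __kmp_avail_proc, and
// derives the default team size (__kmp_dflt_team_nth), propagating a changed
// default into the nproc ICV of already-registered root threads.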
6994 static void __kmp_do_middle_initialize(void) {
6996 int prev_dflt_team_nth;
6998 if (!__kmp_init_serial) {
6999 __kmp_do_serial_initialize();
7002 KA_TRACE(10, (
"__kmp_middle_initialize: enter\n"));
7006 prev_dflt_team_nth = __kmp_dflt_team_nth;
7008 #if KMP_AFFINITY_SUPPORTED
7011 __kmp_affinity_initialize();
7015 for (i = 0; i < __kmp_threads_capacity; i++) {
7016 if (TCR_PTR(__kmp_threads[i]) != NULL) {
7017 __kmp_affinity_set_init_mask(i, TRUE);
7022 KMP_ASSERT(__kmp_xproc > 0);
7023 if (__kmp_avail_proc == 0) {
7024 __kmp_avail_proc = __kmp_xproc;
7030 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7031 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
7036 if (__kmp_dflt_team_nth == 0) {
7037 #ifdef KMP_DFLT_NTH_CORES
7039 __kmp_dflt_team_nth = __kmp_ncores;
7040 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7041 "__kmp_ncores (%d)\n",
7042 __kmp_dflt_team_nth));
7045 __kmp_dflt_team_nth = __kmp_avail_proc;
7046 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7047 "__kmp_avail_proc(%d)\n",
7048 __kmp_dflt_team_nth));
7052 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
7053 __kmp_dflt_team_nth = KMP_MIN_NTH;
7055 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
7056 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7061 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
7063 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7068 for (i = 0; i < __kmp_threads_capacity; i++) {
7069 kmp_info_t *thread = __kmp_threads[i];
7072 if (thread->th.th_current_task->td_icvs.nproc != 0)
7075 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
7080 (
"__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
7081 __kmp_dflt_team_nth));
7083 #ifdef KMP_ADJUST_BLOCKTIME
7085 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7086 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
7087 if (__kmp_nth > __kmp_avail_proc) {
7088 __kmp_zero_bt = TRUE;
7094 TCW_SYNC_4(__kmp_init_middle, TRUE);
7096 KA_TRACE(10, (
"__kmp_do_middle_initialize: exit\n"));
7099 void __kmp_middle_initialize(void) {
7100 if (__kmp_init_middle) {
7103 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7104 if (__kmp_init_middle) {
7105 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7108 __kmp_do_middle_initialize();
7109 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7112 void __kmp_parallel_initialize(void) {
7113 int gtid = __kmp_entry_gtid();
7116 if (TCR_4(__kmp_init_parallel))
7118 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7119 if (TCR_4(__kmp_init_parallel)) {
7120 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7125 if (TCR_4(__kmp_global.g.g_done)) {
7128 (
"__kmp_parallel_initialize: attempt to init while shutting down\n"));
7129 __kmp_infinite_loop();
7135 if (!__kmp_init_middle) {
7136 __kmp_do_middle_initialize();
7138 __kmp_resume_if_hard_paused();
7141 KA_TRACE(10, (
"__kmp_parallel_initialize: enter\n"));
7142 KMP_ASSERT(KMP_UBER_GTID(gtid));
7144 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
7147 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7148 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7149 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7153 #if KMP_HANDLE_SIGNALS
7155 __kmp_install_signals(TRUE);
7159 __kmp_suspend_initialize();
7161 #if defined(USE_LOAD_BALANCE)
7162 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7163 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7166 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7167 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7171 if (__kmp_version) {
7172 __kmp_print_version_2();
7176 TCW_SYNC_4(__kmp_init_parallel, TRUE);
7179 KA_TRACE(10, (
"__kmp_parallel_initialize: exit\n"));
7181 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7184 void __kmp_hidden_helper_initialize() {
7185 if (TCR_4(__kmp_init_hidden_helper))
7189 if (!TCR_4(__kmp_init_parallel))
7190 __kmp_parallel_initialize();
7194 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7195 if (TCR_4(__kmp_init_hidden_helper)) {
7196 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7201 KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);
7205 TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);
7208 __kmp_do_initialize_hidden_helper_threads();
7211 __kmp_hidden_helper_threads_initz_wait();
7214 TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);
7216 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
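// __kmp_run_before_invoked_task() / __kmp_run_after_invoked_task() bracket
// the execution of a team's microtask on each thread: they reset the
// per-thread dispatch state and push/pop the consistency-check frame for the
// parallel region around the call made from __kmp_invoke_task_func().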
7221 void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team) {
7223 kmp_disp_t *dispatch;
7228 this_thr->th.th_local.this_construct = 0;
7229 #if KMP_CACHE_MANAGE
7230 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
7232 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
7233 KMP_DEBUG_ASSERT(dispatch);
7234 KMP_DEBUG_ASSERT(team->t.t_dispatch);
7238 dispatch->th_disp_index = 0;
7239 dispatch->th_doacross_buf_idx = 0;
7240 if (__kmp_env_consistency_check)
7241 __kmp_push_parallel(gtid, team->t.t_ident);
7246 void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team) {
7248 if (__kmp_env_consistency_check)
7249 __kmp_pop_parallel(gtid, team->t.t_ident);
7251 __kmp_finish_implicit_task(this_thr);
7254 int __kmp_invoke_task_func(int gtid) {
7256 int tid = __kmp_tid_from_gtid(gtid);
7257 kmp_info_t *this_thr = __kmp_threads[gtid];
7258 kmp_team_t *team = this_thr->th.th_team;
7260 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
7262 if (__itt_stack_caller_create_ptr) {
7263 __kmp_itt_stack_callee_enter(
7265 team->t.t_stack_id);
7268 #if INCLUDE_SSC_MARKS
7269 SSC_MARK_INVOKING();
7274 void **exit_frame_p;
7275 ompt_data_t *my_task_data;
7276 ompt_data_t *my_parallel_data;
7279 if (ompt_enabled.enabled) {
7281 team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame.ptr);
7283 exit_frame_p = &dummy;
7287 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7288 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
7289 if (ompt_enabled.ompt_callback_implicit_task) {
7290 ompt_team_size = team->t.t_nproc;
7291 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7292 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
7293 __kmp_tid_from_gtid(gtid), ompt_task_implicit);
7294 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
7298 #if KMP_STATS_ENABLED
7300 if (previous_state == stats_state_e::TEAMS_REGION) {
7301 KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
7303 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
7305 KMP_SET_THREAD_STATE(IMPLICIT_TASK);
7308 rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
7309 tid, (int)team->t.t_argc, (void **)team->t.t_argv
7316 *exit_frame_p = NULL;
7317 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
7320 #if KMP_STATS_ENABLED
7321 if (previous_state == stats_state_e::TEAMS_REGION) {
7322 KMP_SET_THREAD_STATE(previous_state);
7324 KMP_POP_PARTITIONED_TIMER();
7328 if (__itt_stack_caller_create_ptr) {
7329 __kmp_itt_stack_callee_leave(
7331 team->t.t_stack_id);
7334 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
7339 void __kmp_teams_master(int gtid) {
7341 kmp_info_t *thr = __kmp_threads[gtid];
7342 kmp_team_t *team = thr->th.th_team;
7343 ident_t *loc = team->t.t_ident;
7344 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7345 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
7346 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
7347 KA_TRACE(20, (
"__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
7348 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
7351 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
7354 tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
7355 tmp->cg_nthreads = 1;
7356 KA_TRACE(100, (
"__kmp_teams_master: Thread %p created node %p and init"
7357 " cg_nthreads to 1\n",
7359 tmp->up = thr->th.th_cg_roots;
7360 thr->th.th_cg_roots = tmp;
7364 #if INCLUDE_SSC_MARKS
7367 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
7368 (microtask_t)thr->th.th_teams_microtask,
7369 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
7370 #if INCLUDE_SSC_MARKS
7374 if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7375 thr->th.th_teams_size.nth = thr->th.th_team_nproc;
7378 __kmp_join_call(loc, gtid
7387 int __kmp_invoke_teams_master(int gtid) {
7388 kmp_info_t *this_thr = __kmp_threads[gtid];
7389 kmp_team_t *team = this_thr->th.th_team;
7391 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7392 KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
7393 (void *)__kmp_teams_master);
7395 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7397 int tid = __kmp_tid_from_gtid(gtid);
7398 ompt_data_t *task_data =
7399 &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
7400 ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
7401 if (ompt_enabled.ompt_callback_implicit_task) {
7402 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7403 ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
7405 OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
7408 __kmp_teams_master(gtid);
7410 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
7412 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
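// The __kmp_push_* entry points record clause values (num_threads,
// num_teams/thread_limit, proc_bind) in the calling thread's th_set_* fields
// so the subsequent fork can pick them up when it builds the team. Roughly,
// and only as an illustration:
//   #pragma omp parallel num_threads(4)
// lowers to a __kmpc_push_num_threads(loc, gtid, 4) call (which forwards
// here) followed by the fork of the parallel region.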
7421 void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
7422 kmp_info_t *thr = __kmp_threads[gtid];
7424 if (num_threads > 0)
7425 thr->th.th_set_nproc = num_threads;
7430 void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams, int num_threads) {
7432 kmp_info_t *thr = __kmp_threads[gtid];
7433 KMP_DEBUG_ASSERT(num_teams >= 0);
7434 KMP_DEBUG_ASSERT(num_threads >= 0);
7438 if (num_teams > __kmp_teams_max_nth) {
7439 if (!__kmp_reserve_warn) {
7440 __kmp_reserve_warn = 1;
7441 __kmp_msg(kmp_ms_warning,
7442 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7443 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7445 num_teams = __kmp_teams_max_nth;
7449 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7452 if (!TCR_4(__kmp_init_middle))
7453 __kmp_middle_initialize();
7454 KMP_DEBUG_ASSERT(__kmp_avail_proc);
7455 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
7456 if (num_threads == 0) {
7457 num_threads = __kmp_avail_proc / num_teams;
7461 if (num_threads > __kmp_dflt_team_nth) {
7462 num_threads = __kmp_dflt_team_nth;
7464 if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
7465 num_threads = thr->th.th_current_task->td_icvs.thread_limit;
7467 if (num_teams * num_threads > __kmp_teams_max_nth) {
7468 num_threads = __kmp_teams_max_nth / num_teams;
7473 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7475 if (num_threads > __kmp_dflt_team_nth) {
7476 num_threads = __kmp_dflt_team_nth;
7478 if (num_teams * num_threads > __kmp_teams_max_nth) {
7479 int new_threads = __kmp_teams_max_nth / num_teams;
7480 if (!__kmp_reserve_warn) {
7481 __kmp_reserve_warn = 1;
7482 __kmp_msg(kmp_ms_warning,
7483 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7484 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7486 num_threads = new_threads;
7489 thr->th.th_teams_size.nth = num_threads;
7493 void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
7494 kmp_info_t *thr = __kmp_threads[gtid];
7495 thr->th.th_set_proc_bind = proc_bind;
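// __kmp_internal_fork() / __kmp_internal_join() implement the barrier
// protocol around a parallel region for the primary thread: the fork side
// resets the team's construct counters and dispatch buffers and releases the
// workers through the fork barrier; the join side gathers them again and
// emits the OMPT implicit-barrier / implicit-task end callbacks.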
7500 void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
7501 kmp_info_t *this_thr = __kmp_threads[gtid];
7507 KMP_DEBUG_ASSERT(team);
7508 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7509 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7512 team->t.t_construct = 0;
7513 team->t.t_ordered.dt.t_value =
7517 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
7518 if (team->t.t_max_nproc > 1) {
7520 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7521 team->t.t_disp_buffer[i].buffer_index = i;
7522 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7525 team->t.t_disp_buffer[0].buffer_index = 0;
7526 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7530 KMP_ASSERT(this_thr->th.th_team == team);
7533 for (f = 0; f < team->t.t_nproc; f++) {
7534 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
7535 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
7540 __kmp_fork_barrier(gtid, 0);
void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));

/* Join barrier after fork */

#ifdef KMP_DEBUG
  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
#endif /* KMP_DEBUG */

  __kmp_join_barrier(gtid); /* wait for everyone */
#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;

    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid, ompt_task_implicit);
    }
  }
#endif

  KMP_ASSERT(this_thr->th.th_team == team);
}
#ifdef USE_LOAD_BALANCE

// Return the number of worker threads actively spinning in the hot team if we
// are at the outermost level of parallelism; otherwise return 0.
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
  int i;
  int retval;
  kmp_team_t *hot_team;

  if (root->r.r_active) {
    return 0;
  }
  hot_team = root->r.r_hot_team;
  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    return hot_team->t.t_nproc - 1; // don't count the master thread
  }

  retval = 0; // skip the master thread; it is accounted for elsewhere
  for (i = 1; i < hot_team->t.t_nproc; i++) {
    if (hot_team->t.t_threads[i]->th.th_active) {
      retval++;
    }
  }
  return retval;
}

// Perform an automatic adjustment to the number of threads used by the next
// parallel region.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
                set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }

  // Threads active in the thread pool, active in the hot team for this root,
  // and the currently executing thread (to become the master) are available
  // to add to the new team, but currently contribute to the system load.
  pool_active = __kmp_thread_pool_active_nth;
  hot_team_active = __kmp_active_hot_team_nproc(root);
  team_curr_active = pool_active + hot_team_active + 1;

  // Check the system load.
  system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));

  if (system_active < 0) {
    // Could not read the necessary info from /proc; fall back to the
    // thread-limit algorithm from now on.
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");

    // Make this call behave like the thread-limit algorithm.
    retval = __kmp_avail_proc - __kmp_nth +
             (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (retval > set_nproc) {
      retval = set_nproc;
    }
    if (retval < KMP_MIN_NTH) {
      retval = KMP_MIN_NTH;
    }

    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }

  // There is a slight delay in the load balance algorithm in detecting newly
  // started procs, so the real system load is at least as large as the number
  // of active OMP threads available to add to the team.
  if (system_active < team_curr_active) {
    system_active = team_curr_active;
  }
  retval = __kmp_avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) {
    retval = set_nproc;
  }
  if (retval < KMP_MIN_NTH) {
    retval = KMP_MIN_NTH;
  }

  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
  return retval;
}

#endif /* USE_LOAD_BALANCE */
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));

  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  __kmp_cleanup_threadprivate_caches();

  for (f = 0; f < __kmp_threads_capacity; f++) {
    if (__kmp_root[f] != NULL) {
      __kmp_free(__kmp_root[f]);
      __kmp_root[f] = NULL;
    }
  }
  __kmp_free(__kmp_threads);
  // __kmp_threads and __kmp_root were allocated at once, as a single block,
  // so there is no need to free __kmp_root separately.
  __kmp_threads = NULL;
  __kmp_root = NULL;
  __kmp_threads_capacity = 0;

#if KMP_USE_DYNAMIC_LOCK
  __kmp_cleanup_indirect_user_locks();
#else
  __kmp_cleanup_user_locks();
#endif

#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
  __kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_print_speculative_stats();
#endif
#endif
  KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
  __kmp_nested_nth.nth = NULL;
  __kmp_nested_nth.size = 0;
  __kmp_nested_nth.used = 0;
  KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
  __kmp_nested_proc_bind.bind_types = NULL;
  __kmp_nested_proc_bind.size = 0;
  __kmp_nested_proc_bind.used = 0;
  if (__kmp_affinity_format) {
    KMP_INTERNAL_FREE(__kmp_affinity_format);
    __kmp_affinity_format = NULL;
  }

  __kmp_i18n_catclose();

#if KMP_USE_HIER_SCHED
  __kmp_hier_scheds.deallocate();
#endif

#if KMP_STATS_ENABLED
  __kmp_stats_fini();
#endif

  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}
int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  return TRUE; // by default __kmpc_begin() is a no-op
}

int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  return TRUE; // by default __kmpc_end() is a no-op
}
void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* This is an important step: it registers new sibling threads and assigns
     these new uber threads a new gtid. */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }
  root->r.r_begin = TRUE;
  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}
void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* First, make sure we are initialized so we can get our gtid. */
  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) { /* Must be called in the serial section of the
                                  top-level thread */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}
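
#if 0 // Illustrative sketch only: the kmp_set_library_*() extensions declared
      // in omp.h reach __kmp_user_set_library(). "throughput" is the default
      // mode, "turnaround" is tuned for dedicated machines, and "serial"
      // disables parallelism for subsequent regions.
#include <omp.h>

int main(void) {
  kmp_set_library_turnaround(); // same effect as KMP_LIBRARY=turnaround
  #pragma omp parallel
  { /* idle workers now yield only when oversubscribed */ }
  return 0;
}
#endif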
void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

#if KMP_OS_DARWIN
  if (arg & (0x1000 - 1)) {
    arg &= ~(0x1000 - 1);
    if (arg + 0x1000) /* check for overflow if we round up */
      arg += 0x1000;
  }
#endif
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* Only change the default stacksize before the first parallel region. */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;

    __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
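
#if 0 // Illustrative sketch only: kmp_set_stacksize_s() is the omp.h
      // extension that funnels into __kmp_aux_set_stacksize(); as guarded
      // above, it only has an effect before the first parallel region.
#include <omp.h>
#include <stdio.h>

int main(void) {
  kmp_set_stacksize_s(4 * 1024 * 1024); // request 4 MiB worker stacks
  printf("stacksize=%zu\n", kmp_get_stacksize_s());
  #pragma omp parallel
  { /* workers are created with the requested stack size */ }
  return 0;
}
#endif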
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
  } break;
  case library_turnaround:
    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
      __kmp_use_yield = 2; // only yield when oversubscribed
    break;
  case library_throughput:
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      __kmp_dflt_blocktime = 200;
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}
// Get team information common to the team-number API; returns NULL if the
// thread is not inside an active teams construct.
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
  kmp_info_t *thr = __kmp_entry_thread();
  teams_serialized = 0;
  if (thr->th.th_teams_microtask) {
    kmp_team_t *team = thr->th.th_team;
    int tlevel = thr->th.th_teams_level; // level of the teams construct
    int ii = team->t.t_level;
    teams_serialized = team->t.t_serialized;
    int level = tlevel + 1;
    KMP_DEBUG_ASSERT(ii >= tlevel);
    while (ii > level) {
      for (teams_serialized = team->t.t_serialized;
           (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
      }
      if (team->t.t_serialized && (!teams_serialized)) {
        team = team->t.t_parent;
        continue;
      }
      if (ii > level) {
        team = team->t.t_parent;
        ii--;
      }
    }
    return team;
  }
  return NULL;
}

int __kmp_aux_get_team_num() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1)
      return 0; // teams region is serialized (one team of one thread)
    return team->t.t_master_tid;
  }
  return 0;
}

int __kmp_aux_get_num_teams() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1)
      return 1;
    return team->t.t_parent->t.t_nproc;
  }
  return 1;
}
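
#if 0 // Illustrative sketch only: omp_get_team_num()/omp_get_num_teams() are
      // the standard queries served by these __kmp_aux_* helpers when a teams
      // construct is active; outside one they report 0 and 1.
#include <omp.h>
#include <stdio.h>

int main(void) {
  #pragma omp teams num_teams(2)
  {
    printf("team %d of %d\n", omp_get_team_num(), omp_get_num_teams());
  }
  return 0;
}
#endif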
// Affinity Format Parser

typedef struct kmp_affinity_format_field_t {
  char short_name; // from spec, e.g., L
  const char *long_name; // from spec, e.g., nesting_level
  char field_format; // how to print it, e.g., 'd' or 's'
} kmp_affinity_format_field_t;

static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};
// Return the number of characters it takes to hold the field
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0};
  char absolute_short_name = 0;

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
  // Parse width of field: [width_left, width_right)
  width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
    width_left = *ptr;
    SKIP_DIGITS(*ptr);
    width_right = *ptr;
  }

  // Create the format for KMP_SNPRINTF based on the flags parsed above
  format_index = 0;
  format[format_index++] = '%';
  if (!right_justify)
    format[format_index++] = '-';
  if (pad_zeros)
    format[format_index++] = '0';
  if (width_left && width_right) {
    int i = 0;
    // Only allow 8-digit widths; this also prevents overflowing format
    while (i < 8 && width_left < width_right) {
      format[format_index++] = *width_left;
      width_left++;
      i++;
    }
  }

  // Parse a name (long or short) and canonicalize it into absolute_short_name
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
  if (parse_long_name)
    (*ptr)++; // skip initial left brace
  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
                             sizeof(__kmp_affinity_format_table[0]);
       ++i) {
    char short_name = __kmp_affinity_format_table[i].short_name;
    const char *long_name = __kmp_affinity_format_table[i].long_name;
    char field_format = __kmp_affinity_format_table[i].field_format;
    if (parse_long_name) {
      size_t length = KMP_STRLEN(long_name);
      if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
        (*ptr) += length; // skip the long name
      }
    } else if (**ptr == short_name) {
      found_valid_name = true;
      (*ptr)++; // skip the short name
    }
    if (found_valid_name) {
      format[format_index++] = field_format;
      format[format_index++] = '\0';
      absolute_short_name = short_name;
      break;
    }
  }
  if (parse_long_name) {
    if (**ptr != '}') {
      absolute_short_name = 0;
    } else {
      (*ptr)++; // skip over the right brace
    }
  }

  // Attempt to fill the buffer with the requested value using snprintf within
  // __kmp_str_buf_print()
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
    __kmp_str_buf_free(&buf);
  } break;
#endif
  default:
    // If an implementation has no info for a field type, "undefined" is
    // printed per the spec.
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    // Skip the field
    if (parse_long_name) {
      SKIP_TOKEN(*ptr);
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
  }

  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval;
  const kmp_info_t *th;
  kmp_str_buf_t field;

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or a zero-length string, use the value of the
  // affinity-format-var ICV instead.
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    // Parse a field
    if (*parse_ptr == '%') {
      // Put the field in the buffer
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Put a literal character in the buffer
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}
// Display the affinity format for the current thread on kmp_out
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}
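
#if 0 // Illustrative sketch only: exercising the format fields from
      // __kmp_affinity_format_table through the standard OpenMP 5.0
      // display-affinity API rather than the internal __kmp_aux_* entry
      // points.
#include <omp.h>
#include <stdio.h>

int main(void) {
  #pragma omp parallel num_threads(2)
  {
    char buf[256];
    // Long names in braces map to the long_name column of the table above;
    // %0.4L pads the nesting level to width 4 with zeros.
    omp_capture_affinity(buf, sizeof(buf),
                         "tid %{thread_num}/%{num_threads} level %0.4L "
                         "on host %{host}");
    printf("%s\n", buf);
    omp_display_affinity(NULL); // NULL falls back to affinity-format-var
  }
  return 0;
}
#endif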
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg; /* argument is in milliseconds */
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  kmp_int8 bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Record that blocktime was set explicitly */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}
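
#if 0 // Illustrative sketch only: kmp_set_blocktime()/kmp_get_blocktime() are
      // the omp.h extensions (and KMP_BLOCKTIME the matching env var) that
      // reach the setter above.
#include <omp.h>
#include <stdio.h>

int main(void) {
  kmp_set_blocktime(0); // clamped to KMP_MIN_BLOCKTIME: workers sleep at once
  printf("blocktime=%d ms\n", kmp_get_blocktime());
  #pragma omp parallel
  { /* idle workers now sleep instead of spinning for the default 200 ms */ }
  return 0;
}
#endif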
void __kmp_aux_set_defaults(char const *str, size_t len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults
/* ------------------------------------------------------------------------ */
/* internal fast reduction routines */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default: use the critical construct as the reduction method (lck != NULL).
  // The atomic and tree methods are only considered when the compiler
  // generated the fast-reduction code and data (loc flags, reduce_data,
  // reduce_func).

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(loc);
  KMP_DEBUG_ASSERT(lck);

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // Another choice of getting a team size (with 1 dynamic deref) is slower.
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 ||                   \
    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD

    // basic tuning

    if (atomic_available) {
      if (num_vars <= 2) {
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION
  // If the team is serialized (team_size == 1), ignore the forced reduction
  // method and stay with the unsynchronized method (empty_reduce_block).
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;

    default:
      KMP_ASSERT(0); // unsupported method specified
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}
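
#if 0 // Illustrative sketch only: a user-level reduction whose lowering
      // reaches __kmp_determine_reduction_method() (via __kmpc_reduce*) with
      // num_vars == 1 and a compiler-generated reduce_func, so the atomic or
      // tree path above can be chosen depending on the team size.
#include <omp.h>
#include <stdio.h>

int main(void) {
  long sum = 0;
  #pragma omp parallel for reduction(+ : sum)
  for (int i = 0; i < 1000; ++i)
    sum += i;
  printf("sum=%ld\n", sum); // 499500
  return 0;
}
#endif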
// Soft pause: threads ignore blocktime and go to sleep; spin-wait code checks
// __kmp_pause_status and reacts accordingly.
void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }

// Hard pause shuts down the runtime completely; it is resumed transparently
// the next time OpenMP is used.
void __kmp_hard_pause() {
  __kmp_pause_status = kmp_hard_paused;
  __kmp_internal_end_thread(-1);
}

// Soft resume resets __kmp_pause_status and wakes up all threads.
void __kmp_resume_if_soft_paused() {
  if (__kmp_pause_status == kmp_soft_paused) {
    __kmp_pause_status = kmp_not_paused;

    for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t *thread = __kmp_threads[gtid];
      if (thread) { // wake it if sleeping
        kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
        if (fl.is_sleeping())
          fl.resume(gtid);
        else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
          __kmp_unlock_suspend_mx(thread); // unlock it; it was not sleeping
        } else { // thread holds the lock and may sleep soon
          do { // until either the thread sleeps, or we can get the lock
            if (fl.is_sleeping()) {
              fl.resume(gtid);
              break;
            } else if (__kmp_try_suspend_mx(thread)) {
              __kmp_unlock_suspend_mx(thread);
              break;
            }
          } while (1);
        }
      }
    }
  }
}
// Called via __kmpc_pause_resource. Returns 0 if successful.
int __kmp_pause_resource(kmp_pause_status_t level) {
  if (level == kmp_not_paused) { // requesting resume
    if (__kmp_pause_status == kmp_not_paused)
      return 1; // the runtime is not paused, so it cannot be resumed
    KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
                     __kmp_pause_status == kmp_hard_paused);
    __kmp_pause_status = kmp_not_paused;
    return 0;
  } else if (level == kmp_soft_paused) { // requesting soft pause
    if (__kmp_pause_status != kmp_not_paused)
      return 1; // already paused
    __kmp_soft_pause();
    return 0;
  } else if (level == kmp_hard_paused) { // requesting hard pause
    if (__kmp_pause_status != kmp_not_paused)
      return 1; // already paused
    __kmp_hard_pause();
    return 0;
  }
  return 1; // invalid pause level
}
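
#if 0 // Illustrative sketch only: the OpenMP 5.0 entry points that reach
      // __kmp_pause_resource. A return value of 0 means the request was
      // honored.
#include <omp.h>
#include <stdio.h>

int main(void) {
  #pragma omp parallel
  { /* warm up the runtime so there is something to pause */ }

  if (omp_pause_resource_all(omp_pause_soft) == 0)
    printf("soft-paused: workers sleep, runtime state is kept\n");

  #pragma omp parallel
  { /* the first use after a pause resumes the runtime transparently */ }
  return 0;
}
#endif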
void __kmp_omp_display_env(int verbose) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial == 0)
    __kmp_do_serial_initialize();
  __kmp_display_env_impl(!verbose, verbose);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
// Globals and initialization for hidden helper tasking support.
kmp_info_t **__kmp_hidden_helper_threads;
kmp_info_t *__kmp_hidden_helper_main_thread;
kmp_int32 __kmp_hidden_helper_threads_num = 8;
std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
#if KMP_OS_LINUX
kmp_int32 __kmp_enable_hidden_helper = TRUE;
#else
kmp_int32 __kmp_enable_hidden_helper = FALSE;
#endif

namespace {
std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;

void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
  // This is an explicit synchronization on all hidden helper threads, in case
  // a regular thread pushes a hidden helper task to one of them before it has
  // been awakened since being released by the main thread.
  KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
  while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
         __kmp_hidden_helper_threads_num)
    ;

  // If this is the main thread of the helper team, wait for the signal.
  if (__kmpc_master(nullptr, *gtid)) {
    // First, unset the initial state and release the initial thread.
    TCW_4(__kmp_init_hidden_helper_threads, FALSE);
    __kmp_hidden_helper_initz_release();
    __kmp_hidden_helper_main_thread_wait();
    // Now wake up all worker threads.
    for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
      __kmp_hidden_helper_worker_thread_signal();
    }
  }
}

void __kmp_hidden_helper_threads_initz_routine() {
  // Create a new root for the hidden helper team/threads.
  const int gtid = __kmp_register_root(TRUE);
  __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
  __kmp_hidden_helper_threads = &__kmp_threads[gtid];
  __kmp_hidden_helper_main_thread->th.th_set_nproc =
      __kmp_hidden_helper_threads_num;

  KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);

  __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn);

  // Set the initialization flag back to FALSE.
  TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);

  __kmp_hidden_helper_threads_deinitz_release();
}
} // namespace