16 #include "kmp_error.h"
20 #include "kmp_stats.h"
21 #include "ompt-specific.h"
23 #define MAX_MESSAGE 512
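// The functions below are the compiler-facing __kmpc_* entry points (plus the
// ompc_*/kmpc_* user-API wrappers) of the LLVM OpenMP runtime's C support
// layer. KC_TRACE/KA_TRACE/KE_TRACE only produce output in debug builds at a
// sufficiently high trace level.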
39 if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
40 __kmp_str_match_true(env)) {
41 __kmp_middle_initialize();
42 KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
43 } else if (__kmp_ignore_mppbeg() == FALSE) {
45 __kmp_internal_begin();
46 KC_TRACE(10, ("__kmpc_begin: called\n"));
64 if (__kmp_ignore_mppend() == FALSE) {
65 KC_TRACE(10, ("__kmpc_end: called\n"));
66 KA_TRACE(30, ("__kmpc_end\n"));
68 __kmp_internal_end_thread(-1);
70 #if KMP_OS_WINDOWS && OMPT_SUPPORT
75 if (ompt_enabled.enabled)
76 __kmp_internal_end_library(__kmp_gtid_get_specific());
99 kmp_int32 gtid = __kmp_entry_gtid();
101 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
122 KC_TRACE(10, ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
124 return TCR_4(__kmp_all_nth);
134 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
135 return __kmp_tid_from_gtid(__kmp_entry_gtid());
144 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
146 return __kmp_entry_thread()->th.th_team->t.t_nproc;
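// __kmpc_ok_to_fork: the KMP_PAR_RANGE filter below parses loc->psource
// (";file;routine;line;..."), comparing the file name, routine name and line
// number against the user-supplied range. A positive __kmp_par_range means
// "fork only inside the range"; a negative value inverts the test.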
166 if (__kmp_par_range == 0) {
173 semi2 = strchr(semi2, ';');
177 semi2 = strchr(semi2 + 1, ';');
181 if (__kmp_par_range_filename[0]) {
182 const char *name = semi2 - 1;
183 while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
186 if ((*name == '/') || (*name == ';')) {
189 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
190 return __kmp_par_range < 0;
193 semi3 = strchr(semi2 + 1, ';');
194 if (__kmp_par_range_routine[0]) {
195 if ((semi3 != NULL) && (semi3 > semi2) &&
196 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
197 return __kmp_par_range < 0;
200 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
201 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
202 return __kmp_par_range > 0;
204 return __kmp_par_range < 0;
218 return __kmp_entry_thread()->th.th_root->r.r_active;
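// __kmpc_push_num_threads records the value of a num_threads() clause for the
// calling thread; it affects only the next parallel region this thread forks
// and is consumed by __kmp_fork_call.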
231 kmp_int32 num_threads) {
232 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
233 global_tid, num_threads));
234 __kmp_assert_valid_gtid(global_tid);
235 __kmp_push_num_threads(loc, global_tid, num_threads);
238 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
239 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
243 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
244 kmp_int32 proc_bind) {
245 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
246 proc_bind));
247 __kmp_assert_valid_gtid(global_tid);
248 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
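// __kmpc_fork_call is the entry point the compiler emits for
// "#pragma omp parallel": the argc varargs are forwarded to the outlined
// microtask through __kmp_fork_call, and the primary thread then joins the
// team with __kmp_join_call. Rough sketch of the expected lowering
// (illustrative only; names and exact codegen are compiler-specific):
//
//   // void outlined(kmp_int32 *gtid, kmp_int32 *btid, int *shared_x);
//   // __kmpc_fork_call(&loc, /*argc=*/1, (kmpc_micro)outlined, &shared_x);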
262 int gtid = __kmp_entry_gtid();
264 #if (KMP_STATS_ENABLED)
268 if (previous_state == stats_state_e::SERIAL_REGION) {
269 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
271 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
284 va_start(ap, microtask);
287 ompt_frame_t *ompt_frame;
288 if (ompt_enabled.enabled) {
289 kmp_info_t *master_th = __kmp_threads[gtid];
290 kmp_team_t *parent_team = master_th->th.th_team;
291 ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
293 ompt_frame = &(lwt->ompt_task_info.frame);
295 int tid = __kmp_tid_from_gtid(gtid);
297 parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
299 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
301 OMPT_STORE_RETURN_ADDRESS(gtid);
304 #if INCLUDE_SSC_MARKS
307 __kmp_fork_call(loc, gtid, fork_context_intel, argc,
308 VOLATILE_CAST(microtask_t) microtask,
309 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
311 #if INCLUDE_SSC_MARKS
314 __kmp_join_call(loc, gtid
324 #if KMP_STATS_ENABLED
325 if (previous_state == stats_state_e::SERIAL_REGION) {
326 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
327 KMP_SET_THREAD_STATE(previous_state);
329 KMP_POP_PARTITIONED_TIMER();
346 kmp_int32 num_teams, kmp_int32 num_threads) {
348 KA_TRACE(20, ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
349 global_tid, num_teams, num_threads));
350 __kmp_assert_valid_gtid(global_tid);
351 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
366 int gtid = __kmp_entry_gtid();
367 kmp_info_t *this_thr = __kmp_threads[gtid];
369 va_start(ap, microtask);
371 #if KMP_STATS_ENABLED
374 if (previous_state == stats_state_e::SERIAL_REGION) {
375 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
377 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
382 this_thr->th.th_teams_microtask = microtask;
383 this_thr->th.th_teams_level =
384 this_thr->th.th_team->t.t_level;
387 kmp_team_t *parent_team = this_thr->th.th_team;
388 int tid = __kmp_tid_from_gtid(gtid);
389 if (ompt_enabled.enabled) {
390 parent_team->t.t_implicit_task_taskdata[tid]
391 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
393 OMPT_STORE_RETURN_ADDRESS(gtid);
398 if (this_thr->th.th_teams_size.nteams == 0) {
399 __kmp_push_num_teams(loc, gtid, 0, 0);
401 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
402 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
403 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
406 loc, gtid, fork_context_intel, argc,
407 VOLATILE_CAST(microtask_t) __kmp_teams_master,
408 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
409 __kmp_join_call(loc, gtid
417 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
418 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
419 this_thr->th.th_cg_roots = tmp->up;
420 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
421 " to node %p. cg_nthreads was %d\n",
422 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
423 KMP_DEBUG_ASSERT(tmp->cg_nthreads);
424 int i = tmp->cg_nthreads--;
429 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
430 this_thr->th.th_current_task->td_icvs.thread_limit =
431 this_thr->th.th_cg_roots->cg_thread_limit;
433 this_thr->th.th_teams_microtask = NULL;
434 this_thr->th.th_teams_level = 0;
435 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
437 #if KMP_STATS_ENABLED
438 if (previous_state == stats_state_e::SERIAL_REGION) {
439 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
440 KMP_SET_THREAD_STATE(previous_state);
442 KMP_POP_PARTITIONED_TIMER();
451 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
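// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel implement a
// parallel region that runs on a single thread (for example when nesting is
// disabled); the serial "team" bookkeeping is kept so ICVs, dispatch buffers
// and OMPT events still behave as for a team of size one.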
469 __kmp_assert_valid_gtid(global_tid);
471 OMPT_STORE_RETURN_ADDRESS(global_tid);
473 __kmp_serialized_parallel(loc, global_tid);
484 kmp_internal_control_t *top;
485 kmp_info_t *this_thr;
486 kmp_team_t *serial_team;
489 KC_TRACE(10, ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
497 __kmp_assert_valid_gtid(global_tid);
498 if (!TCR_4(__kmp_init_parallel))
499 __kmp_parallel_initialize();
501 __kmp_resume_if_soft_paused();
503 this_thr = __kmp_threads[global_tid];
504 serial_team = this_thr->th.th_serial_team;
506 kmp_task_team_t *task_team = this_thr->th.th_task_team;
508 if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
509 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
512 KMP_DEBUG_ASSERT(serial_team);
513 KMP_ASSERT(serial_team->t.t_serialized);
514 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
515 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
516 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
517 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
520 if (ompt_enabled.enabled &&
521 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
522 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
523 if (ompt_enabled.ompt_callback_implicit_task) {
524 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
525 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
526 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
530 ompt_data_t *parent_task_data;
531 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
533 if (ompt_enabled.ompt_callback_parallel_end) {
534 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
535 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
536 ompt_parallel_invoker_program | ompt_parallel_team,
537 OMPT_LOAD_RETURN_ADDRESS(global_tid));
539 __ompt_lw_taskteam_unlink(this_thr);
540 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
546 top = serial_team->t.t_control_stack_top;
547 if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
548 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
549 serial_team->t.t_control_stack_top = top->next;
554 serial_team->t.t_level--;
557 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
559 dispatch_private_info_t *disp_buffer =
560 serial_team->t.t_dispatch->th_disp_buffer;
561 serial_team->t.t_dispatch->th_disp_buffer =
562 serial_team->t.t_dispatch->th_disp_buffer->next;
563 __kmp_free(disp_buffer);
565 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator;
567 --serial_team->t.t_serialized;
568 if (serial_team->t.t_serialized == 0) {
572 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
573 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
574 __kmp_clear_x87_fpu_status_word();
575 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
576 __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
580 this_thr->th.th_team = serial_team->t.t_parent;
581 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
584 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc;
585 this_thr->th.th_team_master =
586 serial_team->t.t_parent->t.t_threads[0];
587 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
590 this_thr->th.th_dispatch =
591 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
593 __kmp_pop_current_task_from_thread(this_thr);
595 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
596 this_thr->th.th_current_task->td_flags.executing = 1;
598 if (__kmp_tasking_mode != tskm_immediate_exec) {
600 this_thr->th.th_task_team =
601 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
603 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
604 "team %p\n",
605 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
608 if (__kmp_tasking_mode != tskm_immediate_exec) {
609 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
610 "depth of serial team %p to %d\n",
611 global_tid, serial_team, serial_team->t.t_serialized));
615 if (__kmp_env_consistency_check)
616 __kmp_pop_parallel(global_tid, NULL);
618 if (ompt_enabled.enabled)
619 this_thr->th.ompt_thread_info.state =
620 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
621 : ompt_state_work_parallel);
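// __kmpc_flush implements "#pragma omp flush": a full memory fence chosen per
// architecture (an SSE2-capability check on x86, __sync_synchronize() or an
// equivalent barrier elsewhere), followed by the optional OMPT flush callback.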
634 KC_TRACE(10, ("__kmpc_flush: called\n"));
639 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
653 if (!__kmp_cpuinfo.initialized) {
654 __kmp_query_cpuid(&__kmp_cpuinfo);
656 if (!__kmp_cpuinfo.sse2) {
661 #elif KMP_COMPILER_MSVC
664 __sync_synchronize();
668 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
674 #error Unknown or unsupported architecture
677 #if OMPT_SUPPORT && OMPT_OPTIONAL
678 if (ompt_enabled.ompt_callback_flush) {
679 ompt_callbacks.ompt_callback(ompt_callback_flush)(
680 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
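// __kmpc_barrier implements an explicit "#pragma omp barrier": it records the
// OMPT enter frame, publishes loc in th_ident for tools, and waits in
// __kmp_barrier on the plain barrier type.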
695 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
696 __kmp_assert_valid_gtid(global_tid);
698 if (!TCR_4(__kmp_init_parallel))
699 __kmp_parallel_initialize();
701 __kmp_resume_if_soft_paused();
703 if (__kmp_env_consistency_check) {
705 KMP_WARNING(ConstructIdentInvalid);
707 __kmp_check_barrier(global_tid, ct_barrier, loc);
711 ompt_frame_t *ompt_frame;
712 if (ompt_enabled.enabled) {
713 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
714 if (ompt_frame->enter_frame.ptr == NULL)
715 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
717 OMPT_STORE_RETURN_ADDRESS(global_tid);
719 __kmp_threads[global_tid]->th.th_ident = loc;
727 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
728 #if OMPT_SUPPORT && OMPT_OPTIONAL
729 if (ompt_enabled.enabled) {
730 ompt_frame->enter_frame = ompt_data_none;
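// __kmpc_master is executed by every thread of the team; it returns 1 only on
// the primary thread, which runs the master region and later calls
// __kmpc_end_master. No barrier is implied.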
745 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
746 __kmp_assert_valid_gtid(global_tid);
748 if (!TCR_4(__kmp_init_parallel))
749 __kmp_parallel_initialize();
751 __kmp_resume_if_soft_paused();
753 if (KMP_MASTER_GTID(global_tid)) {
755 KMP_PUSH_PARTITIONED_TIMER(OMP_master);
759 #if OMPT_SUPPORT && OMPT_OPTIONAL
761 if (ompt_enabled.ompt_callback_masked) {
762 kmp_info_t *this_thr = __kmp_threads[global_tid];
763 kmp_team_t *team = this_thr->th.th_team;
765 int tid = __kmp_tid_from_gtid(global_tid);
766 ompt_callbacks.ompt_callback(ompt_callback_masked)(
767 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
768 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
769 OMPT_GET_RETURN_ADDRESS(0));
774 if (__kmp_env_consistency_check) {
775 #if KMP_USE_DYNAMIC_LOCK
777 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
779 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
782 __kmp_push_sync(global_tid, ct_master, loc, NULL);
784 __kmp_check_sync(global_tid, ct_master, loc, NULL);
800 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
801 __kmp_assert_valid_gtid(global_tid);
802 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
803 KMP_POP_PARTITIONED_TIMER();
805 #if OMPT_SUPPORT && OMPT_OPTIONAL
806 kmp_info_t *this_thr = __kmp_threads[global_tid];
807 kmp_team_t *team = this_thr->th.th_team;
808 if (ompt_enabled.ompt_callback_masked) {
809 int tid = __kmp_tid_from_gtid(global_tid);
810 ompt_callbacks.ompt_callback(ompt_callback_masked)(
811 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
812 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
813 OMPT_GET_RETURN_ADDRESS(0));
817 if (__kmp_env_consistency_check) {
818 if (KMP_MASTER_GTID(global_tid))
819 __kmp_pop_sync(global_tid, ct_master, loc);
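// __kmpc_ordered / __kmpc_end_ordered serialize iterations of an "ordered"
// region: threads enter in iteration order through the dispatch th_deo/th_dxo
// hooks (or the default __kmp_parallel_deo/_dxo), and OMPT reports the wait as
// an ordered mutex.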
833 KMP_DEBUG_ASSERT(__kmp_init_serial);
835 KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
836 __kmp_assert_valid_gtid(gtid);
838 if (!TCR_4(__kmp_init_parallel))
839 __kmp_parallel_initialize();
841 __kmp_resume_if_soft_paused();
844 __kmp_itt_ordered_prep(gtid);
848 th = __kmp_threads[gtid];
850 #if OMPT_SUPPORT && OMPT_OPTIONAL
854 OMPT_STORE_RETURN_ADDRESS(gtid);
855 if (ompt_enabled.enabled) {
856 team = __kmp_team_from_gtid(gtid);
857 lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
859 th->th.ompt_thread_info.wait_id = lck;
860 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
863 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
864 if (ompt_enabled.ompt_callback_mutex_acquire) {
865 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
866 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
872 if (th->th.th_dispatch->th_deo_fcn != 0)
873 (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
875 __kmp_parallel_deo(&gtid, &cid, loc);
877 #if OMPT_SUPPORT && OMPT_OPTIONAL
878 if (ompt_enabled.enabled) {
880 th->th.ompt_thread_info.state = ompt_state_work_parallel;
881 th->th.ompt_thread_info.wait_id = 0;
884 if (ompt_enabled.ompt_callback_mutex_acquired) {
885 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
886 ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
892 __kmp_itt_ordered_start(gtid);
907 KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
908 __kmp_assert_valid_gtid(gtid);
911 __kmp_itt_ordered_end(gtid);
915 th = __kmp_threads[gtid];
917 if (th->th.th_dispatch->th_dxo_fcn != 0)
918 (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
920 __kmp_parallel_dxo(&gtid, &cid, loc);
922 #if OMPT_SUPPORT && OMPT_OPTIONAL
923 OMPT_STORE_RETURN_ADDRESS(gtid);
924 if (ompt_enabled.ompt_callback_mutex_released) {
925 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
927 (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
928 ->t.t_ordered.dt.t_value,
929 OMPT_LOAD_RETURN_ADDRESS(gtid));
934 #if KMP_USE_DYNAMIC_LOCK
936 static __forceinline void
937 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
938 kmp_int32 gtid, kmp_indirect_locktag_t tag) {
942 kmp_indirect_lock_t **lck;
943 lck = (kmp_indirect_lock_t **)crit;
944 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
945 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
946 KMP_SET_I_LOCK_LOCATION(ilk, loc);
947 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
949 KA_TRACE(20, ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
951 __kmp_itt_critical_creating(ilk->lock, loc);
953 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
956 __kmp_itt_critical_destroyed(ilk->lock);
962 KMP_DEBUG_ASSERT(*lck != NULL);
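// Inlined fast paths for the test-and-set (TAS) critical-section lock: the
// acquire macro spins with backoff (yielding when the machine is
// oversubscribed) until the compare-and-store of gtid + 1 into lk.poll
// succeeds; KMP_LOCK_FREE(tas) marks the unlocked state.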
966 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
968 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
969 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
970 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
971 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
972 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
974 KMP_FSYNC_PREPARE(l); \
975 KMP_INIT_YIELD(spins); \
976 kmp_backoff_t backoff = __kmp_spin_backoff_params; \
978 if (TCR_4(__kmp_nth) > \
979 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
982 KMP_YIELD_SPIN(spins); \
984 __kmp_spin_backoff(&backoff); \
986 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
987 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
989 KMP_FSYNC_ACQUIRED(l); \
993 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
995 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
996 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
997 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
998 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
999 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1003 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1004 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
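// Linux futex-based fast path: lk.poll holds (gtid + 1) << 1 with the low bit
// used as a "waiters present" flag, so release only issues the FUTEX_WAKE
// syscall when that bit is set.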
1008 #include <sys/syscall.h>
1011 #define FUTEX_WAIT 0
1014 #define FUTEX_WAKE 1
1018 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1020 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1021 kmp_int32 gtid_code = (gtid + 1) << 1; \
1023 KMP_FSYNC_PREPARE(ftx); \
1024 kmp_int32 poll_val; \
1025 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1026 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1027 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1028 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1030 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1032 KMP_LOCK_BUSY(1, futex))) { \
1035 poll_val |= KMP_LOCK_BUSY(1, futex); \
1038 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1039 NULL, NULL, 0)) != 0) { \
1044 KMP_FSYNC_ACQUIRED(ftx); \
1048 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1050 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1051 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1052 KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
1053 KMP_FSYNC_ACQUIRED(ftx); \
1061 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1063 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1065 KMP_FSYNC_RELEASING(ftx); \
1066 kmp_int32 poll_val = \
1067 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1068 if (KMP_LOCK_STRIP(poll_val) & 1) { \
1069 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1070 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1073 KMP_YIELD_OVERSUB(); \
1080 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1083 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1086 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1093 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1094 __kmp_init_user_lock_with_checks(lck);
1095 __kmp_set_user_lock_location(lck, loc);
1097 __kmp_itt_critical_creating(lck);
1108 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1113 __kmp_itt_critical_destroyed(lck);
1117 __kmp_destroy_user_lock_with_checks(lck);
1118 __kmp_user_lock_free(&idx, gtid, lck);
1119 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1120 KMP_DEBUG_ASSERT(lck != NULL);
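// __kmpc_critical: the kmp_critical_name argument is a static cache the
// compiler emits for each named critical section. Small TAS/futex locks are
// stored in it directly; anything larger goes through an indirection created
// lazily by __kmp_get_critical_section_ptr (or __kmp_init_indirect_csptr when
// dynamic locks are enabled).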
1139 kmp_critical_name *crit) {
1140 #if KMP_USE_DYNAMIC_LOCK
1141 #if OMPT_SUPPORT && OMPT_OPTIONAL
1142 OMPT_STORE_RETURN_ADDRESS(global_tid);
1147 #if OMPT_SUPPORT && OMPT_OPTIONAL
1148 ompt_state_t prev_state = ompt_state_undefined;
1149 ompt_thread_info_t ti;
1151 kmp_user_lock_p lck;
1153 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1154 __kmp_assert_valid_gtid(global_tid);
1158 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1159 KMP_CHECK_USER_LOCK_INIT();
1161 if ((__kmp_user_lock_kind == lk_tas) &&
1162 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1163 lck = (kmp_user_lock_p)crit;
1166 else if ((__kmp_user_lock_kind == lk_futex) &&
1167 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1168 lck = (kmp_user_lock_p)crit;
1172 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1175 if (__kmp_env_consistency_check)
1176 __kmp_push_sync(global_tid, ct_critical, loc, lck);
1184 __kmp_itt_critical_acquiring(lck);
1186 #if OMPT_SUPPORT && OMPT_OPTIONAL
1187 OMPT_STORE_RETURN_ADDRESS(gtid);
1188 void *codeptr_ra = NULL;
1189 if (ompt_enabled.enabled) {
1190 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1192 prev_state = ti.state;
1193 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1194 ti.state = ompt_state_wait_critical;
1197 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1198 if (ompt_enabled.ompt_callback_mutex_acquire) {
1199 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1200 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1201 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1207 __kmp_acquire_user_lock_with_checks(lck, global_tid);
1210 __kmp_itt_critical_acquired(lck);
1212 #if OMPT_SUPPORT && OMPT_OPTIONAL
1213 if (ompt_enabled.enabled) {
1215 ti.state = prev_state;
1219 if (ompt_enabled.ompt_callback_mutex_acquired) {
1220 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1221 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1225 KMP_POP_PARTITIONED_TIMER();
1227 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1228 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1232 #if KMP_USE_DYNAMIC_LOCK
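// __kmp_map_hint_to_lock translates omp_lock_hint_t / kmp_lock_hint_t values
// into a concrete lock sequence, falling back to __kmp_user_lock_seq when the
// hints conflict or when TSX (HLE/RTM) is requested but unavailable.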
1235 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1237 #define KMP_TSX_LOCK(seq) lockseq_##seq
1239 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1242 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1243 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
1245 #define KMP_CPUINFO_RTM 0
1249 if (hint & kmp_lock_hint_hle)
1250 return KMP_TSX_LOCK(hle);
1251 if (hint & kmp_lock_hint_rtm)
1252 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1253 if (hint & kmp_lock_hint_adaptive)
1254 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1257 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1258 return __kmp_user_lock_seq;
1259 if ((hint & omp_lock_hint_speculative) &&
1260 (hint & omp_lock_hint_nonspeculative))
1261 return __kmp_user_lock_seq;
1264 if (hint & omp_lock_hint_contended)
1265 return lockseq_queuing;
1268 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1272 if (hint & omp_lock_hint_speculative)
1273 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1275 return __kmp_user_lock_seq;
1278 #if OMPT_SUPPORT && OMPT_OPTIONAL
1279 #if KMP_USE_DYNAMIC_LOCK
1280 static kmp_mutex_impl_t
1281 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1283 switch (KMP_EXTRACT_D_TAG(user_lock)) {
1288 return kmp_mutex_impl_queuing;
1291 return kmp_mutex_impl_spin;
1294 case locktag_rtm_spin:
1295 return kmp_mutex_impl_speculative;
1298 return kmp_mutex_impl_none;
1300 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1303 switch (ilock->type) {
1305 case locktag_adaptive:
1306 case locktag_rtm_queuing:
1307 return kmp_mutex_impl_speculative;
1309 case locktag_nested_tas:
1310 return kmp_mutex_impl_spin;
1312 case locktag_nested_futex:
1314 case locktag_ticket:
1315 case locktag_queuing:
1317 case locktag_nested_ticket:
1318 case locktag_nested_queuing:
1319 case locktag_nested_drdpa:
1320 return kmp_mutex_impl_queuing;
1322 return kmp_mutex_impl_none;
1327 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1328 switch (__kmp_user_lock_kind) {
1330 return kmp_mutex_impl_spin;
1337 return kmp_mutex_impl_queuing;
1340 case lk_rtm_queuing:
1343 return kmp_mutex_impl_speculative;
1346 return kmp_mutex_impl_none;
1366 kmp_critical_name *crit, uint32_t hint) {
1368 kmp_user_lock_p lck;
1369 #if OMPT_SUPPORT && OMPT_OPTIONAL
1370 ompt_state_t prev_state = ompt_state_undefined;
1371 ompt_thread_info_t ti;
1373 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1375 codeptr = OMPT_GET_RETURN_ADDRESS(0);
1378 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1379 __kmp_assert_valid_gtid(global_tid);
1381 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1383 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1385 kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1386 if (KMP_IS_D_LOCK(lckseq)) {
1387 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1388 KMP_GET_D_TAG(lckseq));
1390 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
1396 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1397 lck = (kmp_user_lock_p)lk;
1398 if (__kmp_env_consistency_check) {
1399 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1400 __kmp_map_hint_to_lock(hint));
1403 __kmp_itt_critical_acquiring(lck);
1405 #if OMPT_SUPPORT && OMPT_OPTIONAL
1406 if (ompt_enabled.enabled) {
1407 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1409 prev_state = ti.state;
1410 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1411 ti.state = ompt_state_wait_critical;
1414 if (ompt_enabled.ompt_callback_mutex_acquire) {
1415 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1416 ompt_mutex_critical, (unsigned int)hint,
1417 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1422 #if KMP_USE_INLINED_TAS
1423 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1424 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1426 #elif KMP_USE_INLINED_FUTEX
1427 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1428 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1432 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1435 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1437 if (__kmp_env_consistency_check) {
1438 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1439 __kmp_map_hint_to_lock(hint));
1442 __kmp_itt_critical_acquiring(lck);
1444 #if OMPT_SUPPORT && OMPT_OPTIONAL
1445 if (ompt_enabled.enabled) {
1446 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1448 prev_state = ti.state;
1449 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1450 ti.state = ompt_state_wait_critical;
1453 if (ompt_enabled.ompt_callback_mutex_acquire) {
1454 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1455 ompt_mutex_critical, (unsigned int)hint,
1456 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1461 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1463 KMP_POP_PARTITIONED_TIMER();
1466 __kmp_itt_critical_acquired(lck);
1468 #if OMPT_SUPPORT && OMPT_OPTIONAL
1469 if (ompt_enabled.enabled) {
1471 ti.state = prev_state;
1475 if (ompt_enabled.ompt_callback_mutex_acquired) {
1476 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1477 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1482 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1483 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1498 kmp_critical_name *crit) {
1499 kmp_user_lock_p lck;
1501 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1503 #if KMP_USE_DYNAMIC_LOCK
1504 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1505 lck = (kmp_user_lock_p)crit;
1506 KMP_ASSERT(lck != NULL);
1507 if (__kmp_env_consistency_check) {
1508 __kmp_pop_sync(global_tid, ct_critical, loc);
1511 __kmp_itt_critical_releasing(lck);
1513 #if KMP_USE_INLINED_TAS
1514 if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1515 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1517 #elif KMP_USE_INLINED_FUTEX
1518 if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1519 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1523 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1526 kmp_indirect_lock_t *ilk =
1527 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1528 KMP_ASSERT(ilk != NULL);
1530 if (__kmp_env_consistency_check) {
1531 __kmp_pop_sync(global_tid, ct_critical, loc);
1534 __kmp_itt_critical_releasing(lck);
1536 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1541 if ((__kmp_user_lock_kind == lk_tas) &&
1542 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1543 lck = (kmp_user_lock_p)crit;
1546 else if ((__kmp_user_lock_kind == lk_futex) &&
1547 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1548 lck = (kmp_user_lock_p)crit;
1552 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1555 KMP_ASSERT(lck != NULL);
1557 if (__kmp_env_consistency_check)
1558 __kmp_pop_sync(global_tid, ct_critical, loc);
1561 __kmp_itt_critical_releasing(lck);
1565 __kmp_release_user_lock_with_checks(lck, global_tid);
1569 #if OMPT_SUPPORT && OMPT_OPTIONAL
1572 OMPT_STORE_RETURN_ADDRESS(global_tid);
1573 if (ompt_enabled.ompt_callback_mutex_released) {
1574 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1575 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1576 OMPT_LOAD_RETURN_ADDRESS(0));
1580 KMP_POP_PARTITIONED_TIMER();
1581 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1595 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1596 __kmp_assert_valid_gtid(global_tid);
1598 if (!TCR_4(__kmp_init_parallel))
1599 __kmp_parallel_initialize();
1601 __kmp_resume_if_soft_paused();
1603 if (__kmp_env_consistency_check)
1604 __kmp_check_barrier(global_tid, ct_barrier, loc);
1607 ompt_frame_t *ompt_frame;
1608 if (ompt_enabled.enabled) {
1609 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1610 if (ompt_frame->enter_frame.ptr == NULL)
1611 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1613 OMPT_STORE_RETURN_ADDRESS(global_tid);
1616 __kmp_threads[global_tid]->th.th_ident = loc;
1618 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1619 #if OMPT_SUPPORT && OMPT_OPTIONAL
1620 if (ompt_enabled.enabled) {
1621 ompt_frame->enter_frame = ompt_data_none;
1625 return (status != 0) ? 0 : 1;
1638 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1639 __kmp_assert_valid_gtid(global_tid);
1640 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1655 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1656 __kmp_assert_valid_gtid(global_tid);
1658 if (!TCR_4(__kmp_init_parallel))
1659 __kmp_parallel_initialize();
1661 __kmp_resume_if_soft_paused();
1663 if (__kmp_env_consistency_check) {
1665 KMP_WARNING(ConstructIdentInvalid);
1667 __kmp_check_barrier(global_tid, ct_barrier, loc);
1671 ompt_frame_t *ompt_frame;
1672 if (ompt_enabled.enabled) {
1673 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1674 if (ompt_frame->enter_frame.ptr == NULL)
1675 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1677 OMPT_STORE_RETURN_ADDRESS(global_tid);
1680 __kmp_threads[global_tid]->th.th_ident = loc;
1682 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1683 #if OMPT_SUPPORT && OMPT_OPTIONAL
1684 if (ompt_enabled.enabled) {
1685 ompt_frame->enter_frame = ompt_data_none;
1691 if (__kmp_env_consistency_check) {
1697 __kmp_pop_sync(global_tid, ct_master, loc);
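// __kmpc_single returns 1 for the one thread chosen by __kmp_enter_single to
// execute the single block and 0 for all others; OMPT reports
// ompt_work_single_executor or ompt_work_single_other accordingly. Any implied
// barrier is emitted separately by the compiler.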
1717 __kmp_assert_valid_gtid(global_tid);
1718 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1723 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1726 #if OMPT_SUPPORT && OMPT_OPTIONAL
1727 kmp_info_t *this_thr = __kmp_threads[global_tid];
1728 kmp_team_t *team = this_thr->th.th_team;
1729 int tid = __kmp_tid_from_gtid(global_tid);
1731 if (ompt_enabled.enabled) {
1733 if (ompt_enabled.ompt_callback_work) {
1734 ompt_callbacks.ompt_callback(ompt_callback_work)(
1735 ompt_work_single_executor, ompt_scope_begin,
1736 &(team->t.ompt_team_info.parallel_data),
1737 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1738 1, OMPT_GET_RETURN_ADDRESS(0));
1741 if (ompt_enabled.ompt_callback_work) {
1742 ompt_callbacks.ompt_callback(ompt_callback_work)(
1743 ompt_work_single_other, ompt_scope_begin,
1744 &(team->t.ompt_team_info.parallel_data),
1745 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1746 1, OMPT_GET_RETURN_ADDRESS(0));
1747 ompt_callbacks.ompt_callback(ompt_callback_work)(
1748 ompt_work_single_other, ompt_scope_end,
1749 &(team->t.ompt_team_info.parallel_data),
1750 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1751 1, OMPT_GET_RETURN_ADDRESS(0));
1770 __kmp_assert_valid_gtid(global_tid);
1771 __kmp_exit_single(global_tid);
1772 KMP_POP_PARTITIONED_TIMER();
1774 #if OMPT_SUPPORT && OMPT_OPTIONAL
1775 kmp_info_t *this_thr = __kmp_threads[global_tid];
1776 kmp_team_t *team = this_thr->th.th_team;
1777 int tid = __kmp_tid_from_gtid(global_tid);
1779 if (ompt_enabled.ompt_callback_work) {
1780 ompt_callbacks.ompt_callback(ompt_callback_work)(
1781 ompt_work_single_executor, ompt_scope_end,
1782 &(team->t.ompt_team_info.parallel_data),
1783 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1784 OMPT_GET_RETURN_ADDRESS(0));
1797 KMP_POP_PARTITIONED_TIMER();
1798 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1800 #if OMPT_SUPPORT && OMPT_OPTIONAL
1801 if (ompt_enabled.ompt_callback_work) {
1802 ompt_work_t ompt_work_type = ompt_work_loop;
1803 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1804 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1808 ompt_work_type = ompt_work_loop;
1810 ompt_work_type = ompt_work_sections;
1812 ompt_work_type = ompt_work_distribute;
1817 KMP_DEBUG_ASSERT(ompt_work_type);
1819 ompt_callbacks.ompt_callback(ompt_callback_work)(
1820 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1821 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1824 if (__kmp_env_consistency_check)
1825 __kmp_pop_workshare(global_tid, ct_pdo, loc);
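// The ompc_* and kmpc_* wrappers below back the omp_set_* / kmp_set_* user
// APIs; they mostly resolve the calling thread and forward to the
// corresponding __kmp_* helper.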
1831 void ompc_set_num_threads(int arg) {
1833 __kmp_set_num_threads(arg, __kmp_entry_gtid());
1836 void ompc_set_dynamic(int flag) {
1840 thread = __kmp_entry_thread();
1842 __kmp_save_internal_controls(thread);
1844 set__dynamic(thread, flag ? true : false);
1847 void ompc_set_nested(int flag) {
1851 thread = __kmp_entry_thread();
1853 __kmp_save_internal_controls(thread);
1855 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
1858 void ompc_set_max_active_levels(int max_active_levels) {
1863 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1866 void ompc_set_schedule(omp_sched_t kind, int modifier) {
1868 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1871 int ompc_get_ancestor_thread_num(int level) {
1872 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1875 int ompc_get_team_size(int level) {
1876 return __kmp_get_team_size(__kmp_entry_gtid(), level);
1881 void ompc_set_affinity_format(char const *format) {
1882 if (!__kmp_init_serial) {
1883 __kmp_serial_initialize();
1885 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
1886 format, KMP_STRLEN(format) + 1);
1889 size_t ompc_get_affinity_format(char *buffer, size_t size) {
1891 if (!__kmp_init_serial) {
1892 __kmp_serial_initialize();
1894 format_size = KMP_STRLEN(__kmp_affinity_format);
1895 if (buffer && size) {
1896 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
1902 void ompc_display_affinity(char const *format) {
1904 if (!TCR_4(__kmp_init_middle)) {
1905 __kmp_middle_initialize();
1907 gtid = __kmp_get_gtid();
1908 __kmp_aux_display_affinity(gtid, format);
1911 size_t ompc_capture_affinity(char *buffer, size_t buf_size,
1912 char const *format) {
1914 size_t num_required;
1915 kmp_str_buf_t capture_buf;
1916 if (!TCR_4(__kmp_init_middle)) {
1917 __kmp_middle_initialize();
1919 gtid = __kmp_get_gtid();
1920 __kmp_str_buf_init(&capture_buf);
1921 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
1922 if (buffer && buf_size) {
1923 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
1924 capture_buf.used + 1);
1926 __kmp_str_buf_free(&capture_buf);
1927 return num_required;
1930 void kmpc_set_stacksize(int arg) {
1932 __kmp_aux_set_stacksize(arg);
1935 void kmpc_set_stacksize_s(size_t arg) {
1937 __kmp_aux_set_stacksize(arg);
1940 void kmpc_set_blocktime(int arg) {
1944 gtid = __kmp_entry_gtid();
1945 tid = __kmp_tid_from_gtid(gtid);
1946 thread = __kmp_thread_from_gtid(gtid);
1948 __kmp_aux_set_blocktime(arg, thread, tid);
1951 void kmpc_set_library(int arg) {
1953 __kmp_user_set_library((enum library_type)arg);
1956 void kmpc_set_defaults(char const *str) {
1958 __kmp_aux_set_defaults(str, KMP_STRLEN(str));
1961 void kmpc_set_disp_num_buffers(int arg) {
1964 if (__kmp_init_serial == 0 && arg > 0)
1965 __kmp_dispatch_num_buffers = arg;
1968 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
1969 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1972 if (!TCR_4(__kmp_init_middle)) {
1973 __kmp_middle_initialize();
1975 return __kmp_aux_set_affinity_mask_proc(proc, mask);
1979 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
1980 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1983 if (!TCR_4(__kmp_init_middle)) {
1984 __kmp_middle_initialize();
1986 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
1990 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
1991 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1994 if (!TCR_4(__kmp_init_middle)) {
1995 __kmp_middle_initialize();
1997 return __kmp_aux_get_affinity_mask_proc(proc, mask);
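// __kmpc_copyprivate broadcasts the single-executor's data: the executor
// publishes cpy_data in the team's t_copypriv_data slot, a barrier makes it
// visible, the other threads copy it via cpy_func, and a second barrier keeps
// the source alive until all copies are done.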
2047 void *cpy_data, void (*cpy_func)(void *, void *),
2050 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2051 __kmp_assert_valid_gtid(gtid);
2055 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2057 if (__kmp_env_consistency_check) {
2059 KMP_WARNING(ConstructIdentInvalid);
2066 *data_ptr = cpy_data;
2069 ompt_frame_t *ompt_frame;
2070 if (ompt_enabled.enabled) {
2071 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2072 if (ompt_frame->enter_frame.ptr == NULL)
2073 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2075 OMPT_STORE_RETURN_ADDRESS(gtid);
2079 __kmp_threads[gtid]->th.th_ident = loc;
2081 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2084 (*cpy_func)(cpy_data, *data_ptr);
2090 OMPT_STORE_RETURN_ADDRESS(gtid);
2093 __kmp_threads[gtid]->th.th_ident = loc;
2096 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2097 #if OMPT_SUPPORT && OMPT_OPTIONAL
2098 if (ompt_enabled.enabled) {
2099 ompt_frame->enter_frame = ompt_data_none;
2107 #define INIT_LOCK __kmp_init_user_lock_with_checks
2108 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2109 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2110 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2111 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2112 #define ACQUIRE_NESTED_LOCK_TIMED \
2113 __kmp_acquire_nested_user_lock_with_checks_timed
2114 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2115 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2116 #define TEST_LOCK __kmp_test_user_lock_with_checks
2117 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2118 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2119 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2124 #if KMP_USE_DYNAMIC_LOCK
2127 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2128 kmp_dyna_lockseq_t seq) {
2129 if (KMP_IS_D_LOCK(seq)) {
2130 KMP_INIT_D_LOCK(lock, seq);
2132 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2135 KMP_INIT_I_LOCK(lock, seq);
2137 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2138 __kmp_itt_lock_creating(ilk->lock, loc);
2144 static __forceinline void
2145 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2146 kmp_dyna_lockseq_t seq) {
2149 if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2150 seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2151 seq = __kmp_user_lock_seq;
2155 seq = lockseq_nested_tas;
2159 seq = lockseq_nested_futex;
2162 case lockseq_ticket:
2163 seq = lockseq_nested_ticket;
2165 case lockseq_queuing:
2166 seq = lockseq_nested_queuing;
2169 seq = lockseq_nested_drdpa;
2172 seq = lockseq_nested_queuing;
2174 KMP_INIT_I_LOCK(lock, seq);
2176 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2177 __kmp_itt_lock_creating(ilk->lock, loc);
2182 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2183 uintptr_t hint) {
2184 KMP_DEBUG_ASSERT(__kmp_init_serial);
2185 if (__kmp_env_consistency_check && user_lock == NULL) {
2186 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2189 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2191 #if OMPT_SUPPORT && OMPT_OPTIONAL
2193 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2195 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2196 if (ompt_enabled.ompt_callback_lock_init) {
2197 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2198 ompt_mutex_lock, (omp_lock_hint_t)hint,
2199 __ompt_get_mutex_impl_type(user_lock),
2200 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2206 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2207 void **user_lock, uintptr_t hint) {
2208 KMP_DEBUG_ASSERT(__kmp_init_serial);
2209 if (__kmp_env_consistency_check && user_lock == NULL) {
2210 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2213 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2215 #if OMPT_SUPPORT && OMPT_OPTIONAL
2217 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2219 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2220 if (ompt_enabled.ompt_callback_lock_init) {
2221 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2222 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2223 __ompt_get_mutex_impl_type(user_lock),
2224 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2232 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2233 #if KMP_USE_DYNAMIC_LOCK
2235 KMP_DEBUG_ASSERT(__kmp_init_serial);
2236 if (__kmp_env_consistency_check && user_lock == NULL) {
2237 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2239 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2241 #if OMPT_SUPPORT && OMPT_OPTIONAL
2243 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2245 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2246 if (ompt_enabled.ompt_callback_lock_init) {
2247 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2248 ompt_mutex_lock, omp_lock_hint_none,
2249 __ompt_get_mutex_impl_type(user_lock),
2250 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2256 static char const *const func = "omp_init_lock";
2257 kmp_user_lock_p lck;
2258 KMP_DEBUG_ASSERT(__kmp_init_serial);
2260 if (__kmp_env_consistency_check) {
2261 if (user_lock == NULL) {
2262 KMP_FATAL(LockIsUninitialized, func);
2266 KMP_CHECK_USER_LOCK_INIT();
2268 if ((__kmp_user_lock_kind == lk_tas) &&
2269 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2270 lck = (kmp_user_lock_p)user_lock;
2273 else if ((__kmp_user_lock_kind == lk_futex) &&
2274 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2275 lck = (kmp_user_lock_p)user_lock;
2279 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2282 __kmp_set_user_lock_location(lck, loc);
2284 #if OMPT_SUPPORT && OMPT_OPTIONAL
2286 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2288 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2289 if (ompt_enabled.ompt_callback_lock_init) {
2290 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2291 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2292 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2297 __kmp_itt_lock_creating(lck);
2304 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2305 #if KMP_USE_DYNAMIC_LOCK
2307 KMP_DEBUG_ASSERT(__kmp_init_serial);
2308 if (__kmp_env_consistency_check && user_lock == NULL) {
2309 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2311 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2313 #if OMPT_SUPPORT && OMPT_OPTIONAL
2315 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2317 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2318 if (ompt_enabled.ompt_callback_lock_init) {
2319 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2320 ompt_mutex_nest_lock, omp_lock_hint_none,
2321 __ompt_get_mutex_impl_type(user_lock),
2322 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2328 static char const *const func = "omp_init_nest_lock";
2329 kmp_user_lock_p lck;
2330 KMP_DEBUG_ASSERT(__kmp_init_serial);
2332 if (__kmp_env_consistency_check) {
2333 if (user_lock == NULL) {
2334 KMP_FATAL(LockIsUninitialized, func);
2338 KMP_CHECK_USER_LOCK_INIT();
2340 if ((__kmp_user_lock_kind == lk_tas) &&
2341 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2342 OMP_NEST_LOCK_T_SIZE)) {
2343 lck = (kmp_user_lock_p)user_lock;
2346 else if ((__kmp_user_lock_kind == lk_futex) &&
2347 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2348 OMP_NEST_LOCK_T_SIZE)) {
2349 lck = (kmp_user_lock_p)user_lock;
2353 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2356 INIT_NESTED_LOCK(lck);
2357 __kmp_set_user_lock_location(lck, loc);
2359 #if OMPT_SUPPORT && OMPT_OPTIONAL
2361 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2363 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2364 if (ompt_enabled.ompt_callback_lock_init) {
2365 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2366 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2367 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2372 __kmp_itt_lock_creating(lck);
2378 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2379 #if KMP_USE_DYNAMIC_LOCK
2382 kmp_user_lock_p lck;
2383 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2384 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2386 lck = (kmp_user_lock_p)user_lock;
2388 __kmp_itt_lock_destroyed(lck);
2390 #if OMPT_SUPPORT && OMPT_OPTIONAL
2392 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2394 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2395 if (ompt_enabled.ompt_callback_lock_destroy) {
2396 kmp_user_lock_p lck;
2397 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2398 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2400 lck = (kmp_user_lock_p)user_lock;
2402 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2403 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2406 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2408 kmp_user_lock_p lck;
2410 if ((__kmp_user_lock_kind == lk_tas) &&
2411 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2412 lck = (kmp_user_lock_p)user_lock;
2415 else if ((__kmp_user_lock_kind == lk_futex) &&
2416 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2417 lck = (kmp_user_lock_p)user_lock;
2421 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2424 #if OMPT_SUPPORT && OMPT_OPTIONAL
2426 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2428 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2429 if (ompt_enabled.ompt_callback_lock_destroy) {
2430 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2431 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2436 __kmp_itt_lock_destroyed(lck);
2440 if ((__kmp_user_lock_kind == lk_tas) &&
2441 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2445 else if ((__kmp_user_lock_kind == lk_futex) &&
2446 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2451 __kmp_user_lock_free(user_lock, gtid, lck);
2457 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2458 #if KMP_USE_DYNAMIC_LOCK
2461 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2462 __kmp_itt_lock_destroyed(ilk->lock);
2464 #if OMPT_SUPPORT && OMPT_OPTIONAL
2466 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2468 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2469 if (ompt_enabled.ompt_callback_lock_destroy) {
2470 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2471 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2474 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2478 kmp_user_lock_p lck;
2480 if ((__kmp_user_lock_kind == lk_tas) &&
2481 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2482 OMP_NEST_LOCK_T_SIZE)) {
2483 lck = (kmp_user_lock_p)user_lock;
2486 else if ((__kmp_user_lock_kind == lk_futex) &&
2487 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2488 OMP_NEST_LOCK_T_SIZE)) {
2489 lck = (kmp_user_lock_p)user_lock;
2493 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2496 #if OMPT_SUPPORT && OMPT_OPTIONAL
2498 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2500 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2501 if (ompt_enabled.ompt_callback_lock_destroy) {
2502 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2503 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2508 __kmp_itt_lock_destroyed(lck);
2511 DESTROY_NESTED_LOCK(lck);
2513 if ((__kmp_user_lock_kind == lk_tas) &&
2514 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2515 OMP_NEST_LOCK_T_SIZE)) {
2519 else if ((__kmp_user_lock_kind == lk_futex) &&
2520 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2521 OMP_NEST_LOCK_T_SIZE)) {
2526 __kmp_user_lock_free(user_lock, gtid, lck);
2531 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2533 #if KMP_USE_DYNAMIC_LOCK
2534 int tag = KMP_EXTRACT_D_TAG(user_lock);
2536 __kmp_itt_lock_acquiring(
2540 #if OMPT_SUPPORT && OMPT_OPTIONAL
2542 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2544 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2545 if (ompt_enabled.ompt_callback_mutex_acquire) {
2546 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2547 ompt_mutex_lock, omp_lock_hint_none,
2548 __ompt_get_mutex_impl_type(user_lock),
2549 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2552 #if KMP_USE_INLINED_TAS
2553 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2554 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2556 #elif KMP_USE_INLINED_FUTEX
2557 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2558 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2562 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2565 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2567 #if OMPT_SUPPORT && OMPT_OPTIONAL
2568 if (ompt_enabled.ompt_callback_mutex_acquired) {
2569 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2570 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2576 kmp_user_lock_p lck;
2578 if ((__kmp_user_lock_kind == lk_tas) &&
2579 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2580 lck = (kmp_user_lock_p)user_lock;
2583 else if ((__kmp_user_lock_kind == lk_futex) &&
2584 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2585 lck = (kmp_user_lock_p)user_lock;
2589 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2593 __kmp_itt_lock_acquiring(lck);
2595 #if OMPT_SUPPORT && OMPT_OPTIONAL
2597 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2599 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2600 if (ompt_enabled.ompt_callback_mutex_acquire) {
2601 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2602 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2603 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2607 ACQUIRE_LOCK(lck, gtid);
2610 __kmp_itt_lock_acquired(lck);
2613 #if OMPT_SUPPORT && OMPT_OPTIONAL
2614 if (ompt_enabled.ompt_callback_mutex_acquired) {
2615 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2616 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2623 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2624 #if KMP_USE_DYNAMIC_LOCK
2627 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2629 #if OMPT_SUPPORT && OMPT_OPTIONAL
2631 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2633 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2634 if (ompt_enabled.enabled) {
2635 if (ompt_enabled.ompt_callback_mutex_acquire) {
2636 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2637 ompt_mutex_nest_lock, omp_lock_hint_none,
2638 __ompt_get_mutex_impl_type(user_lock),
2639 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2643 int acquire_status =
2644 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2645 (void) acquire_status;
2647 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2650 #if OMPT_SUPPORT && OMPT_OPTIONAL
2651 if (ompt_enabled.enabled) {
2652 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2653 if (ompt_enabled.ompt_callback_mutex_acquired) {
2655 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2656 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2660 if (ompt_enabled.ompt_callback_nest_lock) {
2662 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2663 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2671 kmp_user_lock_p lck;
2673 if ((__kmp_user_lock_kind == lk_tas) &&
2674 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2675 OMP_NEST_LOCK_T_SIZE)) {
2676 lck = (kmp_user_lock_p)user_lock;
2679 else if ((__kmp_user_lock_kind == lk_futex) &&
2680 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2681 OMP_NEST_LOCK_T_SIZE)) {
2682 lck = (kmp_user_lock_p)user_lock;
2686 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2690 __kmp_itt_lock_acquiring(lck);
2692 #if OMPT_SUPPORT && OMPT_OPTIONAL
2694 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2696 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2697 if (ompt_enabled.enabled) {
2698 if (ompt_enabled.ompt_callback_mutex_acquire) {
2699 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2700 ompt_mutex_nest_lock, omp_lock_hint_none,
2701 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2707 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2710 __kmp_itt_lock_acquired(lck);
2713 #if OMPT_SUPPORT && OMPT_OPTIONAL
2714 if (ompt_enabled.enabled) {
2715 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2716 if (ompt_enabled.ompt_callback_mutex_acquired) {
2718 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2719 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2722 if (ompt_enabled.ompt_callback_nest_lock) {
2724 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2725 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2734 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2735 #if KMP_USE_DYNAMIC_LOCK
2737 int tag = KMP_EXTRACT_D_TAG(user_lock);
2739 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2741 #if KMP_USE_INLINED_TAS
2742 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2743 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2745 #elif KMP_USE_INLINED_FUTEX
2746 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2747 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2751 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2754 #if OMPT_SUPPORT && OMPT_OPTIONAL
2756 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2758 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2759 if (ompt_enabled.ompt_callback_mutex_released) {
2760 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2761 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2767 kmp_user_lock_p lck;
2772 if ((__kmp_user_lock_kind == lk_tas) &&
2773 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2774 #if KMP_OS_LINUX && \
2775 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2778 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2780 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2783 #if OMPT_SUPPORT && OMPT_OPTIONAL
2785 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2787 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2788 if (ompt_enabled.ompt_callback_mutex_released) {
2789 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2790 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2796 lck = (kmp_user_lock_p)user_lock;
2800 else if ((__kmp_user_lock_kind == lk_futex) &&
2801 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2802 lck = (kmp_user_lock_p)user_lock;
2806 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2810 __kmp_itt_lock_releasing(lck);
2813 RELEASE_LOCK(lck, gtid);
2815 #if OMPT_SUPPORT && OMPT_OPTIONAL
2817 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2819 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2820 if (ompt_enabled.ompt_callback_mutex_released) {
2821 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2822 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
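For orientation, here is a minimal user-level sketch of the lock API these entry points service. omp_init_lock, omp_set_lock and omp_unset_lock are the standard OpenMP calls; how a given compiler lowers them onto the runtime's set/unset entry points is implementation-dependent, so treat the mapping as illustrative only.

#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_lock_t lock;
  int counter = 0;
  omp_init_lock(&lock);
#pragma omp parallel num_threads(4)
  {
    // each set/unset pair ends up in the runtime's lock release/acquire paths
    omp_set_lock(&lock);
    counter++; // protected update
    omp_unset_lock(&lock);
  }
  omp_destroy_lock(&lock);
  printf("counter = %d\n", counter);
  return 0;
}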
/* release the nestable lock */
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
  int release_status =
      KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)release_status;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_prev
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path: decrement the nesting count in place
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    int release_status = KMP_LOCK_STILL_HELD;
#endif

    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
      release_status = KMP_LOCK_RELEASED;
#endif
    }

#if OMPT_SUPPORT && OMPT_OPTIONAL
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.enabled) {
      if (release_status == KMP_LOCK_RELEASED) {
        if (ompt_enabled.ompt_callback_mutex_released) {
          // release_lock_last
          ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
              ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
        }
      } else if (ompt_enabled.ompt_callback_nest_lock) {
        // release_lock_previous
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }

  __kmp_itt_lock_releasing(lck);

  int release_status;
  release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_previous
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
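A nestable lock can be re-acquired by the thread that already owns it; it is only released once the nesting count returns to zero, which is what the KMP_LOCK_RELEASED / KMP_LOCK_STILL_HELD distinction above tracks. A minimal user-level sketch of that behavior, using only the standard OpenMP API (illustrative, not the runtime's own code):

#include <omp.h>

// Recursive helper that re-acquires the nestable lock it already owns.
static void update(omp_nest_lock_t *l, int *value, int depth) {
  omp_set_nest_lock(l); // nesting count grows: 1, 2, 3, ...
  (*value)++;
  if (depth > 0)
    update(l, value, depth - 1);
  omp_unset_nest_lock(l); // released only when the count drops back to 0
}

int main(void) {
  omp_nest_lock_t l;
  int value = 0;
  omp_init_nest_lock(&l);
#pragma omp parallel num_threads(4)
  update(&l, &value, 3);
  omp_destroy_nest_lock(&l);
  return 0;
}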
/* try to acquire the lock without blocking */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK
  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

  __kmp_itt_lock_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  rc = TEST_LOCK(lck, gtid);
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  return (rc ? FTN_TRUE : FTN_FALSE);
#endif // KMP_USE_DYNAMIC_LOCK
}
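__kmpc_test_lock returns FTN_TRUE only when the lock was acquired without blocking. The user-level equivalent is omp_test_lock; a short illustrative sketch:

#include <omp.h>

int main(void) {
  omp_lock_t l;
  omp_init_lock(&l);
#pragma omp parallel num_threads(4)
  {
    // Non-blocking acquire: keep trying (or do other work) while the lock
    // is held by another thread.
    while (!omp_test_lock(&l)) {
      // ... other useful work could be done here ...
    }
    // lock is held here
    omp_unset_lock(&l);
  }
  omp_destroy_lock(&l);
  return 0;
}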
/* try to acquire the nestable lock without blocking */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK
  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
        codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;
#endif // KMP_USE_DYNAMIC_LOCK
}
/* Thread-local storage of the reduction method chosen for the current
   reduce block; set by __kmpc_reduce*() and read by __kmpc_end_reduce*(). */
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod)                              \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid)                                       \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {

  // this lock was visible to a customer and to the threading profile tool as
  // a serial overhead span (although it's used for an internal purpose only)
  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid,
                                KMP_GET_I_TAG(__kmp_user_lock_seq));
    }
  }
  // Access the actual lock object: direct and indirect locks are dispatched
  // differently because this lock does not go through the normal lock table.
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // The fast reduction code is only emitted for 32-byte critical sections;
  // if the lock does not fit, fall back to a pointer to a separate lock.
  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }
  KMP_DEBUG_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // If the lock did not fit into the 32-byte critical section, a pointer to
  // the real lock was stored there instead.
  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}
static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                     int *task_state) {
  kmp_team_t *team;

  // Check if we are inside a teams construct
  if (th->th.th_teams_microtask) {
    *team_p = team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      // This is a reduction at the teams construct: temporarily swap the
      // thread into the parent team for the duration of the reduction.
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      *task_state = th->th.th_task_state;
      th->th.th_task_state = 0;
      return 1;
    }
  }
  return 0;
}
static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
  // Restore the thread's team structure after a teams reduction
  th->th.th_info.ds.ds_tid = 0;
  th->th.th_team = team;
  th->th.th_team_nproc = team->t.t_nproc;
  th->th.th_task_team = team->t.t_task_team[task_state];
  __kmp_type_convert(task_state, &(th->th.th_task_state));
}
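These two helpers temporarily move the thread into its parent team while a reduction on a teams construct executes, then restore it. At the source level this corresponds to a reduction clause on a teams region; a minimal sketch, assuming a compiler with host-teams support (OpenMP 5.0 or later):

#include <stdio.h>

int main(void) {
  long sum = 0;
  // A reduction attached to the teams construct is the case the swap/restore
  // helpers above handle inside the runtime.
#pragma omp teams distribute parallel for reduction(+ : sum)
  for (int i = 0; i < 1000; ++i)
    sum += i;
  printf("sum = %ld\n", sum); // expected 499500
  return 0;
}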
/* Non-blocking reduce entry point: returns 1 if the calling thread should
   perform the reduction and then call __kmpc_end_reduce_nowait(), 2 if the
   atomic reduction path should be used, and 0 otherwise. */
kmp_int32 __kmpc_reduce_nowait(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // a reduction clause cannot appear as a stand-alone directive, but make
  // sure the runtime is initialized anyway
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  // the chosen method is kept as a thread-specific property so that the
  // matching __kmpc_end_reduce_nowait() can retrieve it
  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);
  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    // team size == 1: no synchronization is required
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads pop here because __kmpc_end_reduce_nowait() is not
    // generated for the atomic case
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// this barrier is used for an internal purpose only
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all workers except the master pop here; they will not reach
    // __kmpc_end_reduce_nowait()
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}
/* Finish the non-blocking reduce started by __kmpc_reduce_nowait(). */
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10,
           ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

  } else if (packed_reduction_method == empty_reduce_block) {

    // team size == 1: no synchronization was required

  } else if (packed_reduction_method == atomic_reduce_block) {

    // the atomic case popped its sync in __kmpc_reduce_nowait(); nothing to do

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only the master gets here; the tree reduction already synchronized
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));
}
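The contract code generators follow here: a return value of 1 from __kmpc_reduce_nowait means the calling thread should combine the partial results and then call __kmpc_end_reduce_nowait; 2 means the atomic fallback should be used; 0 means there is nothing left to do. The sketch below shows a hypothetical lowering of a reduction(+:sum) epilogue. It assumes the kmp.h declarations already included by this file; reduce_epilogue, reduce_fn, my_sum and shared_sum are invented names, and real compiler-generated code differs in detail.

// Combines two partial sums; passed to the runtime as reduce_func.
static void reduce_fn(void *lhs, void *rhs) {
  *(long *)lhs += *(long *)rhs;
}

// Hypothetical per-thread epilogue of: #pragma omp ... reduction(+:sum) nowait
static void reduce_epilogue(ident_t *loc, kmp_int32 gtid, long *shared_sum,
                            long my_sum, kmp_critical_name *crit) {
  kmp_int32 ret = __kmpc_reduce_nowait(loc, gtid, /*num_vars=*/1, sizeof(long),
                                       &my_sum, reduce_fn, crit);
  if (ret == 1) {
    // this thread holds the (possibly already tree-combined) data: fold it
    // into the shared variable and close the reduce block
    *shared_sum += my_sum;
    __kmpc_end_reduce_nowait(loc, gtid, crit);
  } else if (ret == 2) {
    // atomic fallback: every thread updates the shared variable atomically
#pragma omp atomic
    *shared_sum += my_sum;
  } // ret == 0: nothing to do for this thread
}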
/* Blocking reduce entry point: same return contract as __kmpc_reduce_nowait(),
   but the tree-reduction barrier waits for the whole team. */
kmp_int32 __kmpc_reduce(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;
    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;
    // team size == 1: no synchronization is required
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all workers except the master pop here; only the master reaches
    // __kmpc_end_reduce()
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: master
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));
  return retval;
}
/* Finish the blocking reduce started by __kmpc_reduce(); the terminating
   barrier of the construct is executed here. */
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  OMPT_REDUCTION_DECL(th, global_tid);

  if (packed_reduction_method == critical_reduce_block) {
    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == empty_reduce_block) {

    // team size == 1: no reduction synchronization was needed, but the
    // construct's terminating barrier still executes
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only the master executes here (it releases all other workers)
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));
}

#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD
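At the source level, the blocking pair __kmpc_reduce / __kmpc_end_reduce typically backs an ordinary reduction clause without nowait; the runtime picks the actual method (critical, atomic, or tree) at run time. A minimal user-level example:

#include <stdio.h>

int main(void) {
  double a[1000], b[1000], dot = 0.0;
  for (int i = 0; i < 1000; ++i) { a[i] = 1.0; b[i] = 2.0; }
  // Each thread accumulates a private partial dot product; the runtime
  // combines them at the end of the worksharing loop.
#pragma omp parallel for reduction(+ : dot)
  for (int i = 0; i < 1000; ++i)
    dot += a[i] * b[i];
  printf("dot = %f\n", dot); // expected 2000.0
  return 0;
}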
kmp_uint64 __kmpc_get_taskid() {
  kmp_int32 gtid;
  kmp_info_t *thread;
  gtid = __kmp_get_gtid();
  if (gtid < 0)
    return 0; // not an OpenMP thread
  thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;
}

kmp_uint64 __kmpc_get_parent_taskid() {
  kmp_int32 gtid;
  kmp_info_t *thread;
  kmp_taskdata_t *parent_task;
  gtid = __kmp_get_gtid();
  if (gtid < 0)
    return 0; // not an OpenMP thread
  thread = __kmp_thread_from_gtid(gtid);
  parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);
}
/* Initialize doacross (ordered-with-depend) loop information; dims holds one
   entry per loop in the nest (num_dims entries). */
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  __kmp_assert_valid_gtid(gtid);
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if the team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // pick the shared buffer for this loop
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into the allocated private buffer
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is the number of dimensions
  // Also save the address of num_done so it can be reached later without
  // knowing the buffer index
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64 range_length; // ranges of all dimensions but dims[0]
    if (dims[j].st == 1) { // most common case
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute the total trip count of the loop nest, starting with dims[0]
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check whether the shared buffer is still occupied by a previous loop
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free
    __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                 __kmp_eq_4, NULL);
  }
#if KMP_32_BIT_ARCH
  // The first thread gets NULL from the CAS and allocates the flags array;
  // others see 1 while initialization is in progress, or the pointer after.
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    size_t size =
        (size_t)trace_count / 8 + 8; // in bytes, one bit per iteration
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) {
    // initialization is still in progress, need to wait
#if KMP_32_BIT_ARCH
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
      KMP_YIELD(TRUE);
  }
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check value
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // keep a private copy to avoid touching the
  // shared buffer on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int64 shft;
  size_t num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of the "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if the team is serialized
  }

  // calculate the sequential iteration number and check bounds
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = (size_t)pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_dependence_t deps[num_dims];
#endif
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_sink;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    size_t j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_sink;
#endif
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
  }
#endif
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}
void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int64 shft;
  size_t num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of the "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if the team is serialized
  }

  // calculate the sequential iteration number (same as in "wait", but no
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = (size_t)pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_dependence_t deps[num_dims];
#endif
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_source;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    size_t j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_source;
#endif
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
  }
#endif
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}
void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done =
      KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread, need to free shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free buffer for future re-use
  }
  // free private resources (the buffer index is kept)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
/* OpenMP memory-management entry points: thin wrappers over __kmpc_alloc and
   friends, resolving the calling thread's gtid. */
void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
  return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
}

void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
  return __kmpc_calloc(__kmp_entry_gtid(), nmemb, size, allocator);
}

void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
                  omp_allocator_handle_t free_allocator) {
  return __kmpc_realloc(__kmp_entry_gtid(), ptr, size, allocator,
                        free_allocator);
}

void omp_free(void *ptr, omp_allocator_handle_t allocator) {
  __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
}
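A small usage sketch of this user-facing API follows. omp_default_mem_alloc is a predefined allocator handle; omp_calloc and omp_realloc are OpenMP 5.1-level additions, so availability depends on the runtime version.

#include <omp.h>
#include <string.h>

int main(void) {
  // Allocate through the OpenMP memory-management API.
  double *buf =
      (double *)omp_alloc(1024 * sizeof(double), omp_default_mem_alloc);
  if (buf == NULL)
    return 1;
  memset(buf, 0, 1024 * sizeof(double));
  // Grow the buffer, reusing the same allocator for both alloc and free.
  buf = (double *)omp_realloc(buf, 2048 * sizeof(double),
                              omp_default_mem_alloc, omp_default_mem_alloc);
  omp_free(buf, omp_default_mem_alloc);
  return 0;
}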
int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}

int __kmpc_pause_resource(kmp_pause_status_t level) {
  if (!__kmp_init_serial) {
    return 1; // can't pause if the runtime is not initialized
  }
  return __kmp_pause_resource(level);
}
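__kmpc_pause_resource backs the OpenMP 5.0 pause API. A hedged sketch of how an application might use omp_pause_resource_all (a return value of 0 indicates success; whether a given pause kind is honored is implementation-defined):

#include <omp.h>
#include <stdio.h>

int main(void) {
#pragma omp parallel
  { /* force the runtime to start its worker threads */ }

  // Ask the runtime to release resources; omp_pause_soft keeps state that
  // allows a transparent restart on the next parallel region.
  if (omp_pause_resource_all(omp_pause_soft) != 0)
    printf("soft pause not supported by this runtime\n");

#pragma omp parallel
  { /* the runtime restarts transparently on next use */ }
  return 0;
}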