#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp_stats.h"
#include "ompt-specific.h"

/*! The flag_type describes the storage used for the flag. */
enum flag_type {
  flag32, /**< 32 bit flags */
  flag64, /**< 64 bit flags */
  flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
};
struct flag_properties {
  unsigned int type : 16;
  unsigned int reserved : 16;
};
/*! Base class for wait/release volatile flag */
template <typename P> class kmp_flag_native {
  volatile P *loc;
  flag_properties t;

public:
  typedef P flag_t;
  kmp_flag_native(volatile P *p, flag_type ft)
      : loc(p), t({(short unsigned int)ft, 0U}) {}
  volatile P *get() { return loc; }
  void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
  void set(volatile P *new_loc) { loc = new_loc; }
  flag_type get_type() { return (flag_type)(t.type); }
  P load() { return *loc; }
  void store(P val) { *loc = val; }
};
/*! Base class for wait/release atomic flag */
template <typename P> class kmp_flag {
  std::atomic<P> *loc; /**< Pointer to flag location modified by another thread */
  flag_properties t; /**< "Type" of the flag in loc */

public:
  typedef P flag_t;
  kmp_flag(std::atomic<P> *p, flag_type ft)
      : loc(p), t({(short unsigned int)ft, 0U}) {}
  /*! @result the pointer to the actual flag */
  std::atomic<P> *get() { return loc; }
  /*! @result void* pointer to the actual flag */
  void *get_void_p() { return RCAST(void *, loc); }
  /*! @param new_loc in   set loc to point at new_loc */
  void set(std::atomic<P> *new_loc) { loc = new_loc; }
  /*! @result the flag_type */
  flag_type get_type() { return (flag_type)(t.type); }
  /*! @result flag value, loaded with acquire semantics */
  P load() { return loc->load(std::memory_order_acquire); }
  /*! @param val the new flag value, stored with release semantics */
  void store(P val) { loc->store(val, std::memory_order_release); }
};
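// load() and store() above form a release/acquire pair: writes made by the
// releasing thread before store() become visible to any waiter whose load()
// observes the new value. A minimal standalone sketch of the same handshake
// (plain C++11 atomics, illustrative variable names only):
//
//   std::atomic<kmp_uint64> go{0};
//   kmp_uint64 payload;
//   // releaser:
//   //   payload = 42;
//   //   go.store(1, std::memory_order_release);
//   // waiter:
//   //   while (go.load(std::memory_order_acquire) != 1) { /* spin */ }
//   //   use(payload); // guaranteed to observe 42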
static void __ompt_implicit_task_end(kmp_info_t *this_thr,
                                     ompt_state_t ompt_state,
                                     ompt_data_t *tId) {
  int ds_tid = this_thr->th.th_info.ds.ds_tid;
  if (ompt_state == ompt_state_wait_barrier_implicit) {
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    void *codeptr = NULL;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
    if (!KMP_MASTER_TID(ds_tid)) {
      if (ompt_enabled.ompt_callback_implicit_task) {
        int flags = this_thr->th.ompt_thread_info.parallel_flags;
        flags = (flags & ompt_parallel_league) ? ompt_task_initial
                                               : ompt_task_implicit;
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, tId, 0, ds_tid, flags);
      }
      // Return to idle state.
      this_thr->th.ompt_thread_info.state = ompt_state_idle;
    } else {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
}
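// The two sync_region callbacks above fire with ompt_scope_end when the
// implicit-barrier wait finishes, followed on worker threads by an
// implicit_task end event. A tool-side sketch of a matching callback, under
// the assumption of the standard OMPT 5.0 callback signature (hypothetical
// tool code, not part of this runtime):
//
//   static void on_sync_region_wait(ompt_sync_region_t kind,
//                                   ompt_scope_endpoint_t endpoint,
//                                   ompt_data_t *parallel_data,
//                                   ompt_data_t *task_data,
//                                   const void *codeptr_ra) {
//     if (kind == ompt_sync_region_barrier_implicit &&
//         endpoint == ompt_scope_end) {
//       /* record end of barrier wait for task_data */
//     }
//   }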
/* Spin wait loop that first does pause/yield, then sleep. A thread that calls
   __kmp_wait_* must make certain that another thread calls __kmp_release to
   wake it back up, to prevent deadlocks! */
template <class C, bool final_spin, bool Cancellable = false,
          bool Sleepable = true>
static inline bool
__kmp_wait_template(kmp_info_t *this_thr,
                    C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  volatile void *spin = flag->get();
#endif
  kmp_uint32 spins;
  int th_gtid;
  int tasks_completed = FALSE;
#if !KMP_USE_MONITOR
  kmp_uint64 poll_count;
  kmp_uint64 hibernate_goal;
#else
  kmp_uint32 hibernate;
#endif

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  if (flag->done_check()) {
    KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
    return false;
  }
  th_gtid = this_thr->th.th_info.ds.ds_gtid;
  if (Cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel)
      return true;
  }
  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);

  KA_TRACE(20,
           ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
  stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif
  ompt_state_t ompt_entry_state;
  ompt_data_t *tId;
  if (ompt_enabled.enabled) {
    ompt_entry_state = this_thr->th.ompt_thread_info.state;
    if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
        KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
      ompt_lw_taskteam_t *team =
          this_thr->th.th_team->t.ompt_serialized_team_info;
      if (team) {
        tId = &(team->ompt_task_info.task_data);
      } else {
        tId = OMPT_CUR_TASK_DATA(this_thr);
      }
    } else {
      tId = &(this_thr->th.ompt_thread_info.task_data);
    }
    if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
                       this_thr->th.th_task_team == NULL)) {
      // The implicit task is already done: no task queue, or task team done.
      __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
    }
  }
  KMP_INIT_YIELD(spins); // Setup for waiting

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
      __kmp_pause_status == kmp_soft_paused) {
#if KMP_USE_MONITOR
#ifdef KMP_ADJUST_BLOCKTIME
    if (__kmp_pause_status == kmp_soft_paused ||
        (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
      // Force an immediate suspend if blocktime was not set by the user.
      hibernate = 0;
    else
      hibernate = this_thr->th.th_team_bt_intervals;
#else
    hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

    // Add the current time to get the absolute point at which to fall asleep.
    hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
    KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                  hibernate - __kmp_global.g.g_time.dt.t_value));
#else
    if (__kmp_pause_status == kmp_soft_paused) {
      // Force an immediate suspend.
      hibernate_goal = KMP_NOW();
    } else
      hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
    poll_count = 0;
#endif // KMP_USE_MONITOR
  }

  int oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
  // Main wait spin loop.
  while (flag->notdone_check()) {
    kmp_task_team_t *task_team = NULL;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      task_team = this_thr->th.th_task_team;
      if (task_team != NULL) {
        if (TCR_SYNC_4(task_team->tt.tt_active)) {
          if (KMP_TASKING_ENABLED(task_team))
            flag->execute_tasks(
                this_thr, th_gtid, final_spin,
                &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
          else
            this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } else {
          KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
          // The task team is done; report the implicit task end once.
          if (final_spin && ompt_enabled.enabled)
            __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
          this_thr->th.th_task_team = NULL;
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      } else {
        this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
      }
    }

    KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }

    // If oversubscribed, or waited long enough, yield to other threads.
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
#if KMP_STATS_ENABLED
    // Check if the thread has been signalled to idle (join barrier finished).
    if (this_thr->th.th_stats->isIdle() &&
        KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
      KMP_SET_THREAD_STATE(IDLE);
      KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
    }
#endif
    // Check if the barrier surrounding this wait loop has been cancelled.
    if (Cancellable) {
      kmp_team_t *team = this_thr->th.th_team;
      if (team && team->t.t_cancel_request == cancel_parallel)
        break;
    }
    // A hidden helper worker only blocks once all hidden helper tasks are done.
    if (task_team && KMP_HIDDEN_HELPER_WORKER_THREAD(th_gtid) &&
        !TCR_4(__kmp_hidden_helper_team_done)) {
      if (KMP_ATOMIC_LD_ACQ(&__kmp_unexecuted_hidden_helper_tasks) == 0) {
        __kmp_hidden_helper_worker_thread_wait();
      }
      continue;
    }
    // Don't suspend if KMP_BLOCKTIME is set to "infinite".
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;

    // Don't suspend if there is a likelihood of new tasks being spawned.
    if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
      continue;

#if KMP_USE_MONITOR
    // If we have waited long enough, go to sleep.
    if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
      continue;
#else
    if (KMP_BLOCKING(hibernate_goal, poll_count++))
      continue;
#endif
    // Don't suspend if this wait was designated non-sleepable by template args.
    if (!Sleepable)
      continue;
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
    if (__kmp_mwait_enabled || __kmp_umwait_enabled) {
      KF_TRACE(50, ("__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid));
      flag->mwait(th_gtid);
    } else {
#endif
      KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
      KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
      flag->suspend(th_gtid);
      KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
    }
#endif
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    } else if (__kmp_tasking_mode != tskm_immediate_exec &&
               this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
      this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
    }
  } // while
  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
    __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
    ompt_exit_state = this_thr->th.ompt_thread_info.state;
    if (ompt_exit_state == ompt_state_idle) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
#if KMP_STATS_ENABLED
  // If we were put into the idle state, pop that off the state stack.
  if (KMP_GET_THREAD_STATE() == IDLE) {
    KMP_POP_PARTITIONED_TIMER();
    KMP_SET_THREAD_STATE(thread_state);
    this_thr->th.th_stats->resetIdleFlag();
  }
#endif

  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
  if (Cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel) {
      if (tasks_completed) {
        // Undo the earlier decrement of unfinished_threads so this thread can
        // decrement again at the join barrier without going negative.
        kmp_task_team_t *task_team = this_thr->th.th_task_team;
        std::atomic<kmp_int32> *unfinished_threads =
            &(task_team->tt.tt_unfinished_threads);
        KMP_ATOMIC_INC(unfinished_threads);
      }
      return true;
    }
  }
  return false;
}
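// The spin wait above escalates in three stages: poll the flag (executing
// ready tasks), yield once oversubscribed or after enough spins, and finally
// sleep via mwait()/suspend() until __kmp_release_template wakes the thread.
// A sketch of how the concrete flag classes defined below drive it, assuming
// a go-flag that starts at zero (illustrative only):
//
//   kmp_flag_64<> flag(&thr->th.th_bar[bs_forkjoin_barrier].bb.b_go,
//                      (kmp_uint64)KMP_BARRIER_STATE_BUMP);
//   flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(NULL)); // worker side
//   // ... the primary thread later calls flag.release() on the same b_go,
//   // which bumps the value and resumes any sleeping waiter.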
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
// Set up a monitor on the flag variable, causing the calling thread to wait
// in a less active state until the flag variable is modified.
template <class C>
static inline void __kmp_mwait_template(int th_gtid, C *flag) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_mwait);
  kmp_info_t *th = __kmp_threads[th_gtid];

  KF_TRACE(30, ("__kmp_mwait_template: T#%d enter for flag = %p\n", th_gtid,
                flag->get()));

  // User-level mwait is available.
  KMP_DEBUG_ASSERT(__kmp_mwait_enabled || __kmp_umwait_enabled);

  __kmp_suspend_initialize_thread(th);
  __kmp_lock_suspend_mx(th);

  volatile void *spin = flag->get();
  void *cacheline = (void *)(kmp_uintptr_t(spin) & ~(CACHE_LINE - 1));
  if (!flag->done_check()) {
    // Mark the thread as no longer active.
    th->th.th_active = FALSE;
    if (th->th.th_active_in_pool) {
      th->th.th_active_in_pool = FALSE;
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
    }
    flag->set_sleeping();
    KF_TRACE(50, ("__kmp_mwait_template: T#%d calling monitor\n", th_gtid));
#if KMP_HAVE_UMWAIT
    if (__kmp_umwait_enabled) {
      __kmp_umonitor(cacheline);
    }
#elif KMP_HAVE_MWAIT
    if (__kmp_mwait_enabled) {
      __kmp_mm_monitor(cacheline, 0, 0);
    }
#endif
    // Re-check the flag between 'monitor' and 'mwait': a write could have
    // happened after the last check but before monitoring started, in which
    // case the monitor cannot detect the change and the wake-up would be lost.
    if (flag->done_check())
      flag->unset_sleeping();
    else {
      // If the flag changes from here on, the wake-up happens immediately.
      TCW_PTR(th->th.th_sleep_loc, (void *)flag);
      __kmp_unlock_suspend_mx(th);
      KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
#if KMP_HAVE_UMWAIT
      if (__kmp_umwait_enabled) {
        __kmp_umwait(1, 100);
      }
#elif KMP_HAVE_MWAIT
      if (__kmp_mwait_enabled) {
        __kmp_mm_mwait(0, __kmp_mwait_hints);
      }
#endif
      KF_TRACE(50, ("__kmp_mwait_template: T#%d mwait done\n", th_gtid));
      __kmp_lock_suspend_mx(th);
      // Clean up sleep info, regardless of why the thread stopped waiting.
      if (flag->is_sleeping())
        flag->unset_sleeping();
      TCW_PTR(th->th.th_sleep_loc, NULL);
    }
    // Mark the thread as active again.
    th->th.th_active = TRUE;
    if (TCR_4(th->th.th_in_pool)) {
      KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
      th->th.th_active_in_pool = TRUE;
    }
  } // Drop out to the main wait loop to re-check the flag and handle tasks.
  __kmp_unlock_suspend_mx(th);
  KF_TRACE(30, ("__kmp_mwait_template: T#%d exit\n", th_gtid));
}
#endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
/*! Release any threads spinning or sleeping on the flag. */
template <class C> static inline void __kmp_release_template(C *flag) {
  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
  KMP_DEBUG_ASSERT(flag->get());
  KMP_FSYNC_RELEASING(flag->get_void_p());

  flag->internal_release();

  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
                 flag->load()));

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    // Only check the sleep bit if an infinite block time is not in effect.
    if (flag->is_any_sleeping()) {
      for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
        kmp_info_t *waiter = flag->get_waiter(i);
        if (waiter) {
          int wait_gtid = waiter->th.th_info.ds.ds_gtid;
          // Wake up the thread if needed.
          KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
                        "flag(%p) set\n",
                        gtid, wait_gtid, flag->get()));
          flag->resume(wait_gtid);
        }
      }
    }
  }
}
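// Release and wake-up always happen in this order: internal_release() first
// publishes the new flag value, and only then are sleeping waiters resumed,
// so a thread woken from suspend()/mwait() re-checks the flag and sees it
// already set. Minimal sketch of the releaser side, mirroring the waiter
// sketch after __kmp_wait_template (illustrative only):
//
//   kmp_flag_64<> flag(&thr->th.th_bar[bs_forkjoin_barrier].bb.b_go);
//   flag.release(); // bump b_go, then resume any waiter whose sleep bit is set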
/*! Maps a flag word type onto the matching atomic primitives. */
template <typename FlagType> struct flag_traits {};

template <> struct flag_traits<kmp_uint32> {
  typedef kmp_uint32 flag_t;
  static const flag_type t = flag32;
  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR32(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND32(f, v);
  }
};

template <> struct flag_traits<kmp_uint64> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = flag64;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};
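// flag_traits lets the flag classes below pick the right atomic primitive for
// the flag's word size at compile time. For example, set_sleeping() on a
// 64-bit native flag expands, via the traits, to an atomic OR of the sleep
// bit (illustrative expansion):
//
//   flag_traits<kmp_uint64>::test_then_or(loc, KMP_BARRIER_SLEEP_STATE)
//       == KMP_TEST_THEN_OR64(loc, KMP_BARRIER_SLEEP_STATE)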
/*! Base flag class using volatile storage and the flag_traits primitives. */
template <typename FlagType, bool Sleepable>
class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value the flag is compared against to check release. */
  kmp_info_t *waiting_threads[1]; /**< Threads sleeping on this flag. */
  kmp_uint32 num_waiting_threads; /**< Number of threads sleeping on this flag. */

public:
  kmp_basic_flag_native(volatile FlagType *p)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
      : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*! @param i in   index into waiting_threads
   *  @result the thread waiting at index i */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*! @result num_waiting_threads */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*! @param thr in   the thread which is now waiting */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*! @result true if the flag object has been released. */
  bool done_check() {
    if (Sleepable)
      return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
             checker;
    else
      return traits_type::tcr(*(this->get())) == checker;
  }
  /*! @result true if the flag's old value indicates it was released. */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*! @result true if the flag object is not yet released. */
  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
  /*! Release the flag: atomically bump its value by 4. */
  void internal_release() {
    (void)traits_type::test_then_add4((volatile FlagType *)this->get());
  }
  /*! Set the sleep bit; @result the previous flag value. */
  FlagType set_sleeping() {
    return traits_type::test_then_or((volatile FlagType *)this->get(),
                                     KMP_BARRIER_SLEEP_STATE);
  }
  /*! Clear the sleep bit; @result the previous flag value. */
  FlagType unset_sleeping() {
    return traits_type::test_then_and((volatile FlagType *)this->get(),
                                      ~KMP_BARRIER_SLEEP_STATE);
  }
  /*! @result true if the sleep bit is set in old_loc. */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*! @result true if any waiter is sleeping on the flag. */
  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};
/*! Base flag class using std::atomic storage. */
template <typename FlagType, bool Sleepable>
class kmp_basic_flag : public kmp_flag<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value the flag is compared against to check release. */
  kmp_info_t *waiting_threads[1]; /**< Threads sleeping on this flag. */
  kmp_uint32 num_waiting_threads; /**< Number of threads sleeping on this flag. */

public:
  kmp_basic_flag(std::atomic<FlagType> *p)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
      : kmp_flag<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*! @param i in   index into waiting_threads
   *  @result the thread waiting at index i */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*! @result num_waiting_threads */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*! @param thr in   the thread which is now waiting */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*! @result true if the flag object has been released. */
  bool done_check() {
    if (Sleepable)
      return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
    else
      return this->load() == checker;
  }
  /*! @result true if the flag's old value indicates it was released. */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*! @result true if the flag object is not yet released. */
  bool notdone_check() { return this->load() != checker; }
  /*! Release the flag: atomically bump its value by 4. */
  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
  /*! Set the sleep bit; @result the previous flag value. */
  FlagType set_sleeping() {
    return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
  }
  /*! Clear the sleep bit; @result the previous flag value. */
  FlagType unset_sleeping() {
    return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  /*! @result true if the sleep bit is set in old_loc. */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*! @result true if any waiter is sleeping on the flag. */
  bool is_sleeping() { return is_sleeping_val(this->load()); }
  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};
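// kmp_basic_flag (std::atomic storage) and kmp_basic_flag_native (volatile
// storage plus explicit TCR/KMP_TEST_THEN_* primitives) expose the same
// interface, so __kmp_wait_template and __kmp_release_template work on either.
// The release step is equivalent in both variants (illustrative):
//
//   KMP_ATOMIC_ADD(loc, 4);             // atomic variant
//   traits_type::test_then_add4(loc);   // native variant, also adds 4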
template <bool Cancellable = false, bool Sleepable = true>
class kmp_flag_32 : public kmp_basic_flag<kmp_uint32, Sleepable> {
public:
  kmp_flag_32(std::atomic<kmp_uint32> *p)
      : kmp_basic_flag<kmp_uint32, Sleepable>(p) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
      : kmp_basic_flag<kmp_uint32, Sleepable>(p, thr) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
      : kmp_basic_flag<kmp_uint32, Sleepable>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); }
#endif
  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_32(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  bool wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      return __kmp_wait_template<kmp_flag_32, TRUE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      return __kmp_wait_template<kmp_flag_32, FALSE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
};
template <bool Cancellable = false, bool Sleepable = true>
class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64, Sleepable> {
public:
  kmp_flag_64(volatile kmp_uint64 *p)
      : kmp_basic_flag_native<kmp_uint64, Sleepable>(p) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
      : kmp_basic_flag_native<kmp_uint64, Sleepable>(p, thr) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
      : kmp_basic_flag_native<kmp_uint64, Sleepable>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); }
#endif
  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_64(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  bool wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      return __kmp_wait_template<kmp_flag_64, TRUE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      return __kmp_wait_template<kmp_flag_64, FALSE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
};
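// A 64-bit flag is what the fork/join and plain barriers spin on: workers
// wait on their th_bar[...].bb.b_go word until the primary thread releases it
// (kmp_flag_oncore::notdone_check below builds exactly such a flag when
// switching back to the thread's own b_go location). Sketch of that pattern,
// with TRUE requesting a final spin (illustrative only):
//
//   kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go,
//                      (kmp_uint64)KMP_BARRIER_STATE_BUMP);
//   __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));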
// Hierarchical 64-bit on-core barrier instantiation. Carries the barrier type
// to assist in switching flag locations while a thread is waiting.
class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
  kmp_uint64 checker;
  kmp_info_t *waiting_threads[1];
  kmp_uint32 num_waiting_threads;
  kmp_uint32 offset; /**< Byte of the flag that this operation acts on. */
  bool flag_switch; /**< Indicates a switch in flag location. */
  enum barrier_type bt; /**< Barrier type. */
  kmp_info_t *this_thr; /**< Thread that may be redirected to a new location. */
#if USE_ITT_BUILD
  void *itt_sync_obj; /**< ITT object to pass to the new flag location. */
#endif
  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
    return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
  }

public:
  kmp_flag_oncore(volatile kmp_uint64 *p)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        offset(idx), flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
                  enum barrier_type bar_t,
                  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
        num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
        this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check_val(kmp_uint64 old_loc) {
    return byteref(&old_loc, offset) == checker;
  }
  bool done_check() { return done_check_val(*get()); }
  bool notdone_check() {
    // Calculate whether the flag location should be switched.
    if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
      flag_switch = true;
    if (byteref(get(), offset) != 1 && !flag_switch)
      return true;
    else if (flag_switch) {
      this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
      kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go,
                         (kmp_uint64)KMP_BARRIER_STATE_BUMP);
      __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    return false;
  }
  void internal_release() {
    // Other threads can write their own bytes simultaneously.
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
      byteref(get(), offset) = 1;
    } else {
      kmp_uint64 mask = 0;
      byteref(&mask, offset) = 1;
      KMP_TEST_THEN_OR64(get(), mask);
    }
  }
  kmp_uint64 set_sleeping() {
    return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
  }
  kmp_uint64 unset_sleeping() {
    return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(kmp_uint64 old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*get()); }
  bool is_any_sleeping() { return is_sleeping_val(*get()); }
  void wait(kmp_info_t *this_thr, int final_spin) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_oncore, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_oncore, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  void mwait(int th_gtid) { __kmp_mwait_oncore(th_gtid, this); }
#endif
  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bt; }
};
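// The on-core flag packs one byte per child into a single 64-bit word, so up
// to eight children can signal through the same flag; byteref() above gives
// that byte view. Illustrative sketch of the indexing, assuming a
// little-endian target (hypothetical standalone values):
//
//   kmp_uint64 word = 0;
//   byteref(&word, 3) = 1; // child at offset 3 marks its byte
//   // word now reads 0x0000000001000000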
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
  // Dispatch on the stored flag's type and call the matching resume routine.
  switch (RCAST(kmp_flag_64<> *, CCAST(void *, flag))->get_type()) {
  case flag32:
    __kmp_resume_32(gtid, (kmp_flag_32<> *)NULL);
    break;
  case flag64:
    __kmp_resume_64(gtid, (kmp_flag_64<> *)NULL);
    break;
  case flag_oncore:
    __kmp_resume_oncore(gtid, (kmp_flag_oncore *)NULL);
    break;
  }
}
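// Typical call site: wake whichever flag type a sleeping thread parked on,
// using the location it advertised in th_sleep_loc before suspending
// (illustrative only):
//
//   __kmp_null_resume_wrapper(gtid, thr->th.th_sleep_loc);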
#endif // KMP_WAIT_RELEASE_H