LLVM OpenMP* Runtime Library
kmp_wait_release.h
1 /*
2  * kmp_wait_release.h -- Wait/Release implementation
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_WAIT_RELEASE_H
14 #define KMP_WAIT_RELEASE_H
15 
16 #include "kmp.h"
17 #include "kmp_itt.h"
18 #include "kmp_stats.h"
19 #if OMPT_SUPPORT
20 #include "ompt-specific.h"
21 #endif
22 
36 struct flag_properties {
37  unsigned int type : 16;
38  unsigned int reserved : 16;
39 };
40 
41 template <enum flag_type FlagType> struct flag_traits {};
42 
43 template <> struct flag_traits<flag32> {
44  typedef kmp_uint32 flag_t;
45  static const flag_type t = flag32;
46  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
47  static inline flag_t test_then_add4(volatile flag_t *f) {
48  return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
49  }
50  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
51  return KMP_TEST_THEN_OR32(f, v);
52  }
53  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
54  return KMP_TEST_THEN_AND32(f, v);
55  }
56 };
57 
58 template <> struct flag_traits<atomic_flag64> {
59  typedef kmp_uint64 flag_t;
60  static const flag_type t = atomic_flag64;
61  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
62  static inline flag_t test_then_add4(volatile flag_t *f) {
63  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
64  }
65  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
66  return KMP_TEST_THEN_OR64(f, v);
67  }
68  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
69  return KMP_TEST_THEN_AND64(f, v);
70  }
71 };
72 
73 template <> struct flag_traits<flag64> {
74  typedef kmp_uint64 flag_t;
75  static const flag_type t = flag64;
76  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
77  static inline flag_t test_then_add4(volatile flag_t *f) {
78  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
79  }
80  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
81  return KMP_TEST_THEN_OR64(f, v);
82  }
83  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
84  return KMP_TEST_THEN_AND64(f, v);
85  }
86 };
87 
88 template <> struct flag_traits<flag_oncore> {
89  typedef kmp_uint64 flag_t;
90  static const flag_type t = flag_oncore;
91  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
92  static inline flag_t test_then_add4(volatile flag_t *f) {
93  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
94  }
95  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
96  return KMP_TEST_THEN_OR64(f, v);
97  }
98  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
99  return KMP_TEST_THEN_AND64(f, v);
100  }
101 };
102 
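The flag_traits specializations above bind each flag_type to a concrete storage width (kmp_uint32 or kmp_uint64) and to the matching fetch-and-modify primitives, so the flag classes that follow can be written once as templates and still resolve to the right KMP_TEST_THEN_* calls. As a rough standalone illustration of the same pattern (not part of this header; my_flag_traits, my_flag and SLEEP_BIT are made-up names, and std::atomic stands in for the KMP_TEST_THEN_* macros):

    // Illustrative analog of the flag_traits pattern; all names are hypothetical.
    #include <atomic>
    #include <cstdint>
    #include <iostream>

    template <typename T> struct my_flag_traits {
      typedef T flag_t;
      static flag_t test_then_or(std::atomic<T> *f, T v) { return f->fetch_or(v); }
      static flag_t test_then_and(std::atomic<T> *f, T v) { return f->fetch_and(v); }
    };

    // A flag templated over its traits, as kmp_flag_native/kmp_flag_atomic are below.
    template <typename T> class my_flag {
      std::atomic<T> loc{0};
      static constexpr T SLEEP_BIT = 1; // stands in for KMP_BARRIER_SLEEP_STATE
    public:
      typedef my_flag_traits<T> traits_type;
      T set_sleeping() { return traits_type::test_then_or(&loc, SLEEP_BIT); }
      void unset_sleeping() { traits_type::test_then_and(&loc, T(~SLEEP_BIT)); }
      bool is_sleeping() { return (loc.load() & SLEEP_BIT) != 0; }
    };

    int main() {
      my_flag<std::uint64_t> f;
      f.set_sleeping();
      std::cout << std::boolalpha << f.is_sleeping() << "\n"; // true
      f.unset_sleeping();
      std::cout << f.is_sleeping() << "\n"; // false
    }
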
104 template <flag_type FlagType> class kmp_flag {
105 protected:
106  flag_properties t;
107  kmp_info_t *waiting_threads[1];
108  kmp_uint32 num_waiting_threads;
109  std::atomic<bool> *sleepLoc;
110 
111 public:
112  typedef flag_traits<FlagType> traits_type;
113  kmp_flag() : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(nullptr) {}
114  kmp_flag(int nwaiters)
115  : t({FlagType, 0U}), num_waiting_threads(nwaiters), sleepLoc(nullptr) {}
116  kmp_flag(std::atomic<bool> *sloc)
117  : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(sloc) {}
119  flag_type get_type() { return (flag_type)(t.type); }
120 
123  kmp_info_t *get_waiter(kmp_uint32 i) {
124  KMP_DEBUG_ASSERT(i < num_waiting_threads);
125  return waiting_threads[i];
126  }
128  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
131  void set_waiter(kmp_info_t *thr) {
132  waiting_threads[0] = thr;
133  num_waiting_threads = 1;
134  }
135  enum barrier_type get_bt() { return bs_last_barrier; }
136 };
137 
139 template <typename PtrType, flag_type FlagType, bool Sleepable>
140 class kmp_flag_native : public kmp_flag<FlagType> {
141 protected:
142  volatile PtrType *loc;
143  PtrType checker;
144  typedef flag_traits<FlagType> traits_type;
145 
146 public:
147  typedef PtrType flag_t;
148  kmp_flag_native(volatile PtrType *p) : kmp_flag<FlagType>(), loc(p) {}
149  kmp_flag_native(volatile PtrType *p, kmp_info_t *thr)
150  : kmp_flag<FlagType>(1), loc(p) {
151  this->waiting_threads[0] = thr;
152  }
153  kmp_flag_native(volatile PtrType *p, PtrType c)
154  : kmp_flag<FlagType>(), loc(p), checker(c) {}
155  kmp_flag_native(volatile PtrType *p, PtrType c, std::atomic<bool> *sloc)
156  : kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
157  virtual ~kmp_flag_native() {}
158  void *operator new(size_t size) { return __kmp_allocate(size); }
159  void operator delete(void *p) { __kmp_free(p); }
160  volatile PtrType *get() { return loc; }
161  void *get_void_p() { return RCAST(void *, CCAST(PtrType *, loc)); }
162  void set(volatile PtrType *new_loc) { loc = new_loc; }
163  PtrType load() { return *loc; }
164  void store(PtrType val) { *loc = val; }
166  virtual bool done_check() {
167  if (Sleepable && !(this->sleepLoc))
168  return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
169  checker;
170  else
171  return traits_type::tcr(*(this->get())) == checker;
172  }
175  virtual bool done_check_val(PtrType old_loc) { return old_loc == checker; }
181  virtual bool notdone_check() {
182  return traits_type::tcr(*(this->get())) != checker;
183  }
186  void internal_release() {
187  (void)traits_type::test_then_add4((volatile PtrType *)this->get());
188  }
192  PtrType set_sleeping() {
193  if (this->sleepLoc) {
194  this->sleepLoc->store(true);
195  return *(this->get());
196  }
197  return traits_type::test_then_or((volatile PtrType *)this->get(),
198  KMP_BARRIER_SLEEP_STATE);
199  }
203  void unset_sleeping() {
204  if (this->sleepLoc) {
205  this->sleepLoc->store(false);
206  return;
207  }
208  traits_type::test_then_and((volatile PtrType *)this->get(),
209  ~KMP_BARRIER_SLEEP_STATE);
210  }
213  bool is_sleeping_val(PtrType old_loc) {
214  if (this->sleepLoc)
215  return this->sleepLoc->load();
216  return old_loc & KMP_BARRIER_SLEEP_STATE;
217  }
219  bool is_sleeping() {
220  if (this->sleepLoc)
221  return this->sleepLoc->load();
222  return is_sleeping_val(*(this->get()));
223  }
224  bool is_any_sleeping() {
225  if (this->sleepLoc)
226  return this->sleepLoc->load();
227  return is_sleeping_val(*(this->get()));
228  }
229  kmp_uint8 *get_stolen() { return NULL; }
230 };
231 
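A worked example of the sleep-bit arithmetic behind done_check() and internal_release() above: in the Sleepable case without a separate sleepLoc, a waiter may have ORed the sleep bit into the very word it is waiting on, so the released value compares equal to checker only after that bit is masked off, while internal_release() adds 4 (test_then_add4) and therefore never disturbs the sleep bit. A standalone sketch with illustrative constants (SLEEP_STATE = 1 and BUMP = 4 mirror KMP_BARRIER_SLEEP_STATE and KMP_BARRIER_STATE_BUMP; the initial flag value and checker are hypothetical):

    #include <cassert>
    #include <cstdint>

    int main() {
      const std::uint64_t SLEEP_STATE = 1; // sleep bit (illustrative value)
      const std::uint64_t BUMP = 4;        // release increment (illustrative value)
      std::uint64_t flag = 0;              // value the waiter is spinning on
      std::uint64_t checker = BUMP;        // waiter is released once flag reaches this

      flag |= SLEEP_STATE;                 // waiter: set_sleeping() before suspending
      flag += BUMP;                        // releaser: internal_release() bumps the flag
      assert(flag != checker);             // raw compare fails because of the sleep bit
      assert((flag & ~SLEEP_STATE) == checker); // done_check() masks the bit and succeeds
      return 0;
    }
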
233 template <typename PtrType, flag_type FlagType, bool Sleepable>
234 class kmp_flag_atomic : public kmp_flag<FlagType> {
235 protected:
236  std::atomic<PtrType> *loc;
237  PtrType checker;
238 public:
239  typedef flag_traits<FlagType> traits_type;
240  typedef PtrType flag_t;
241  kmp_flag_atomic(std::atomic<PtrType> *p) : kmp_flag<FlagType>(), loc(p) {}
242  kmp_flag_atomic(std::atomic<PtrType> *p, kmp_info_t *thr)
243  : kmp_flag<FlagType>(1), loc(p) {
244  this->waiting_threads[0] = thr;
245  }
246  kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c)
247  : kmp_flag<FlagType>(), loc(p), checker(c) {}
248  kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c, std::atomic<bool> *sloc)
249  : kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
251  std::atomic<PtrType> *get() { return loc; }
253  void *get_void_p() { return RCAST(void *, loc); }
255  void set(std::atomic<PtrType> *new_loc) { loc = new_loc; }
257  PtrType load() { return loc->load(std::memory_order_acquire); }
259  void store(PtrType val) { loc->store(val, std::memory_order_release); }
261  bool done_check() {
262  if (Sleepable && !(this->sleepLoc))
263  return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
264  else
265  return this->load() == checker;
266  }
269  bool done_check_val(PtrType old_loc) { return old_loc == checker; }
275  bool notdone_check() { return this->load() != checker; }
278  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
282  PtrType set_sleeping() {
283  if (this->sleepLoc) {
284  this->sleepLoc->store(true);
285  return *(this->get());
286  }
287  return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
288  }
292  void unset_sleeping() {
293  if (this->sleepLoc) {
294  this->sleepLoc->store(false);
295  return;
296  }
297  KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
298  }
301  bool is_sleeping_val(PtrType old_loc) {
302  if (this->sleepLoc)
303  return this->sleepLoc->load();
304  return old_loc & KMP_BARRIER_SLEEP_STATE;
305  }
307  bool is_sleeping() {
308  if (this->sleepLoc)
309  return this->sleepLoc->load();
310  return is_sleeping_val(this->load());
311  }
312  bool is_any_sleeping() {
313  if (this->sleepLoc)
314  return this->sleepLoc->load();
315  return is_sleeping_val(this->load());
316  }
317  kmp_uint8 *get_stolen() { return NULL; }
318 };
319 
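Unlike kmp_flag_native, which goes through volatile accesses and the TCR_*/KMP_TEST_THEN_* macros, kmp_flag_atomic pairs store() (memory_order_release) with load() (memory_order_acquire), so everything the releasing thread wrote before updating the flag is guaranteed visible to a waiter once done_check() observes the new value. A minimal standalone sketch of that handoff (the payload variable and the flag value are hypothetical):

    #include <atomic>
    #include <cassert>
    #include <thread>

    int main() {
      int payload = 0;          // data the releaser produces before releasing the flag
      std::atomic<int> flag{0}; // stands in for the flag word

      std::thread releaser([&] {
        payload = 42;                             // writes made before the release...
        flag.store(1, std::memory_order_release); // ...as in kmp_flag_atomic::store()
      });
      std::thread waiter([&] {
        while (flag.load(std::memory_order_acquire) != 1) // as in load()/done_check()
          std::this_thread::yield();
        assert(payload == 42); // acquire pairs with release: the payload is visible
      });
      releaser.join();
      waiter.join();
      return 0;
    }
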
320 #if OMPT_SUPPORT
321 OMPT_NOINLINE
322 static void __ompt_implicit_task_end(kmp_info_t *this_thr,
323  ompt_state_t ompt_state,
324  ompt_data_t *tId) {
325  int ds_tid = this_thr->th.th_info.ds.ds_tid;
326  if (ompt_state == ompt_state_wait_barrier_implicit) {
327  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
328 #if OMPT_OPTIONAL
329  void *codeptr = NULL;
330  if (ompt_enabled.ompt_callback_sync_region_wait) {
331  ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
332  ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
333  codeptr);
334  }
335  if (ompt_enabled.ompt_callback_sync_region) {
336  ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
337  ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
338  codeptr);
339  }
340 #endif
341  if (!KMP_MASTER_TID(ds_tid)) {
342  if (ompt_enabled.ompt_callback_implicit_task) {
343  int flags = this_thr->th.ompt_thread_info.parallel_flags;
344  flags = (flags & ompt_parallel_league) ? ompt_task_initial
345  : ompt_task_implicit;
346  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
347  ompt_scope_end, NULL, tId, 0, ds_tid, flags);
348  }
349  // return to idle state
350  this_thr->th.ompt_thread_info.state = ompt_state_idle;
351  } else {
352  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
353  }
354  }
355 }
356 #endif
357 
358 /* Spin wait loop that first does pause/yield, then sleep. A thread that calls
359  __kmp_wait_* must make certain that another thread calls __kmp_release
360  to wake it back up to prevent deadlocks!
361 
362  NOTE: We may not belong to a team at this point. */
363 template <class C, bool final_spin, bool Cancellable = false,
364  bool Sleepable = true>
365 static inline bool
366 __kmp_wait_template(kmp_info_t *this_thr,
367  C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
368 #if USE_ITT_BUILD && USE_ITT_NOTIFY
369  volatile void *spin = flag->get();
370 #endif
371  kmp_uint32 spins;
372  int th_gtid;
373  int tasks_completed = FALSE;
374 #if !KMP_USE_MONITOR
375  kmp_uint64 poll_count;
376  kmp_uint64 hibernate_goal;
377 #else
378  kmp_uint32 hibernate;
379 #endif
380  kmp_uint64 time;
381 
382  KMP_FSYNC_SPIN_INIT(spin, NULL);
383  if (flag->done_check()) {
384  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
385  return false;
386  }
387  th_gtid = this_thr->th.th_info.ds.ds_gtid;
388  if (Cancellable) {
389  kmp_team_t *team = this_thr->th.th_team;
390  if (team && team->t.t_cancel_request == cancel_parallel)
391  return true;
392  }
393 #if KMP_OS_UNIX
394  if (final_spin)
395  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
396 #endif
397  KA_TRACE(20,
398  ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
399 #if KMP_STATS_ENABLED
400  stats_state_e thread_state = KMP_GET_THREAD_STATE();
401 #endif
402 
403 /* OMPT Behavior:
404 THIS function is called from
405  __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
406  these have join / fork behavior
407 
408  In these cases, we don't change the state or trigger events in THIS
409 function.
410  Events are triggered in the calling code (__kmp_barrier):
411 
412  state := ompt_state_overhead
413  barrier-begin
414  barrier-wait-begin
415  state := ompt_state_wait_barrier
416  call join-barrier-implementation (finally arrive here)
417  {}
418  call fork-barrier-implementation (finally arrive here)
419  {}
420  state := ompt_state_overhead
421  barrier-wait-end
422  barrier-end
423  state := ompt_state_work_parallel
424 
425 
426  __kmp_fork_barrier (after thread creation, before executing implicit task)
427  call fork-barrier-implementation (finally arrive here)
428  {} // worker arrive here with state = ompt_state_idle
429 
430 
431  __kmp_join_barrier (implicit barrier at end of parallel region)
432  state := ompt_state_barrier_implicit
433  barrier-begin
434  barrier-wait-begin
435  call join-barrier-implementation (finally arrive here
436 final_spin=FALSE)
437  {
438  }
439  __kmp_fork_barrier (implicit barrier at end of parallel region)
440  call fork-barrier-implementation (finally arrive here final_spin=TRUE)
441 
442  Worker after task-team is finished:
443  barrier-wait-end
444  barrier-end
445  implicit-task-end
446  idle-begin
447  state := ompt_state_idle
448 
449  Before leaving, if state = ompt_state_idle
450  idle-end
451  state := ompt_state_overhead
452 */
453 #if OMPT_SUPPORT
454  ompt_state_t ompt_entry_state;
455  ompt_data_t *tId;
456  if (ompt_enabled.enabled) {
457  ompt_entry_state = this_thr->th.ompt_thread_info.state;
458  if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
459  KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
460  ompt_lw_taskteam_t *team = NULL;
461  if (this_thr->th.th_team)
462  team = this_thr->th.th_team->t.ompt_serialized_team_info;
463  if (team) {
464  tId = &(team->ompt_task_info.task_data);
465  } else {
466  tId = OMPT_CUR_TASK_DATA(this_thr);
467  }
468  } else {
469  tId = &(this_thr->th.ompt_thread_info.task_data);
470  }
471  if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
472  this_thr->th.th_task_team == NULL)) {
473  // implicit task is done. Either no taskqueue, or task-team finished
474  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
475  }
476  }
477 #endif
478 
479  KMP_INIT_YIELD(spins); // Setup for waiting
480  KMP_INIT_BACKOFF(time);
481 
482  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
483  __kmp_pause_status == kmp_soft_paused) {
484 #if KMP_USE_MONITOR
485 // The worker threads cannot rely on the team struct existing at this point.
486 // Use the bt values cached in the thread struct instead.
487 #ifdef KMP_ADJUST_BLOCKTIME
488  if (__kmp_pause_status == kmp_soft_paused ||
489  (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
490  // Force immediate suspend if not set by user and more threads than
491  // available procs
492  hibernate = 0;
493  else
494  hibernate = this_thr->th.th_team_bt_intervals;
495 #else
496  hibernate = this_thr->th.th_team_bt_intervals;
497 #endif /* KMP_ADJUST_BLOCKTIME */
498 
499  /* If the blocktime is nonzero, we want to make sure that we spin wait for
500  the entirety of the specified #intervals, plus up to one interval more.
501  This increment makes certain that this thread doesn't go to sleep too
502  soon. */
503  if (hibernate != 0)
504  hibernate++;
505 
506  // Add in the current time value.
507  hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
508  KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
509  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
510  hibernate - __kmp_global.g.g_time.dt.t_value));
511 #else
512  if (__kmp_pause_status == kmp_soft_paused) {
513  // Force immediate suspend
514  hibernate_goal = KMP_NOW();
515  } else
516  hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
517  poll_count = 0;
518  (void)poll_count;
519 #endif // KMP_USE_MONITOR
520  }
521 
522  KMP_MB();
523 
524  // Main wait spin loop
525  while (flag->notdone_check()) {
526  kmp_task_team_t *task_team = NULL;
527  if (__kmp_tasking_mode != tskm_immediate_exec) {
528  task_team = this_thr->th.th_task_team;
529  /* If the thread's task team pointer is NULL, it means one of 3 things:
530  1) A newly-created thread is first being released by
531  __kmp_fork_barrier(), and its task team has not been set up yet.
532  2) All tasks have been executed to completion.
533  3) Tasking is off for this region. This could be because we are in a
534  serialized region (perhaps the outer one), or else tasking was manually
535  disabled (KMP_TASKING=0). */
536  if (task_team != NULL) {
537  if (TCR_SYNC_4(task_team->tt.tt_active)) {
538  if (KMP_TASKING_ENABLED(task_team)) {
539  flag->execute_tasks(
540  this_thr, th_gtid, final_spin,
541  &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
542  } else
543  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
544  } else {
545  KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
546 #if OMPT_SUPPORT
547  // task-team is done now, other cases should be caught above
548  if (final_spin && ompt_enabled.enabled)
549  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
550 #endif
551  this_thr->th.th_task_team = NULL;
552  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
553  }
554  } else {
555  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
556  } // if
557  } // if
558 
559  KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
560  if (TCR_4(__kmp_global.g.g_done)) {
561  if (__kmp_global.g.g_abort)
562  __kmp_abort_thread();
563  break;
564  }
565 
566  // If we are oversubscribed, or have waited a bit (and
567  // KMP_LIBRARY=throughput), then yield
568  KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
569 
570 #if KMP_STATS_ENABLED
571  // Check if thread has been signalled to idle state
572  // This indicates that the logical "join-barrier" has finished
573  if (this_thr->th.th_stats->isIdle() &&
574  KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
575  KMP_SET_THREAD_STATE(IDLE);
576  KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
577  }
578 #endif
579  // Check if the barrier surrounding this wait loop has been cancelled
580  if (Cancellable) {
581  kmp_team_t *team = this_thr->th.th_team;
582  if (team && team->t.t_cancel_request == cancel_parallel)
583  break;
584  }
585 
586  // For a hidden helper thread, a NULL task_team means the main thread has
587  // not yet released the barrier. We must not go to sleep here: if we did,
588  // the hidden helper threads would still be asleep when the main thread
589  // releases all children barriers, so the follow-up setup (such as task
590  // team sync) would be skipped and this thread would be left without a
591  // task team. That is usually harmless. However, there is a corner case:
592  // when the first task encountered is an untied task, the check in
593  // __kmp_task_alloc crashes because it uses the task team pointer without
594  // checking whether it is nullptr, presumably assuming the pointer is
595  // always valid at that point.
596  if (task_team && KMP_HIDDEN_HELPER_WORKER_THREAD(th_gtid) &&
597  !TCR_4(__kmp_hidden_helper_team_done)) {
598  // If there are still hidden helper tasks to be executed, the hidden helper
599  // thread will not enter a waiting status.
600  if (KMP_ATOMIC_LD_ACQ(&__kmp_unexecuted_hidden_helper_tasks) == 0) {
601  __kmp_hidden_helper_worker_thread_wait();
602  }
603  continue;
604  }
605 
606  // Don't suspend if KMP_BLOCKTIME is set to "infinite"
607  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
608  __kmp_pause_status != kmp_soft_paused)
609  continue;
610 
611  // Don't suspend if there is a likelihood of new tasks being spawned.
612  if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
613  continue;
614 
615 #if KMP_USE_MONITOR
616  // If we have waited a bit more, fall asleep
617  if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
618  continue;
619 #else
620  if (KMP_BLOCKING(hibernate_goal, poll_count++))
621  continue;
622 #endif
623  // Don't suspend if wait loop designated non-sleepable
624  // in template parameters
625  if (!Sleepable)
626  continue;
627 
628  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
629  __kmp_pause_status != kmp_soft_paused)
630  continue;
631 
632 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
633  if (__kmp_mwait_enabled || __kmp_umwait_enabled) {
634  KF_TRACE(50, ("__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid));
635  flag->mwait(th_gtid);
636  } else {
637 #endif
638  KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
639 #if KMP_OS_UNIX
640  if (final_spin)
641  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
642 #endif
643  flag->suspend(th_gtid);
644 #if KMP_OS_UNIX
645  if (final_spin)
646  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
647 #endif
648 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
649  }
650 #endif
651 
652  if (TCR_4(__kmp_global.g.g_done)) {
653  if (__kmp_global.g.g_abort)
654  __kmp_abort_thread();
655  break;
656  } else if (__kmp_tasking_mode != tskm_immediate_exec &&
657  this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
658  this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
659  }
660  // TODO: If thread is done with work and times out, disband/free
661  }
662 
663 #if OMPT_SUPPORT
664  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
665  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
666 #if OMPT_OPTIONAL
667  if (final_spin) {
668  __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
669  ompt_exit_state = this_thr->th.ompt_thread_info.state;
670  }
671 #endif
672  if (ompt_exit_state == ompt_state_idle) {
673  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
674  }
675  }
676 #endif
677 #if KMP_STATS_ENABLED
678  // If we were put into idle state, pop that off the state stack
679  if (KMP_GET_THREAD_STATE() == IDLE) {
680  KMP_POP_PARTITIONED_TIMER();
681  KMP_SET_THREAD_STATE(thread_state);
682  this_thr->th.th_stats->resetIdleFlag();
683  }
684 #endif
685 
686 #if KMP_OS_UNIX
687  if (final_spin)
688  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
689 #endif
690  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
691  if (Cancellable) {
692  kmp_team_t *team = this_thr->th.th_team;
693  if (team && team->t.t_cancel_request == cancel_parallel) {
694  if (tasks_completed) {
695  // undo the previous decrement of unfinished_threads so that the
696  // thread can decrement at the join barrier with no problem
697  kmp_task_team_t *task_team = this_thr->th.th_task_team;
698  std::atomic<kmp_int32> *unfinished_threads =
699  &(task_team->tt.tt_unfinished_threads);
700  KMP_ATOMIC_INC(unfinished_threads);
701  }
702  return true;
703  }
704  }
705  return false;
706 }
707 
708 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
709 // Set up a monitor on the flag variable causing the calling thread to wait in
710 // a less active state until the flag variable is modified.
711 template <class C>
712 static inline void __kmp_mwait_template(int th_gtid, C *flag) {
713  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_mwait);
714  kmp_info_t *th = __kmp_threads[th_gtid];
715 
716  KF_TRACE(30, ("__kmp_mwait_template: T#%d enter for flag = %p\n", th_gtid,
717  flag->get()));
718 
719  // User-level mwait is available
720  KMP_DEBUG_ASSERT(__kmp_mwait_enabled || __kmp_umwait_enabled);
721 
722  __kmp_suspend_initialize_thread(th);
723  __kmp_lock_suspend_mx(th);
724 
725  volatile void *spin = flag->get();
726  void *cacheline = (void *)(kmp_uintptr_t(spin) & ~(CACHE_LINE - 1));
727 
728  if (!flag->done_check()) {
729  // Mark thread as no longer active
730  th->th.th_active = FALSE;
731  if (th->th.th_active_in_pool) {
732  th->th.th_active_in_pool = FALSE;
733  KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
734  KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
735  }
736  flag->set_sleeping();
737  KF_TRACE(50, ("__kmp_mwait_template: T#%d calling monitor\n", th_gtid));
738 #if KMP_HAVE_UMWAIT
739  if (__kmp_umwait_enabled) {
740  __kmp_umonitor(cacheline);
741  }
742 #elif KMP_HAVE_MWAIT
743  if (__kmp_mwait_enabled) {
744  __kmp_mm_monitor(cacheline, 0, 0);
745  }
746 #endif
747  // To avoid a race, check flag between 'monitor' and 'mwait'. A write to
748  // the address could happen after the last time we checked and before
749  // monitoring started, in which case monitor can't detect the change.
750  if (flag->done_check())
751  flag->unset_sleeping();
752  else {
753  // if flag changes here, wake-up happens immediately
754  TCW_PTR(th->th.th_sleep_loc, (void *)flag);
755  th->th.th_sleep_loc_type = flag->get_type();
756  __kmp_unlock_suspend_mx(th);
757  KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
758 #if KMP_HAVE_UMWAIT
759  if (__kmp_umwait_enabled) {
760  __kmp_umwait(1, 100); // to do: enable ctrl via hints, backoff counter
761  }
762 #elif KMP_HAVE_MWAIT
763  if (__kmp_mwait_enabled) {
764  __kmp_mm_mwait(0, __kmp_mwait_hints);
765  }
766 #endif
767  KF_TRACE(50, ("__kmp_mwait_template: T#%d mwait done\n", th_gtid));
768  __kmp_lock_suspend_mx(th);
769  // Clean up sleep info; doesn't matter how/why this thread stopped waiting
770  if (flag->is_sleeping())
771  flag->unset_sleeping();
772  TCW_PTR(th->th.th_sleep_loc, NULL);
773  th->th.th_sleep_loc_type = flag_unset;
774  }
775  // Mark thread as active again
776  th->th.th_active = TRUE;
777  if (TCR_4(th->th.th_in_pool)) {
778  KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
779  th->th.th_active_in_pool = TRUE;
780  }
781  } // Drop out to main wait loop to check flag, handle tasks, etc.
782  __kmp_unlock_suspend_mx(th);
783  KF_TRACE(30, ("__kmp_mwait_template: T#%d exit\n", th_gtid));
784 }
785 #endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
786 
787 /* Release any threads specified as waiting on the flag by releasing the flag
788  and resuming the waiting thread if indicated by the sleep bit(s). A thread that
789  calls __kmp_wait_template must call this function to wake up the potentially
790  sleeping thread and prevent deadlocks! */
791 template <class C> static inline void __kmp_release_template(C *flag) {
792 #ifdef KMP_DEBUG
793  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
794 #endif
795  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
796  KMP_DEBUG_ASSERT(flag->get());
797  KMP_FSYNC_RELEASING(flag->get_void_p());
798 
799  flag->internal_release();
800 
801  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
802  flag->load()));
803 
804  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
805  // Only need to check sleep stuff if infinite block time not set.
806  // Are *any* threads waiting on flag sleeping?
807  if (flag->is_any_sleeping()) {
808  for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
809  // if sleeping waiter exists at i, sets current_waiter to i inside flag
810  kmp_info_t *waiter = flag->get_waiter(i);
811  if (waiter) {
812  int wait_gtid = waiter->th.th_info.ds.ds_gtid;
813  // Wake up thread if needed
814  KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
815  "flag(%p) set\n",
816  gtid, wait_gtid, flag->get()));
817  flag->resume(wait_gtid); // unsets flag's current_waiter when done
818  }
819  }
820  }
821  }
822 }
823 
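The comments above __kmp_wait_template and __kmp_release_template describe a strict pairing: a waiter spins, then marks itself sleeping and suspends, and the releaser must both publish the new flag value and wake any sleeper, or the waiter can sleep forever. The standalone analog below sketches that protocol (toy_flag and every name in it are made up; a condition variable stands in for __kmp_suspend_*/__kmp_resume_*, and the spin count is arbitrary):

    #include <atomic>
    #include <condition_variable>
    #include <iostream>
    #include <mutex>
    #include <thread>

    struct toy_flag {
      std::atomic<unsigned> value{0};
      unsigned checker = 4;           // waiter is done once value == checker
      std::mutex mx;
      std::condition_variable cv;
      std::atomic<bool> sleeping{false};

      void wait() {                   // analog of __kmp_wait_template
        for (int spins = 0; spins < 1000; ++spins) { // spin/yield phase
          if (value.load(std::memory_order_acquire) == checker) return;
          std::this_thread::yield();
        }
        std::unique_lock<std::mutex> lk(mx);         // sleep phase (suspend)
        sleeping.store(true);
        cv.wait(lk, [&] { return value.load(std::memory_order_acquire) == checker; });
        sleeping.store(false);
      }
      void release() {                // analog of __kmp_release_template
        value.store(checker, std::memory_order_release); // internal_release()
        if (sleeping.load()) {        // is_any_sleeping(): only then pay for a wakeup
          std::lock_guard<std::mutex> lk(mx);             // resume(): wake the sleeper
          cv.notify_one();
        }
      }
    };

    int main() {
      toy_flag f;
      std::thread waiter([&] { f.wait(); std::cout << "waiter released\n"; });
      std::thread releaser([&] { f.release(); });
      waiter.join();
      releaser.join();
      return 0;
    }
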
824 template <bool Cancellable, bool Sleepable>
825 class kmp_flag_32 : public kmp_flag_atomic<kmp_uint32, flag32, Sleepable> {
826 public:
827  kmp_flag_32(std::atomic<kmp_uint32> *p)
828  : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p) {}
829  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
830  : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, thr) {}
831  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
832  : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, c) {}
833  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
834 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
835  void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); }
836 #endif
837  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
838  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
839  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
840  kmp_int32 is_constrained) {
841  return __kmp_execute_tasks_32(
842  this_thr, gtid, this, final_spin,
843  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
844  }
845  bool wait(kmp_info_t *this_thr,
846  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
847  if (final_spin)
848  return __kmp_wait_template<kmp_flag_32, TRUE, Cancellable, Sleepable>(
849  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
850  else
851  return __kmp_wait_template<kmp_flag_32, FALSE, Cancellable, Sleepable>(
852  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
853  }
854  void release() { __kmp_release_template(this); }
855  flag_type get_ptr_type() { return flag32; }
856 };
857 
858 template <bool Cancellable, bool Sleepable>
859 class kmp_flag_64 : public kmp_flag_native<kmp_uint64, flag64, Sleepable> {
860 public:
861  kmp_flag_64(volatile kmp_uint64 *p)
862  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p) {}
863  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
864  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, thr) {}
865  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
866  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c) {}
867  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c, std::atomic<bool> *loc)
868  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c, loc) {}
869  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
870 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
871  void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); }
872 #endif
873  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
874  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
875  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
876  kmp_int32 is_constrained) {
877  return __kmp_execute_tasks_64(
878  this_thr, gtid, this, final_spin,
879  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
880  }
881  bool wait(kmp_info_t *this_thr,
882  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
883  if (final_spin)
884  return __kmp_wait_template<kmp_flag_64, TRUE, Cancellable, Sleepable>(
885  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
886  else
887  return __kmp_wait_template<kmp_flag_64, FALSE, Cancellable, Sleepable>(
888  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
889  }
890  void release() { __kmp_release_template(this); }
891  flag_type get_ptr_type() { return flag64; }
892 };
893 
894 template <bool Cancellable, bool Sleepable>
895 class kmp_atomic_flag_64
896  : public kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable> {
897 public:
898  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p)
899  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p) {}
900  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_info_t *thr)
901  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, thr) {}
902  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c)
903  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c) {}
904  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c,
905  std::atomic<bool> *loc)
906  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c, loc) {}
907  void suspend(int th_gtid) { __kmp_atomic_suspend_64(th_gtid, this); }
908  void mwait(int th_gtid) { __kmp_atomic_mwait_64(th_gtid, this); }
909  void resume(int th_gtid) { __kmp_atomic_resume_64(th_gtid, this); }
910  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
911  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
912  kmp_int32 is_constrained) {
913  return __kmp_atomic_execute_tasks_64(
914  this_thr, gtid, this, final_spin,
915  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
916  }
917  bool wait(kmp_info_t *this_thr,
918  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
919  if (final_spin)
920  return __kmp_wait_template<kmp_atomic_flag_64, TRUE, Cancellable,
921  Sleepable>(
922  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
923  else
924  return __kmp_wait_template<kmp_atomic_flag_64, FALSE, Cancellable,
925  Sleepable>(
926  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
927  }
928  void release() { __kmp_release_template(this); }
929  flag_type get_ptr_type() { return atomic_flag64; }
930 };
931 
932 // Hierarchical 64-bit on-core barrier instantiation
933 class kmp_flag_oncore : public kmp_flag_native<kmp_uint64, flag_oncore, false> {
934  kmp_uint32 offset;
935  bool flag_switch;
936  enum barrier_type bt;
937  kmp_info_t *this_thr;
938 #if USE_ITT_BUILD
939  void *itt_sync_obj;
940 #endif
941  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
942  return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
943  }
944 
945 public:
946  kmp_flag_oncore(volatile kmp_uint64 *p)
947  : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), flag_switch(false) {
948  }
949  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
950  : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), offset(idx),
951  flag_switch(false),
952  bt(bs_last_barrier) USE_ITT_BUILD_ARG(itt_sync_obj(nullptr)) {}
953  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
954  enum barrier_type bar_t,
955  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
956  : kmp_flag_native<kmp_uint64, flag_oncore, false>(p, c), offset(idx),
957  flag_switch(false), bt(bar_t),
958  this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
959  virtual ~kmp_flag_oncore() override {}
960  void *operator new(size_t size) { return __kmp_allocate(size); }
961  void operator delete(void *p) { __kmp_free(p); }
962  bool done_check_val(kmp_uint64 old_loc) override {
963  return byteref(&old_loc, offset) == checker;
964  }
965  bool done_check() override { return done_check_val(*get()); }
966  bool notdone_check() override {
967  // Calculate flag_switch
968  if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
969  flag_switch = true;
970  if (byteref(get(), offset) != 1 && !flag_switch)
971  return true;
972  else if (flag_switch) {
973  this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
974  kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go,
975  (kmp_uint64)KMP_BARRIER_STATE_BUMP);
976  __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
977  }
978  return false;
979  }
980  void internal_release() {
981  // Other threads can write their own bytes simultaneously.
982  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
983  byteref(get(), offset) = 1;
984  } else {
985  kmp_uint64 mask = 0;
986  byteref(&mask, offset) = 1;
987  KMP_TEST_THEN_OR64(get(), mask);
988  }
989  }
990  void wait(kmp_info_t *this_thr, int final_spin) {
991  if (final_spin)
992  __kmp_wait_template<kmp_flag_oncore, TRUE>(
993  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
994  else
995  __kmp_wait_template<kmp_flag_oncore, FALSE>(
996  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
997  }
998  void release() { __kmp_release_template(this); }
999  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
1000 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
1001  void mwait(int th_gtid) { __kmp_mwait_oncore(th_gtid, this); }
1002 #endif
1003  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
1004  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
1005  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
1006  kmp_int32 is_constrained) {
1007 #if OMPD_SUPPORT
1008  int ret = __kmp_execute_tasks_oncore(
1009  this_thr, gtid, this, final_spin,
1010  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1011  if (ompd_state & OMPD_ENABLE_BP)
1012  ompd_bp_task_end();
1013  return ret;
1014 #else
1015  return __kmp_execute_tasks_oncore(
1016  this_thr, gtid, this, final_spin,
1017  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1018 #endif
1019  }
1020  enum barrier_type get_bt() { return bt; }
1021  flag_type get_ptr_type() { return flag_oncore; }
1022 };
1023 
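kmp_flag_oncore treats the 64-bit flag as an array of bytes, one per thread participating in the on-core barrier: byteref() selects the byte at this thread's offset, done_check_val() compares only that byte with checker, and internal_release() either writes the byte directly (infinite blocktime, so nobody can be sleeping on the word) or ORs in a one-byte mask so that concurrent releases of sibling bytes are not lost. A standalone sketch of the byte-mask idea (the offset, checker value, and byte_of helper are illustrative):

    #include <atomic>
    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Read byte 'idx' out of a 64-bit flag value (illustrative helper).
    static std::uint8_t byte_of(std::uint64_t v, std::size_t idx) {
      return static_cast<std::uint8_t>(v >> (8 * idx));
    }

    int main() {
      std::atomic<std::uint64_t> oncore_flag{0};
      const std::uint8_t checker = 1;  // a thread is released once its byte equals 1

      // The thread at offset 3 releases itself by OR-ing a mask that only
      // touches its own byte, so sibling bytes written concurrently survive.
      const std::size_t offset = 3;
      const std::uint64_t mask = std::uint64_t(checker) << (8 * offset);
      oncore_flag.fetch_or(mask);

      assert(byte_of(oncore_flag.load(), offset) == checker); // done_check_val() analog
      assert(byte_of(oncore_flag.load(), 0) == 0);            // thread at offset 0 still waits
      return 0;
    }
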
1024 static inline void __kmp_null_resume_wrapper(kmp_info_t *thr) {
1025  int gtid = __kmp_gtid_from_thread(thr);
1026  void *flag = CCAST(void *, thr->th.th_sleep_loc);
1027  flag_type type = thr->th.th_sleep_loc_type;
1028  if (!flag)
1029  return;
1030  // Attempt to wake up a thread: examine its type and call appropriate template
1031  switch (type) {
1032  case flag32:
1033  __kmp_resume_32(gtid, RCAST(kmp_flag_32<> *, flag));
1034  break;
1035  case flag64:
1036  __kmp_resume_64(gtid, RCAST(kmp_flag_64<> *, flag));
1037  break;
1038  case atomic_flag64:
1039  __kmp_atomic_resume_64(gtid, RCAST(kmp_atomic_flag_64<> *, flag));
1040  break;
1041  case flag_oncore:
1042  __kmp_resume_oncore(gtid, RCAST(kmp_flag_oncore *, flag));
1043  break;
1044 #ifdef KMP_DEBUG
1045  case flag_unset:
1046  KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d is unset\n", type));
1047  break;
1048  default:
1049  KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d does not match any "
1050  "known flag type\n",
1051  type));
1052 #endif
1053  }
1054 }
1055 
1060 #endif // KMP_WAIT_RELEASE_H