LLVM OpenMP* Runtime Library
kmp_wait_release.h
1 /*
2  * kmp_wait_release.h -- Wait/Release implementation
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_WAIT_RELEASE_H
14 #define KMP_WAIT_RELEASE_H
15 
16 #include "kmp.h"
17 #include "kmp_itt.h"
18 #include "kmp_stats.h"
19 #if OMPT_SUPPORT
20 #include "ompt-specific.h"
21 #endif
22 
39 enum flag_type {
40  flag32, /**< 32 bit flags */
41  flag64, /**< 64 bit flags */
42  flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
43 };
44 
45 struct flag_properties {
46  unsigned int type : 16;
47  unsigned int reserved : 16;
48 };
49 
53 template <typename P> class kmp_flag_native {
54  volatile P *loc;
55  flag_properties t;
56 
57 public:
58  typedef P flag_t;
59  kmp_flag_native(volatile P *p, flag_type ft)
60  : loc(p), t({(short unsigned int)ft, 0U}) {}
61  volatile P *get() { return loc; }
62  void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
63  void set(volatile P *new_loc) { loc = new_loc; }
64  flag_type get_type() { return (flag_type)(t.type); }
65  P load() { return *loc; }
66  void store(P val) { *loc = val; }
67 };
68 
72 template <typename P> class kmp_flag {
73  std::atomic<P> *loc;
76  flag_properties t;
77 public:
78  typedef P flag_t;
79  kmp_flag(std::atomic<P> *p, flag_type ft)
80  : loc(p), t({(short unsigned int)ft, 0U}) {}
84  std::atomic<P> *get() { return loc; }
88  void *get_void_p() { return RCAST(void *, loc); }
92  void set(std::atomic<P> *new_loc) { loc = new_loc; }
96  flag_type get_type() { return (flag_type)(t.type); }
100  P load() { return loc->load(std::memory_order_acquire); }
104  void store(P val) { loc->store(val, std::memory_order_release); }
105  // Derived classes must provide the following:
106  /*
107  kmp_info_t * get_waiter(kmp_uint32 i);
108  kmp_uint32 get_num_waiters();
109  bool done_check();
110  bool done_check_val(P old_loc);
111  bool notdone_check();
112  P internal_release();
113  void suspend(int th_gtid);
114  void mwait(int th_gtid);
115  void resume(int th_gtid);
116  P set_sleeping();
117  P unset_sleeping();
118  bool is_sleeping();
119  bool is_any_sleeping();
120  bool is_sleeping_val(P old_loc);
121  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
122  int *thread_finished
123  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32
124  is_constrained);
125  */
126 };
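The comment block above lists the interface a concrete flag class must supply. Purely as an illustration (kmp_example_flag is not part of this header), a minimal single-waiter flag over kmp_uint32 could fill in the completion checks and the release as sketched below, leaving suspend/mwait/resume, the sleep-bit handling, and execute_tasks() to the real classes further down in this file.

// Illustrative sketch only -- follows the "spin until the location reaches a
// checker value" convention used by the kmp_basic_flag classes below.
class kmp_example_flag : public kmp_flag<kmp_uint32> {
  kmp_uint32 checker; // value that signals completion
  kmp_info_t *waiter; // single registered waiter, may be NULL
public:
  kmp_example_flag(std::atomic<kmp_uint32> *p, kmp_uint32 c, kmp_info_t *thr)
      : kmp_flag<kmp_uint32>(p, flag32), checker(c), waiter(thr) {}
  kmp_info_t *get_waiter(kmp_uint32) { return waiter; }
  kmp_uint32 get_num_waiters() { return waiter ? 1 : 0; }
  bool done_check() { return this->load() == checker; }
  bool done_check_val(kmp_uint32 old_loc) { return old_loc == checker; }
  bool notdone_check() { return this->load() != checker; }
  void internal_release() { this->store(checker); }
  // suspend()/mwait()/resume(), sleep-bit methods and execute_tasks() omitted.
};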
127 
128 #if OMPT_SUPPORT
129 OMPT_NOINLINE
130 static void __ompt_implicit_task_end(kmp_info_t *this_thr,
131  ompt_state_t ompt_state,
132  ompt_data_t *tId) {
133  int ds_tid = this_thr->th.th_info.ds.ds_tid;
134  if (ompt_state == ompt_state_wait_barrier_implicit) {
135  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
136 #if OMPT_OPTIONAL
137  void *codeptr = NULL;
138  if (ompt_enabled.ompt_callback_sync_region_wait) {
139  ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
140  ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
141  codeptr);
142  }
143  if (ompt_enabled.ompt_callback_sync_region) {
144  ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
145  ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
146  codeptr);
147  }
148 #endif
149  if (!KMP_MASTER_TID(ds_tid)) {
150  if (ompt_enabled.ompt_callback_implicit_task) {
151  int flags = this_thr->th.ompt_thread_info.parallel_flags;
152  flags = (flags & ompt_parallel_league) ? ompt_task_initial
153  : ompt_task_implicit;
154  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
155  ompt_scope_end, NULL, tId, 0, ds_tid, flags);
156  }
157  // return to idle state
158  this_thr->th.ompt_thread_info.state = ompt_state_idle;
159  } else {
160  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
161  }
162  }
163 }
164 #endif
165 
166 /* Spin wait loop that first does pause/yield, then sleep. A thread that calls
167  __kmp_wait_* must ensure that another thread calls __kmp_release to wake it
168  back up; otherwise a deadlock can result.
169 
170  NOTE: We may not belong to a team at this point. */
171 template <class C, bool final_spin, bool Cancellable = false,
172  bool Sleepable = true>
173 static inline bool
174 __kmp_wait_template(kmp_info_t *this_thr,
175  C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
176 #if USE_ITT_BUILD && USE_ITT_NOTIFY
177  volatile void *spin = flag->get();
178 #endif
179  kmp_uint32 spins;
180  int th_gtid;
181  int tasks_completed = FALSE;
182  int oversubscribed;
183 #if !KMP_USE_MONITOR
184  kmp_uint64 poll_count;
185  kmp_uint64 hibernate_goal;
186 #else
187  kmp_uint32 hibernate;
188 #endif
189 
190  KMP_FSYNC_SPIN_INIT(spin, NULL);
191  if (flag->done_check()) {
192  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
193  return false;
194  }
195  th_gtid = this_thr->th.th_info.ds.ds_gtid;
196  if (Cancellable) {
197  kmp_team_t *team = this_thr->th.th_team;
198  if (team && team->t.t_cancel_request == cancel_parallel)
199  return true;
200  }
201 #if KMP_OS_UNIX
202  if (final_spin)
203  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
204 #endif
205  KA_TRACE(20,
206  ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
207 #if KMP_STATS_ENABLED
208  stats_state_e thread_state = KMP_GET_THREAD_STATE();
209 #endif
210 
211 /* OMPT Behavior:
212 THIS function is called from
213  __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
214  these have join / fork behavior
215 
216  In these cases, we don't change the state or trigger events in THIS
217 function.
218  Events are triggered in the calling code (__kmp_barrier):
219 
220  state := ompt_state_overhead
221  barrier-begin
222  barrier-wait-begin
223  state := ompt_state_wait_barrier
224  call join-barrier-implementation (finally arrive here)
225  {}
226  call fork-barrier-implementation (finally arrive here)
227  {}
228  state := ompt_state_overhead
229  barrier-wait-end
230  barrier-end
231  state := ompt_state_work_parallel
232 
233 
234  __kmp_fork_barrier (after thread creation, before executing implicit task)
235  call fork-barrier-implementation (finally arrive here)
236  {} // workers arrive here with state = ompt_state_idle
237 
238 
239  __kmp_join_barrier (implicit barrier at end of parallel region)
240  state := ompt_state_barrier_implicit
241  barrier-begin
242  barrier-wait-begin
243  call join-barrier-implementation (finally arrive here
244 final_spin=FALSE)
245  {
246  }
247  __kmp_fork_barrier (implicit barrier at end of parallel region)
248  call fork-barrier-implementation (finally arrive here final_spin=TRUE)
249 
250  Worker after task-team is finished:
251  barrier-wait-end
252  barrier-end
253  implicit-task-end
254  idle-begin
255  state := ompt_state_idle
256 
257  Before leaving, if state = ompt_state_idle
258  idle-end
259  state := ompt_state_overhead
260 */
261 #if OMPT_SUPPORT
262  ompt_state_t ompt_entry_state;
263  ompt_data_t *tId;
264  if (ompt_enabled.enabled) {
265  ompt_entry_state = this_thr->th.ompt_thread_info.state;
266  if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
267  KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
268  ompt_lw_taskteam_t *team =
269  this_thr->th.th_team->t.ompt_serialized_team_info;
270  if (team) {
271  tId = &(team->ompt_task_info.task_data);
272  } else {
273  tId = OMPT_CUR_TASK_DATA(this_thr);
274  }
275  } else {
276  tId = &(this_thr->th.ompt_thread_info.task_data);
277  }
278  if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
279  this_thr->th.th_task_team == NULL)) {
280  // implicit task is done. Either no taskqueue, or task-team finished
281  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
282  }
283  }
284 #endif
285 
286  KMP_INIT_YIELD(spins); // Setup for waiting
287 
288  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
289  __kmp_pause_status == kmp_soft_paused) {
290 #if KMP_USE_MONITOR
291 // The worker threads cannot rely on the team struct existing at this point.
292 // Use the bt values cached in the thread struct instead.
293 #ifdef KMP_ADJUST_BLOCKTIME
294  if (__kmp_pause_status == kmp_soft_paused ||
295  (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
296  // Force immediate suspend if not set by user and more threads than
297  // available procs
298  hibernate = 0;
299  else
300  hibernate = this_thr->th.th_team_bt_intervals;
301 #else
302  hibernate = this_thr->th.th_team_bt_intervals;
303 #endif /* KMP_ADJUST_BLOCKTIME */
304 
305  /* If the blocktime is nonzero, we want to make sure that we spin wait for
306  the entirety of the specified #intervals, plus up to one interval more.
307  This increment makes certain that this thread doesn't go to sleep too
308  soon. */
309  if (hibernate != 0)
310  hibernate++;
311 
312  // Add in the current time value.
313  hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
314  KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
315  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
316  hibernate - __kmp_global.g.g_time.dt.t_value));
317 #else
318  if (__kmp_pause_status == kmp_soft_paused) {
319  // Force immediate suspend
320  hibernate_goal = KMP_NOW();
321  } else
322  hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
323  poll_count = 0;
324 #endif // KMP_USE_MONITOR
325  }
326 
327  oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
328  KMP_MB();
329 
330  // Main wait spin loop
331  while (flag->notdone_check()) {
332  kmp_task_team_t *task_team = NULL;
333  if (__kmp_tasking_mode != tskm_immediate_exec) {
334  task_team = this_thr->th.th_task_team;
335  /* If the thread's task team pointer is NULL, it means one of 3 things:
336  1) A newly-created thread is first being released by
337  __kmp_fork_barrier(), and its task team has not been set up yet.
338  2) All tasks have been executed to completion.
339  3) Tasking is off for this region. This could be because we are in a
340  serialized region (perhaps the outer one), or else tasking was manually
341  disabled (KMP_TASKING=0). */
342  if (task_team != NULL) {
343  if (TCR_SYNC_4(task_team->tt.tt_active)) {
344  if (KMP_TASKING_ENABLED(task_team))
345  flag->execute_tasks(
346  this_thr, th_gtid, final_spin,
347  &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
348  else
349  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
350  } else {
351  KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
352 #if OMPT_SUPPORT
353  // The task team is done now; other cases should have been caught above.
354  if (final_spin && ompt_enabled.enabled)
355  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
356 #endif
357  this_thr->th.th_task_team = NULL;
358  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
359  }
360  } else {
361  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
362  } // if
363  } // if
364 
365  KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
366  if (TCR_4(__kmp_global.g.g_done)) {
367  if (__kmp_global.g.g_abort)
368  __kmp_abort_thread();
369  break;
370  }
371 
372  // If we are oversubscribed, or have waited a bit (and
373  // KMP_LIBRARY=throughput), then yield
374  KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
375 
376 #if KMP_STATS_ENABLED
377  // Check if thread has been signalled to idle state
378  // This indicates that the logical "join-barrier" has finished
379  if (this_thr->th.th_stats->isIdle() &&
380  KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
381  KMP_SET_THREAD_STATE(IDLE);
382  KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
383  }
384 #endif
385  // Check if the barrier surrounding this wait loop has been cancelled
386  if (Cancellable) {
387  kmp_team_t *team = this_thr->th.th_team;
388  if (team && team->t.t_cancel_request == cancel_parallel)
389  break;
390  }
391 
392  // For a hidden helper thread, a NULL task_team means the main thread has
393  // not yet released the barrier. We cannot simply wait here: once the main
394  // thread releases all child barriers, the hidden helper threads would still
395  // be sleeping, so subsequent setup such as task-team synchronization would
396  // be skipped and this thread would be left without a task team. That is
397  // usually harmless, but in one corner case, when the first task encountered
398  // is an untied task, the check in __kmp_task_alloc crashes because it
399  // dereferences the task team pointer without checking it for nullptr
400  // (it presumably assumes the task team has already been set up).
402  if (task_team && KMP_HIDDEN_HELPER_WORKER_THREAD(th_gtid) &&
403  !TCR_4(__kmp_hidden_helper_team_done)) {
404  // If there are still hidden helper tasks to be executed, the hidden helper
405  // thread will not enter a waiting state.
406  if (KMP_ATOMIC_LD_ACQ(&__kmp_unexecuted_hidden_helper_tasks) == 0) {
407  __kmp_hidden_helper_worker_thread_wait();
408  }
409  continue;
410  }
411 
412  // Don't suspend if KMP_BLOCKTIME is set to "infinite"
413  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
414  __kmp_pause_status != kmp_soft_paused)
415  continue;
416 
417  // Don't suspend if there is a likelihood of new tasks being spawned.
418  if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
419  continue;
420 
421 #if KMP_USE_MONITOR
422  // If we have waited a bit more, fall asleep
423  if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
424  continue;
425 #else
426  if (KMP_BLOCKING(hibernate_goal, poll_count++))
427  continue;
428 #endif
429  // Don't suspend if the wait loop was designated non-sleepable
430  // by the template parameters
431  if (!Sleepable)
432  continue;
433 
434  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
435  __kmp_pause_status != kmp_soft_paused)
436  continue;
437 
438 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
439  if (__kmp_mwait_enabled || __kmp_umwait_enabled) {
440  KF_TRACE(50, ("__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid));
441  flag->mwait(th_gtid);
442  } else {
443 #endif
444  KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
445 #if KMP_OS_UNIX
446  if (final_spin)
447  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
448 #endif
449  flag->suspend(th_gtid);
450 #if KMP_OS_UNIX
451  if (final_spin)
452  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
453 #endif
454 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
455  }
456 #endif
457 
458  if (TCR_4(__kmp_global.g.g_done)) {
459  if (__kmp_global.g.g_abort)
460  __kmp_abort_thread();
461  break;
462  } else if (__kmp_tasking_mode != tskm_immediate_exec &&
463  this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
464  this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
465  }
466  // TODO: If thread is done with work and times out, disband/free
467  }
468 
469 #if OMPT_SUPPORT
470  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
471  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
472 #if OMPT_OPTIONAL
473  if (final_spin) {
474  __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
475  ompt_exit_state = this_thr->th.ompt_thread_info.state;
476  }
477 #endif
478  if (ompt_exit_state == ompt_state_idle) {
479  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
480  }
481  }
482 #endif
483 #if KMP_STATS_ENABLED
484  // If we were put into idle state, pop that off the state stack
485  if (KMP_GET_THREAD_STATE() == IDLE) {
486  KMP_POP_PARTITIONED_TIMER();
487  KMP_SET_THREAD_STATE(thread_state);
488  this_thr->th.th_stats->resetIdleFlag();
489  }
490 #endif
491 
492 #if KMP_OS_UNIX
493  if (final_spin)
494  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
495 #endif
496  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
497  if (Cancellable) {
498  kmp_team_t *team = this_thr->th.th_team;
499  if (team && team->t.t_cancel_request == cancel_parallel) {
500  if (tasks_completed) {
501  // undo the previous decrement of unfinished_threads so that the
502  // thread can decrement at the join barrier with no problem
503  kmp_task_team_t *task_team = this_thr->th.th_task_team;
504  std::atomic<kmp_int32> *unfinished_threads =
505  &(task_team->tt.tt_unfinished_threads);
506  KMP_ATOMIC_INC(unfinished_threads);
507  }
508  return true;
509  }
510  }
511  return false;
512 }
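Stripped of the tasking, stats, OMPT, hidden-helper and cancellation handling, the control flow of __kmp_wait_template above reduces to roughly the following paraphrase (wait_sketch is a made-up name, not a runtime function, and not a drop-in replacement):

template <class C>
static inline void wait_sketch(kmp_info_t *this_thr, C *flag) {
  kmp_uint32 spins;
  KMP_INIT_YIELD(spins);
  while (flag->notdone_check()) {
    // 1) Help with outstanding tasks (elided here), then pause/yield.
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
    // 2) With KMP_BLOCKTIME "infinite" the thread never sleeps; the real loop
    //    also waits for a hibernate deadline before it is allowed to sleep.
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      continue;
    // 3) Otherwise block (suspend or mwait) until the flag is released.
    flag->suspend(this_thr->th.th_info.ds.ds_gtid);
  }
}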
513 
514 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
515 // Set up a monitor on the flag variable causing the calling thread to wait in
516 // a less active state until the flag variable is modified.
517 template <class C>
518 static inline void __kmp_mwait_template(int th_gtid, C *flag) {
519  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_mwait);
520  kmp_info_t *th = __kmp_threads[th_gtid];
521 
522  KF_TRACE(30, ("__kmp_mwait_template: T#%d enter for flag = %p\n", th_gtid,
523  flag->get()));
524 
525  // User-level mwait is available
526  KMP_DEBUG_ASSERT(__kmp_mwait_enabled || __kmp_umwait_enabled);
527 
528  __kmp_suspend_initialize_thread(th);
529  __kmp_lock_suspend_mx(th);
530 
531  volatile void *spin = flag->get();
532  void *cacheline = (void *)(kmp_uintptr_t(spin) & ~(CACHE_LINE - 1));
533 
534  if (!flag->done_check()) {
535  // Mark thread as no longer active
536  th->th.th_active = FALSE;
537  if (th->th.th_active_in_pool) {
538  th->th.th_active_in_pool = FALSE;
539  KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
540  KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
541  }
542  flag->set_sleeping();
543  KF_TRACE(50, ("__kmp_mwait_template: T#%d calling monitor\n", th_gtid));
544 #if KMP_HAVE_UMWAIT
545  if (__kmp_umwait_enabled) {
546  __kmp_umonitor(cacheline);
547  }
548 #elif KMP_HAVE_MWAIT
549  if (__kmp_mwait_enabled) {
550  __kmp_mm_monitor(cacheline, 0, 0);
551  }
552 #endif
553  // To avoid a race, check flag between 'monitor' and 'mwait'. A write to
554  // the address could happen after the last time we checked and before
555  // monitoring started, in which case monitor can't detect the change.
556  if (flag->done_check())
557  flag->unset_sleeping();
558  else {
559  // if flag changes here, wake-up happens immediately
560  TCW_PTR(th->th.th_sleep_loc, (void *)flag);
561  __kmp_unlock_suspend_mx(th);
562  KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
563 #if KMP_HAVE_UMWAIT
564  if (__kmp_umwait_enabled) {
565  __kmp_umwait(1, 100); // to do: enable ctrl via hints, backoff counter
566  }
567 #elif KMP_HAVE_MWAIT
568  if (__kmp_mwait_enabled) {
569  __kmp_mm_mwait(0, __kmp_mwait_hints);
570  }
571 #endif
572  KF_TRACE(50, ("__kmp_mwait_template: T#%d mwait done\n", th_gtid));
573  __kmp_lock_suspend_mx(th);
574  // Clean up sleep info; doesn't matter how/why this thread stopped waiting
575  if (flag->is_sleeping())
576  flag->unset_sleeping();
577  TCW_PTR(th->th.th_sleep_loc, NULL);
578  }
579  // Mark thread as active again
580  th->th.th_active = TRUE;
581  if (TCR_4(th->th.th_in_pool)) {
582  KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
583  th->th.th_active_in_pool = TRUE;
584  }
585  } // Drop out to main wait loop to check flag, handle tasks, etc.
586  __kmp_unlock_suspend_mx(th);
587  KF_TRACE(30, ("__kmp_mwait_template: T#%d exit\n", th_gtid));
588 }
589 #endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
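The race that the comment inside __kmp_mwait_template guards against comes down to the ordering below: arm the monitor, re-check the flag, and only then wait. This condensed restatement assumes the user-level umwait path is available; mwait_order_sketch is a made-up name, and the suspend mutex, th_sleep_loc bookkeeping and the MWAIT variant are deliberately omitted even though the real code needs them.

template <class C> static inline void mwait_order_sketch(C *flag) {
  void *cacheline = (void *)(kmp_uintptr_t(flag->get()) & ~(CACHE_LINE - 1));
  flag->set_sleeping();
  __kmp_umonitor(cacheline); // arm the monitor on the flag's cache line
  if (flag->done_check()) {
    // A release may have landed between the earlier check and arming the
    // monitor; without this re-check that write could go unnoticed forever.
    flag->unset_sleeping();
    return;
  }
  __kmp_umwait(1, 100); // block until the monitored cache line is written
  if (flag->is_sleeping())
    flag->unset_sleeping();
}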
590 
591 /* Release the flag by modifying it, and resume any waiting thread if the
592  sleep bit(s) indicate one is sleeping. Any thread blocked in
593  __kmp_wait_template relies on some other thread calling this function to
594  wake it up; otherwise it can deadlock. */
595 template <class C> static inline void __kmp_release_template(C *flag) {
596 #ifdef KMP_DEBUG
597  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
598 #endif
599  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%p)\n", gtid, flag->get()));
600  KMP_DEBUG_ASSERT(flag->get());
601  KMP_FSYNC_RELEASING(flag->get_void_p());
602 
603  flag->internal_release();
604 
605  KF_TRACE(100, ("__kmp_release: T#%d set new spin(%p)=%d\n", gtid,
606  flag->get(), flag->load()));
607 
608  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
609  // Only need to check sleep stuff if infinite block time not set.
610  // Are *any* threads waiting on flag sleeping?
611  if (flag->is_any_sleeping()) {
612  for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
613  // if sleeping waiter exists at i, sets current_waiter to i inside flag
614  kmp_info_t *waiter = flag->get_waiter(i);
615  if (waiter) {
616  int wait_gtid = waiter->th.th_info.ds.ds_gtid;
617  // Wake up thread if needed
618  KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
619  "flag(%p) set\n",
620  gtid, wait_gtid, flag->get()));
621  flag->resume(wait_gtid); // unsets flag's current_waiter when done
622  }
623  }
624  }
625  }
626 }
627 
628 template <typename FlagType> struct flag_traits {};
629 
630 template <> struct flag_traits<kmp_uint32> {
631  typedef kmp_uint32 flag_t;
632  static const flag_type t = flag32;
633  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
634  static inline flag_t test_then_add4(volatile flag_t *f) {
635  return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
636  }
637  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
638  return KMP_TEST_THEN_OR32(f, v);
639  }
640  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
641  return KMP_TEST_THEN_AND32(f, v);
642  }
643 };
644 
645 template <> struct flag_traits<kmp_uint64> {
646  typedef kmp_uint64 flag_t;
647  static const flag_type t = flag64;
648  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
649  static inline flag_t test_then_add4(volatile flag_t *f) {
650  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
651  }
652  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
653  return KMP_TEST_THEN_OR64(f, v);
654  }
655  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
656  return KMP_TEST_THEN_AND64(f, v);
657  }
658 };
659 
660 // Basic flag that does not use C11 Atomics
661 template <typename FlagType, bool Sleepable>
662 class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
663  typedef flag_traits<FlagType> traits_type;
664  FlagType checker;
666  kmp_info_t *waiting_threads[1];
668  kmp_uint32 num_waiting_threads;
670 public:
671  kmp_basic_flag_native(volatile FlagType *p)
672  : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
673  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
674  : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
675  waiting_threads[0] = thr;
676  }
677  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
678  : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
679  num_waiting_threads(0) {}
684  kmp_info_t *get_waiter(kmp_uint32 i) {
685  KMP_DEBUG_ASSERT(i < num_waiting_threads);
686  return waiting_threads[i];
687  }
691  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
697  void set_waiter(kmp_info_t *thr) {
698  waiting_threads[0] = thr;
699  num_waiting_threads = 1;
700  }
704  bool done_check() {
705  if (Sleepable)
706  return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
707  checker;
708  else
709  return traits_type::tcr(*(this->get())) == checker;
710  }
715  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
723  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
728  void internal_release() {
729  (void)traits_type::test_then_add4((volatile FlagType *)this->get());
730  }
736  FlagType set_sleeping() {
737  return traits_type::test_then_or((volatile FlagType *)this->get(),
738  KMP_BARRIER_SLEEP_STATE);
739  }
745  FlagType unset_sleeping() {
746  return traits_type::test_then_and((volatile FlagType *)this->get(),
747  ~KMP_BARRIER_SLEEP_STATE);
748  }
753  bool is_sleeping_val(FlagType old_loc) {
754  return old_loc & KMP_BARRIER_SLEEP_STATE;
755  }
759  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
760  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
761  kmp_uint8 *get_stolen() { return NULL; }
762  enum barrier_type get_bt() { return bs_last_barrier; }
763 };
764 
765 template <typename FlagType, bool Sleepable>
766 class kmp_basic_flag : public kmp_flag<FlagType> {
767  typedef flag_traits<FlagType> traits_type;
768  FlagType checker;
770  kmp_info_t *waiting_threads[1];
772  kmp_uint32 num_waiting_threads;
774 public:
775  kmp_basic_flag(std::atomic<FlagType> *p)
776  : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
777  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
778  : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
779  waiting_threads[0] = thr;
780  }
781  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
782  : kmp_flag<FlagType>(p, traits_type::t), checker(c),
783  num_waiting_threads(0) {}
788  kmp_info_t *get_waiter(kmp_uint32 i) {
789  KMP_DEBUG_ASSERT(i < num_waiting_threads);
790  return waiting_threads[i];
791  }
795  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
801  void set_waiter(kmp_info_t *thr) {
802  waiting_threads[0] = thr;
803  num_waiting_threads = 1;
804  }
808  bool done_check() {
809  if (Sleepable)
810  return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
811  else
812  return this->load() == checker;
813  }
818  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
826  bool notdone_check() { return this->load() != checker; }
831  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
837  FlagType set_sleeping() {
838  return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
839  }
845  FlagType unset_sleeping() {
846  return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
847  }
852  bool is_sleeping_val(FlagType old_loc) {
853  return old_loc & KMP_BARRIER_SLEEP_STATE;
854  }
858  bool is_sleeping() { return is_sleeping_val(this->load()); }
859  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
860  kmp_uint8 *get_stolen() { return NULL; }
861  enum barrier_type get_bt() { return bs_last_barrier; }
862 };
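Both basic flag classes reserve a bit of the flag word (KMP_BARRIER_SLEEP_STATE, defined in kmp.h) to record that a waiter has gone to sleep; that is why the Sleepable variant of done_check() masks it off, and it is consistent with internal_release() bumping the value by 4 so the low bits stay free for state. A self-contained sketch of that packing in plain std::atomic terms (SLEEP_BIT and the helper names are stand-ins, and the non-overlap of the sleep bit with the +4 bump is an assumption mirrored from the runtime's constants, not something defined in this file):

#include <atomic>
#include <cstdint>

constexpr uint64_t SLEEP_BIT = 1; // stand-in for KMP_BARRIER_SLEEP_STATE

bool done_check(const std::atomic<uint64_t> &f, uint64_t checker) {
  // Same idea as the Sleepable branch of done_check(): ignore the sleep bit.
  return (f.load(std::memory_order_acquire) & ~SLEEP_BIT) == checker;
}
uint64_t set_sleeping(std::atomic<uint64_t> &f) { return f.fetch_or(SLEEP_BIT); }
uint64_t unset_sleeping(std::atomic<uint64_t> &f) { return f.fetch_and(~SLEEP_BIT); }
void internal_release(std::atomic<uint64_t> &f) { f.fetch_add(4); }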
863 
864 template <bool Cancellable, bool Sleepable>
865 class kmp_flag_32 : public kmp_basic_flag<kmp_uint32, Sleepable> {
866 public:
867  kmp_flag_32(std::atomic<kmp_uint32> *p)
868  : kmp_basic_flag<kmp_uint32, Sleepable>(p) {}
869  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
870  : kmp_basic_flag<kmp_uint32, Sleepable>(p, thr) {}
871  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
872  : kmp_basic_flag<kmp_uint32, Sleepable>(p, c) {}
873  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
874 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
875  void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); }
876 #endif
877  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
878  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
879  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
880  kmp_int32 is_constrained) {
881  return __kmp_execute_tasks_32(
882  this_thr, gtid, this, final_spin,
883  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
884  }
885  bool wait(kmp_info_t *this_thr,
886  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
887  if (final_spin)
888  return __kmp_wait_template<kmp_flag_32, TRUE, Cancellable, Sleepable>(
889  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
890  else
891  return __kmp_wait_template<kmp_flag_32, FALSE, Cancellable, Sleepable>(
892  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
893  }
894  void release() { __kmp_release_template(this); }
895  flag_type get_ptr_type() { return flag32; }
896 };
897 
898 template <bool Cancellable, bool Sleepable>
899 class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64, Sleepable> {
900 public:
901  kmp_flag_64(volatile kmp_uint64 *p)
902  : kmp_basic_flag_native<kmp_uint64, Sleepable>(p) {}
903  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
904  : kmp_basic_flag_native<kmp_uint64, Sleepable>(p, thr) {}
905  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
906  : kmp_basic_flag_native<kmp_uint64, Sleepable>(p, c) {}
907  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
908 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
909  void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); }
910 #endif
911  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
912  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
913  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
914  kmp_int32 is_constrained) {
915  return __kmp_execute_tasks_64(
916  this_thr, gtid, this, final_spin,
917  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
918  }
919  bool wait(kmp_info_t *this_thr,
920  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
921  if (final_spin)
922  return __kmp_wait_template<kmp_flag_64, TRUE, Cancellable, Sleepable>(
923  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
924  else
925  return __kmp_wait_template<kmp_flag_64, FALSE, Cancellable, Sleepable>(
926  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
927  }
928  void release() { __kmp_release_template(this); }
929  flag_type get_ptr_type() { return flag64; }
930 };
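As a worked illustration of the wait/release pairing described above __kmp_release_template, the two sides could meet on a kmp_flag_64 roughly as follows. The shared location go and the functions waiter_side/releaser_side are hypothetical; in the runtime the flag location is a barrier field such as th_bar[...].bb.b_go rather than a free-standing variable.

volatile kmp_uint64 go = 0; // hypothetical shared flag location

void waiter_side(kmp_info_t *this_thr) {
  kmp_flag_64<> flag(&go, (kmp_uint64)4); // spin/sleep until go reaches 4
  flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(NULL));
}

void releaser_side() {
  kmp_flag_64<> flag(&go);
  flag.release(); // bumps go by 4 and resumes the waiter if it went to sleep
}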
931 
932 // Hierarchical 64-bit on-core barrier instantiation
933 class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
934  kmp_uint64 checker;
935  kmp_info_t *waiting_threads[1];
936  kmp_uint32 num_waiting_threads;
937  kmp_uint32 offset;
939  bool flag_switch;
940  enum barrier_type bt;
941  kmp_info_t *this_thr;
943 #if USE_ITT_BUILD
944  void *itt_sync_obj;
946 #endif
947  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
948  return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
949  }
950 
951 public:
952  kmp_flag_oncore(volatile kmp_uint64 *p)
953  : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
954  flag_switch(false) {}
955  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
956  : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
957  offset(idx), flag_switch(false) {}
958  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
959  enum barrier_type bar_t,
960  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
961  : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
962  num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
963  this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
964  kmp_info_t *get_waiter(kmp_uint32 i) {
965  KMP_DEBUG_ASSERT(i < num_waiting_threads);
966  return waiting_threads[i];
967  }
968  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
969  void set_waiter(kmp_info_t *thr) {
970  waiting_threads[0] = thr;
971  num_waiting_threads = 1;
972  }
973  bool done_check_val(kmp_uint64 old_loc) {
974  return byteref(&old_loc, offset) == checker;
975  }
976  bool done_check() { return done_check_val(*get()); }
977  bool notdone_check() {
978  // Calculate flag_switch
979  if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
980  flag_switch = true;
981  if (byteref(get(), offset) != 1 && !flag_switch)
982  return true;
983  else if (flag_switch) {
984  this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
985  kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go,
986  (kmp_uint64)KMP_BARRIER_STATE_BUMP);
987  __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
988  }
989  return false;
990  }
991  void internal_release() {
992  // Other threads can write their own bytes simultaneously.
993  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
994  byteref(get(), offset) = 1;
995  } else {
996  kmp_uint64 mask = 0;
997  byteref(&mask, offset) = 1;
998  KMP_TEST_THEN_OR64(get(), mask);
999  }
1000  }
1001  kmp_uint64 set_sleeping() {
1002  return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
1003  }
1004  kmp_uint64 unset_sleeping() {
1005  return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
1006  }
1007  bool is_sleeping_val(kmp_uint64 old_loc) {
1008  return old_loc & KMP_BARRIER_SLEEP_STATE;
1009  }
1010  bool is_sleeping() { return is_sleeping_val(*get()); }
1011  bool is_any_sleeping() { return is_sleeping_val(*get()); }
1012  void wait(kmp_info_t *this_thr, int final_spin) {
1013  if (final_spin)
1014  __kmp_wait_template<kmp_flag_oncore, TRUE>(
1015  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
1016  else
1017  __kmp_wait_template<kmp_flag_oncore, FALSE>(
1018  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
1019  }
1020  void release() { __kmp_release_template(this); }
1021  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
1022 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
1023  void mwait(int th_gtid) { __kmp_mwait_oncore(th_gtid, this); }
1024 #endif
1025  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
1026  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
1027  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
1028  kmp_int32 is_constrained) {
1029  return __kmp_execute_tasks_oncore(
1030  this_thr, gtid, this, final_spin,
1031  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1032  }
1033  kmp_uint8 *get_stolen() { return NULL; }
1034  enum barrier_type get_bt() { return bt; }
1035  flag_type get_ptr_type() { return flag_oncore; }
1036 };
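kmp_flag_oncore packs one byte per child (selected by offset) into a single 64-bit word, so several threads can be released through one flag location. A standalone sketch of that encoding in plain C++ (oncore_done and oncore_release are made-up names; the shift form assumes little-endian byte order, whereas byteref() above indexes raw bytes and is endian-agnostic):

#include <atomic>
#include <cstdint>

bool oncore_done(uint64_t word, unsigned offset, uint8_t checker) {
  // Same test as byteref(&old_loc, offset) == checker in done_check_val().
  return ((word >> (8 * offset)) & 0xffu) == checker;
}

void oncore_release(std::atomic<uint64_t> &word, unsigned offset) {
  // Several children may release concurrently, so OR in a one-byte mask,
  // like the non-infinite-blocktime branch of internal_release() above.
  word.fetch_or(uint64_t(1) << (8 * offset));
}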
1037 
1038 // Used to wake up threads; the volatile void* flag is usually the
1039 // th_sleep_loc associated with the given gtid.
1040 static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
1041  if (!flag)
1042  return;
1043 
1044  switch (RCAST(kmp_flag_64<> *, CCAST(void *, flag))->get_type()) {
1045  case flag32:
1046  __kmp_resume_32(gtid, (kmp_flag_32<> *)NULL);
1047  break;
1048  case flag64:
1049  __kmp_resume_64(gtid, (kmp_flag_64<> *)NULL);
1050  break;
1051  case flag_oncore:
1052  __kmp_resume_oncore(gtid, (kmp_flag_oncore *)NULL);
1053  break;
1054  }
1055 }
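A typical call site, as the comment above suggests, passes a worker's registered sleep location (wake_if_sleeping is hypothetical, not a runtime function):

static inline void wake_if_sleeping(int gtid, kmp_info_t *th) {
  // The suspend/mwait paths above store the active flag in th_sleep_loc.
  __kmp_null_resume_wrapper(gtid, th->th.th_sleep_loc);
}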
1056 
1061 #endif // KMP_WAIT_RELEASE_H