LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1 /*
2  * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #define __KMP_IMP
14 #include "omp.h" /* extern "C" declarations of user-visible routines */
15 #include "kmp.h"
16 #include "kmp_error.h"
17 #include "kmp_i18n.h"
18 #include "kmp_itt.h"
19 #include "kmp_lock.h"
20 #include "kmp_stats.h"
21 #include "ompt-specific.h"
22 
23 #define MAX_MESSAGE 512
24 
25 // flags will be used in future, e.g. to implement openmp_strict library
26 // restrictions
27 
36 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
37  // By default __kmpc_begin() is no-op.
38  char *env;
39  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
40  __kmp_str_match_true(env)) {
41  __kmp_middle_initialize();
42  __kmp_assign_root_init_mask();
43  KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
44  } else if (__kmp_ignore_mppbeg() == FALSE) {
45  // By default __kmp_ignore_mppbeg() returns TRUE.
46  __kmp_internal_begin();
47  KC_TRACE(10, ("__kmpc_begin: called\n"));
48  }
49 }
50 
59 void __kmpc_end(ident_t *loc) {
60  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
61  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
62  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
63  // returns FALSE and __kmpc_end() will unregister this root (it can cause
64  // library shut down).
65  if (__kmp_ignore_mppend() == FALSE) {
66  KC_TRACE(10, ("__kmpc_end: called\n"));
67  KA_TRACE(30, ("__kmpc_end\n"));
68 
69  __kmp_internal_end_thread(-1);
70  }
71 #if KMP_OS_WINDOWS && OMPT_SUPPORT
72  // Normal exit process on Windows does not allow worker threads of the final
73  // parallel region to finish reporting their events, so shutting down the
74  // library here fixes the issue at least for the cases where __kmpc_end() is
75  // placed properly.
76  if (ompt_enabled.enabled)
77  __kmp_internal_end_library(__kmp_gtid_get_specific());
78 #endif
79 }
80 
100  kmp_int32 gtid = __kmp_entry_gtid();
101 
102  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
103 
104  return gtid;
105 }
106 
122  KC_TRACE(10,
123  ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
124 
125  return TCR_4(__kmp_all_nth);
126 }
127 
135  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
136  return __kmp_tid_from_gtid(__kmp_entry_gtid());
137 }
138 
145  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
146 
147  return __kmp_entry_thread()->th.th_team->t.t_nproc;
148 }
149 
156 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
157 #ifndef KMP_DEBUG
158 
159  return TRUE;
160 
161 #else
162 
163  const char *semi2;
164  const char *semi3;
165  int line_no;
166 
167  if (__kmp_par_range == 0) {
168  return TRUE;
169  }
170  semi2 = loc->psource;
171  if (semi2 == NULL) {
172  return TRUE;
173  }
174  semi2 = strchr(semi2, ';');
175  if (semi2 == NULL) {
176  return TRUE;
177  }
178  semi2 = strchr(semi2 + 1, ';');
179  if (semi2 == NULL) {
180  return TRUE;
181  }
182  if (__kmp_par_range_filename[0]) {
183  const char *name = semi2 - 1;
184  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
185  name--;
186  }
187  if ((*name == '/') || (*name == ';')) {
188  name++;
189  }
190  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
191  return __kmp_par_range < 0;
192  }
193  }
194  semi3 = strchr(semi2 + 1, ';');
195  if (__kmp_par_range_routine[0]) {
196  if ((semi3 != NULL) && (semi3 > semi2) &&
197  (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
198  return __kmp_par_range < 0;
199  }
200  }
201  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
202  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
203  return __kmp_par_range > 0;
204  }
205  return __kmp_par_range < 0;
206  }
207  return TRUE;
208 
209 #endif /* KMP_DEBUG */
210 }
211 
218 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
219  return __kmp_entry_thread()->th.th_root->r.r_active;
220 }
221 
231 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
232  kmp_int32 num_threads) {
233  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
234  global_tid, num_threads));
235  __kmp_assert_valid_gtid(global_tid);
236  __kmp_push_num_threads(loc, global_tid, num_threads);
237 }
238 
239 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
240  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
241  /* the num_threads are automatically popped */
242 }
243 
244 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
245  kmp_int32 proc_bind) {
246  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
247  proc_bind));
248  __kmp_assert_valid_gtid(global_tid);
249  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
250 }
251 
262 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
263  int gtid = __kmp_entry_gtid();
264 
265 #if (KMP_STATS_ENABLED)
266  // If we were in a serial region, then stop the serial timer, record
267  // the event, and start parallel region timer
268  stats_state_e previous_state = KMP_GET_THREAD_STATE();
269  if (previous_state == stats_state_e::SERIAL_REGION) {
270  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
271  } else {
272  KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
273  }
274  int inParallel = __kmpc_in_parallel(loc);
275  if (inParallel) {
276  KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
277  } else {
278  KMP_COUNT_BLOCK(OMP_PARALLEL);
279  }
280 #endif
281 
282  // maybe to save thr_state is enough here
283  {
284  va_list ap;
285  va_start(ap, microtask);
286 
287 #if OMPT_SUPPORT
288  ompt_frame_t *ompt_frame;
289  if (ompt_enabled.enabled) {
290  kmp_info_t *master_th = __kmp_threads[gtid];
291  ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
292  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
293  }
294  OMPT_STORE_RETURN_ADDRESS(gtid);
295 #endif
296 
297 #if INCLUDE_SSC_MARKS
298  SSC_MARK_FORKING();
299 #endif
300  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
301  VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
302  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
303  kmp_va_addr_of(ap));
304 #if INCLUDE_SSC_MARKS
305  SSC_MARK_JOINING();
306 #endif
307  __kmp_join_call(loc, gtid
308 #if OMPT_SUPPORT
309  ,
310  fork_context_intel
311 #endif
312  );
313 
314  va_end(ap);
315 
316 #if OMPT_SUPPORT
317  if (ompt_enabled.enabled) {
318  ompt_frame->enter_frame = ompt_data_none;
319  }
320 #endif
321  }
322 
323 #if KMP_STATS_ENABLED
324  if (previous_state == stats_state_e::SERIAL_REGION) {
325  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
326  KMP_SET_THREAD_STATE(previous_state);
327  } else {
328  KMP_POP_PARTITIONED_TIMER();
329  }
330 #endif // KMP_STATS_ENABLED
331 }
332 
344 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
345  kmp_int32 num_teams, kmp_int32 num_threads) {
346  KA_TRACE(20,
347  ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
348  global_tid, num_teams, num_threads));
349  __kmp_assert_valid_gtid(global_tid);
350  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
351 }
352 
369 void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
370  kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
371  kmp_int32 num_threads) {
372  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
373  " num_teams_ub=%d num_threads=%d\n",
374  global_tid, num_teams_lb, num_teams_ub, num_threads));
375  __kmp_assert_valid_gtid(global_tid);
376  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
377  num_threads);
378 }
379 
390 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
391  ...) {
392  int gtid = __kmp_entry_gtid();
393  kmp_info_t *this_thr = __kmp_threads[gtid];
394  va_list ap;
395  va_start(ap, microtask);
396 
397 #if KMP_STATS_ENABLED
398  KMP_COUNT_BLOCK(OMP_TEAMS);
399  stats_state_e previous_state = KMP_GET_THREAD_STATE();
400  if (previous_state == stats_state_e::SERIAL_REGION) {
401  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
402  } else {
403  KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
404  }
405 #endif
406 
407  // remember teams entry point and nesting level
408  this_thr->th.th_teams_microtask = microtask;
409  this_thr->th.th_teams_level =
410  this_thr->th.th_team->t.t_level; // AC: can be >0 on host
411 
412 #if OMPT_SUPPORT
413  kmp_team_t *parent_team = this_thr->th.th_team;
414  int tid = __kmp_tid_from_gtid(gtid);
415  if (ompt_enabled.enabled) {
416  parent_team->t.t_implicit_task_taskdata[tid]
417  .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
418  }
419  OMPT_STORE_RETURN_ADDRESS(gtid);
420 #endif
421 
422  // check if __kmpc_push_num_teams called, set default number of teams
423  // otherwise
424  if (this_thr->th.th_teams_size.nteams == 0) {
425  __kmp_push_num_teams(loc, gtid, 0, 0);
426  }
427  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
428  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
429  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
430 
431  __kmp_fork_call(
432  loc, gtid, fork_context_intel, argc,
433  VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
434  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
435  __kmp_join_call(loc, gtid
436 #if OMPT_SUPPORT
437  ,
438  fork_context_intel
439 #endif
440  );
441 
442  // Pop current CG root off list
443  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
444  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
445  this_thr->th.th_cg_roots = tmp->up;
446  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
447  " to node %p. cg_nthreads was %d\n",
448  this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
449  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
450  int i = tmp->cg_nthreads--;
451  if (i == 1) { // check is we are the last thread in CG (not always the case)
452  __kmp_free(tmp);
453  }
454  // Restore current task's thread_limit from CG root
455  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
456  this_thr->th.th_current_task->td_icvs.thread_limit =
457  this_thr->th.th_cg_roots->cg_thread_limit;
458 
459  this_thr->th.th_teams_microtask = NULL;
460  this_thr->th.th_teams_level = 0;
461  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
462  va_end(ap);
463 #if KMP_STATS_ENABLED
464  if (previous_state == stats_state_e::SERIAL_REGION) {
465  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
466  KMP_SET_THREAD_STATE(previous_state);
467  } else {
468  KMP_POP_PARTITIONED_TIMER();
469  }
470 #endif // KMP_STATS_ENABLED
471 }
472 
473 // I don't think this function should ever have been exported.
474 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
475 // openmp code ever called it, but it's been exported from the RTL for so
476 // long that I'm afraid to remove the definition.
477 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
478 
491 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
492  // The implementation is now in kmp_runtime.cpp so that it can share static
493  // functions with kmp_fork_call since the tasks to be done are similar in
494  // each case.
495  __kmp_assert_valid_gtid(global_tid);
496 #if OMPT_SUPPORT
497  OMPT_STORE_RETURN_ADDRESS(global_tid);
498 #endif
499  __kmp_serialized_parallel(loc, global_tid);
500 }
501 
509 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
510  kmp_internal_control_t *top;
511  kmp_info_t *this_thr;
512  kmp_team_t *serial_team;
513 
514  KC_TRACE(10,
515  ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
516 
517  /* skip all this code for autopar serialized loops since it results in
518  unacceptable overhead */
519  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
520  return;
521 
522  // Not autopar code
523  __kmp_assert_valid_gtid(global_tid);
524  if (!TCR_4(__kmp_init_parallel))
525  __kmp_parallel_initialize();
526 
527  __kmp_resume_if_soft_paused();
528 
529  this_thr = __kmp_threads[global_tid];
530  serial_team = this_thr->th.th_serial_team;
531 
532  kmp_task_team_t *task_team = this_thr->th.th_task_team;
533  // we need to wait for the proxy tasks before finishing the thread
534  if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
535  task_team->tt.tt_hidden_helper_task_encountered))
536  __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
537 
538  KMP_MB();
539  KMP_DEBUG_ASSERT(serial_team);
540  KMP_ASSERT(serial_team->t.t_serialized);
541  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
542  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
543  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
544  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
545 
546 #if OMPT_SUPPORT
547  if (ompt_enabled.enabled &&
548  this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
549  OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
550  if (ompt_enabled.ompt_callback_implicit_task) {
551  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
552  ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
553  OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
554  }
555 
556  // reset clear the task id only after unlinking the task
557  ompt_data_t *parent_task_data;
558  __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
559 
560  if (ompt_enabled.ompt_callback_parallel_end) {
561  ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
562  &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
563  ompt_parallel_invoker_program | ompt_parallel_team,
564  OMPT_LOAD_RETURN_ADDRESS(global_tid));
565  }
566  __ompt_lw_taskteam_unlink(this_thr);
567  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
568  }
569 #endif
570 
571  /* If necessary, pop the internal control stack values and replace the team
572  * values */
573  top = serial_team->t.t_control_stack_top;
574  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
575  copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
576  serial_team->t.t_control_stack_top = top->next;
577  __kmp_free(top);
578  }
579 
580  /* pop dispatch buffers stack */
581  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
582  {
583  dispatch_private_info_t *disp_buffer =
584  serial_team->t.t_dispatch->th_disp_buffer;
585  serial_team->t.t_dispatch->th_disp_buffer =
586  serial_team->t.t_dispatch->th_disp_buffer->next;
587  __kmp_free(disp_buffer);
588  }
589  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
590 
591  --serial_team->t.t_serialized;
592  if (serial_team->t.t_serialized == 0) {
593 
594  /* return to the parallel section */
595 
596 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
597  if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
598  __kmp_clear_x87_fpu_status_word();
599  __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
600  __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
601  }
602 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
603 
604  __kmp_pop_current_task_from_thread(this_thr);
605 #if OMPD_SUPPORT
606  if (ompd_state & OMPD_ENABLE_BP)
607  ompd_bp_parallel_end();
608 #endif
609 
610  this_thr->th.th_team = serial_team->t.t_parent;
611  this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
612 
613  /* restore values cached in the thread */
614  this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
615  this_thr->th.th_team_master =
616  serial_team->t.t_parent->t.t_threads[0]; /* JPH */
617  this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
618 
619  /* TODO the below shouldn't need to be adjusted for serialized teams */
620  this_thr->th.th_dispatch =
621  &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
622 
623  KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
624  this_thr->th.th_current_task->td_flags.executing = 1;
625 
626  if (__kmp_tasking_mode != tskm_immediate_exec) {
627  // Copy the task team from the new child / old parent team to the thread.
628  this_thr->th.th_task_team =
629  this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
630  KA_TRACE(20,
631  ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
632  "team %p\n",
633  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
634  }
635  } else {
636  if (__kmp_tasking_mode != tskm_immediate_exec) {
637  KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
638  "depth of serial team %p to %d\n",
639  global_tid, serial_team, serial_team->t.t_serialized));
640  }
641  }
642 
643  serial_team->t.t_level--;
644  if (__kmp_env_consistency_check)
645  __kmp_pop_parallel(global_tid, NULL);
646 #if OMPT_SUPPORT
647  if (ompt_enabled.enabled)
648  this_thr->th.ompt_thread_info.state =
649  ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
650  : ompt_state_work_parallel);
651 #endif
652 }
653 
662 void __kmpc_flush(ident_t *loc) {
663  KC_TRACE(10, ("__kmpc_flush: called\n"));
664 
665  /* need explicit __mf() here since use volatile instead in library */
666  KMP_MB(); /* Flush all pending memory write invalidates. */
667 
668 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
669 #if KMP_MIC
670 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
671 // We shouldn't need it, though, since the ABI rules require that
672 // * If the compiler generates NGO stores it also generates the fence
673 // * If users hand-code NGO stores they should insert the fence
674 // therefore no incomplete unordered stores should be visible.
675 #else
676  // C74404
677  // This is to address non-temporal store instructions (sfence needed).
678  // The clflush instruction is addressed either (mfence needed).
679  // Probably the non-temporal load monvtdqa instruction should also be
680  // addressed.
681  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
682  if (!__kmp_cpuinfo.initialized) {
683  __kmp_query_cpuid(&__kmp_cpuinfo);
684  }
685  if (!__kmp_cpuinfo.flags.sse2) {
686  // CPU cannot execute SSE2 instructions.
687  } else {
688 #if KMP_COMPILER_ICC
689  _mm_mfence();
690 #elif KMP_COMPILER_MSVC
691  MemoryBarrier();
692 #else
693  __sync_synchronize();
694 #endif // KMP_COMPILER_ICC
695  }
696 #endif // KMP_MIC
697 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
698  KMP_ARCH_RISCV64)
699 // Nothing to see here move along
700 #elif KMP_ARCH_PPC64
701 // Nothing needed here (we have a real MB above).
702 #else
703 #error Unknown or unsupported architecture
704 #endif
705 
706 #if OMPT_SUPPORT && OMPT_OPTIONAL
707  if (ompt_enabled.ompt_callback_flush) {
708  ompt_callbacks.ompt_callback(ompt_callback_flush)(
709  __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
710  }
711 #endif
712 }
713 
714 /* -------------------------------------------------------------------------- */
722 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
723  KMP_COUNT_BLOCK(OMP_BARRIER);
724  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
725  __kmp_assert_valid_gtid(global_tid);
726 
727  if (!TCR_4(__kmp_init_parallel))
728  __kmp_parallel_initialize();
729 
730  __kmp_resume_if_soft_paused();
731 
732  if (__kmp_env_consistency_check) {
733  if (loc == 0) {
734  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
735  }
736  __kmp_check_barrier(global_tid, ct_barrier, loc);
737  }
738 
739 #if OMPT_SUPPORT
740  ompt_frame_t *ompt_frame;
741  if (ompt_enabled.enabled) {
742  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
743  if (ompt_frame->enter_frame.ptr == NULL)
744  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
745  }
746  OMPT_STORE_RETURN_ADDRESS(global_tid);
747 #endif
748  __kmp_threads[global_tid]->th.th_ident = loc;
749  // TODO: explicit barrier_wait_id:
750  // this function is called when 'barrier' directive is present or
751  // implicit barrier at the end of a worksharing construct.
752  // 1) better to add a per-thread barrier counter to a thread data structure
753  // 2) set to 0 when a new team is created
754  // 4) no sync is required
755 
756  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
757 #if OMPT_SUPPORT && OMPT_OPTIONAL
758  if (ompt_enabled.enabled) {
759  ompt_frame->enter_frame = ompt_data_none;
760  }
761 #endif
762 }
763 
764 /* The BARRIER for a MASTER section is always explicit */
771 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
772  int status = 0;
773 
774  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
775  __kmp_assert_valid_gtid(global_tid);
776 
777  if (!TCR_4(__kmp_init_parallel))
778  __kmp_parallel_initialize();
779 
780  __kmp_resume_if_soft_paused();
781 
782  if (KMP_MASTER_GTID(global_tid)) {
783  KMP_COUNT_BLOCK(OMP_MASTER);
784  KMP_PUSH_PARTITIONED_TIMER(OMP_master);
785  status = 1;
786  }
787 
788 #if OMPT_SUPPORT && OMPT_OPTIONAL
789  if (status) {
790  if (ompt_enabled.ompt_callback_masked) {
791  kmp_info_t *this_thr = __kmp_threads[global_tid];
792  kmp_team_t *team = this_thr->th.th_team;
793 
794  int tid = __kmp_tid_from_gtid(global_tid);
795  ompt_callbacks.ompt_callback(ompt_callback_masked)(
796  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
797  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
798  OMPT_GET_RETURN_ADDRESS(0));
799  }
800  }
801 #endif
802 
803  if (__kmp_env_consistency_check) {
804 #if KMP_USE_DYNAMIC_LOCK
805  if (status)
806  __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
807  else
808  __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
809 #else
810  if (status)
811  __kmp_push_sync(global_tid, ct_master, loc, NULL);
812  else
813  __kmp_check_sync(global_tid, ct_master, loc, NULL);
814 #endif
815  }
816 
817  return status;
818 }
819 
828 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
829  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
830  __kmp_assert_valid_gtid(global_tid);
831  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
832  KMP_POP_PARTITIONED_TIMER();
833 
834 #if OMPT_SUPPORT && OMPT_OPTIONAL
835  kmp_info_t *this_thr = __kmp_threads[global_tid];
836  kmp_team_t *team = this_thr->th.th_team;
837  if (ompt_enabled.ompt_callback_masked) {
838  int tid = __kmp_tid_from_gtid(global_tid);
839  ompt_callbacks.ompt_callback(ompt_callback_masked)(
840  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
841  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
842  OMPT_GET_RETURN_ADDRESS(0));
843  }
844 #endif
845 
846  if (__kmp_env_consistency_check) {
847  if (KMP_MASTER_GTID(global_tid))
848  __kmp_pop_sync(global_tid, ct_master, loc);
849  }
850 }
851 
860 kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
861  int status = 0;
862  int tid;
863  KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
864  __kmp_assert_valid_gtid(global_tid);
865 
866  if (!TCR_4(__kmp_init_parallel))
867  __kmp_parallel_initialize();
868 
869  __kmp_resume_if_soft_paused();
870 
871  tid = __kmp_tid_from_gtid(global_tid);
872  if (tid == filter) {
873  KMP_COUNT_BLOCK(OMP_MASKED);
874  KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
875  status = 1;
876  }
877 
878 #if OMPT_SUPPORT && OMPT_OPTIONAL
879  if (status) {
880  if (ompt_enabled.ompt_callback_masked) {
881  kmp_info_t *this_thr = __kmp_threads[global_tid];
882  kmp_team_t *team = this_thr->th.th_team;
883  ompt_callbacks.ompt_callback(ompt_callback_masked)(
884  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
885  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
886  OMPT_GET_RETURN_ADDRESS(0));
887  }
888  }
889 #endif
890 
891  if (__kmp_env_consistency_check) {
892 #if KMP_USE_DYNAMIC_LOCK
893  if (status)
894  __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
895  else
896  __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
897 #else
898  if (status)
899  __kmp_push_sync(global_tid, ct_masked, loc, NULL);
900  else
901  __kmp_check_sync(global_tid, ct_masked, loc, NULL);
902 #endif
903  }
904 
905  return status;
906 }
907 
916 void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
917  KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
918  __kmp_assert_valid_gtid(global_tid);
919  KMP_POP_PARTITIONED_TIMER();
920 
921 #if OMPT_SUPPORT && OMPT_OPTIONAL
922  kmp_info_t *this_thr = __kmp_threads[global_tid];
923  kmp_team_t *team = this_thr->th.th_team;
924  if (ompt_enabled.ompt_callback_masked) {
925  int tid = __kmp_tid_from_gtid(global_tid);
926  ompt_callbacks.ompt_callback(ompt_callback_masked)(
927  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
928  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
929  OMPT_GET_RETURN_ADDRESS(0));
930  }
931 #endif
932 
933  if (__kmp_env_consistency_check) {
934  __kmp_pop_sync(global_tid, ct_masked, loc);
935  }
936 }
937 
945 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
946  int cid = 0;
947  kmp_info_t *th;
948  KMP_DEBUG_ASSERT(__kmp_init_serial);
949 
950  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
951  __kmp_assert_valid_gtid(gtid);
952 
953  if (!TCR_4(__kmp_init_parallel))
954  __kmp_parallel_initialize();
955 
956  __kmp_resume_if_soft_paused();
957 
958 #if USE_ITT_BUILD
959  __kmp_itt_ordered_prep(gtid);
960 // TODO: ordered_wait_id
961 #endif /* USE_ITT_BUILD */
962 
963  th = __kmp_threads[gtid];
964 
965 #if OMPT_SUPPORT && OMPT_OPTIONAL
966  kmp_team_t *team;
967  ompt_wait_id_t lck;
968  void *codeptr_ra;
969  OMPT_STORE_RETURN_ADDRESS(gtid);
970  if (ompt_enabled.enabled) {
971  team = __kmp_team_from_gtid(gtid);
972  lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
973  /* OMPT state update */
974  th->th.ompt_thread_info.wait_id = lck;
975  th->th.ompt_thread_info.state = ompt_state_wait_ordered;
976 
977  /* OMPT event callback */
978  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
979  if (ompt_enabled.ompt_callback_mutex_acquire) {
980  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
981  ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
982  codeptr_ra);
983  }
984  }
985 #endif
986 
987  if (th->th.th_dispatch->th_deo_fcn != 0)
988  (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
989  else
990  __kmp_parallel_deo(&gtid, &cid, loc);
991 
992 #if OMPT_SUPPORT && OMPT_OPTIONAL
993  if (ompt_enabled.enabled) {
994  /* OMPT state update */
995  th->th.ompt_thread_info.state = ompt_state_work_parallel;
996  th->th.ompt_thread_info.wait_id = 0;
997 
998  /* OMPT event callback */
999  if (ompt_enabled.ompt_callback_mutex_acquired) {
1000  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1001  ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1002  }
1003  }
1004 #endif
1005 
1006 #if USE_ITT_BUILD
1007  __kmp_itt_ordered_start(gtid);
1008 #endif /* USE_ITT_BUILD */
1009 }
1010 
1018 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
1019  int cid = 0;
1020  kmp_info_t *th;
1021 
1022  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
1023  __kmp_assert_valid_gtid(gtid);
1024 
1025 #if USE_ITT_BUILD
1026  __kmp_itt_ordered_end(gtid);
1027 // TODO: ordered_wait_id
1028 #endif /* USE_ITT_BUILD */
1029 
1030  th = __kmp_threads[gtid];
1031 
1032  if (th->th.th_dispatch->th_dxo_fcn != 0)
1033  (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
1034  else
1035  __kmp_parallel_dxo(&gtid, &cid, loc);
1036 
1037 #if OMPT_SUPPORT && OMPT_OPTIONAL
1038  OMPT_STORE_RETURN_ADDRESS(gtid);
1039  if (ompt_enabled.ompt_callback_mutex_released) {
1040  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1041  ompt_mutex_ordered,
1042  (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1043  ->t.t_ordered.dt.t_value,
1044  OMPT_LOAD_RETURN_ADDRESS(gtid));
1045  }
1046 #endif
1047 }
1048 
1049 #if KMP_USE_DYNAMIC_LOCK
1050 
1051 static __forceinline void
1052 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
1053  kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1054  // Pointer to the allocated indirect lock is written to crit, while indexing
1055  // is ignored.
1056  void *idx;
1057  kmp_indirect_lock_t **lck;
1058  lck = (kmp_indirect_lock_t **)crit;
1059  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1060  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1061  KMP_SET_I_LOCK_LOCATION(ilk, loc);
1062  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
1063  KA_TRACE(20,
1064  ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1065 #if USE_ITT_BUILD
1066  __kmp_itt_critical_creating(ilk->lock, loc);
1067 #endif
1068  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1069  if (status == 0) {
1070 #if USE_ITT_BUILD
1071  __kmp_itt_critical_destroyed(ilk->lock);
1072 #endif
1073  // We don't really need to destroy the unclaimed lock here since it will be
1074  // cleaned up at program exit.
1075  // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
1076  }
1077  KMP_DEBUG_ASSERT(*lck != NULL);
1078 }
1079 
1080 // Fast-path acquire tas lock
1081 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1082  { \
1083  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1084  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1085  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1086  if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1087  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1088  kmp_uint32 spins; \
1089  KMP_FSYNC_PREPARE(l); \
1090  KMP_INIT_YIELD(spins); \
1091  kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1092  do { \
1093  if (TCR_4(__kmp_nth) > \
1094  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1095  KMP_YIELD(TRUE); \
1096  } else { \
1097  KMP_YIELD_SPIN(spins); \
1098  } \
1099  __kmp_spin_backoff(&backoff); \
1100  } while ( \
1101  KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1102  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1103  } \
1104  KMP_FSYNC_ACQUIRED(l); \
1105  }
1106 
1107 // Fast-path test tas lock
1108 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1109  { \
1110  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1111  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1112  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1113  rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1114  __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1115  }
1116 
1117 // Fast-path release tas lock
1118 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1119  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1120 
1121 #if KMP_USE_FUTEX
1122 
1123 #include <sys/syscall.h>
1124 #include <unistd.h>
1125 #ifndef FUTEX_WAIT
1126 #define FUTEX_WAIT 0
1127 #endif
1128 #ifndef FUTEX_WAKE
1129 #define FUTEX_WAKE 1
1130 #endif
1131 
1132 // Fast-path acquire futex lock
1133 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1134  { \
1135  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1136  kmp_int32 gtid_code = (gtid + 1) << 1; \
1137  KMP_MB(); \
1138  KMP_FSYNC_PREPARE(ftx); \
1139  kmp_int32 poll_val; \
1140  while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1141  &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1142  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1143  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1144  if (!cond) { \
1145  if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1146  poll_val | \
1147  KMP_LOCK_BUSY(1, futex))) { \
1148  continue; \
1149  } \
1150  poll_val |= KMP_LOCK_BUSY(1, futex); \
1151  } \
1152  kmp_int32 rc; \
1153  if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1154  NULL, NULL, 0)) != 0) { \
1155  continue; \
1156  } \
1157  gtid_code |= 1; \
1158  } \
1159  KMP_FSYNC_ACQUIRED(ftx); \
1160  }
1161 
1162 // Fast-path test futex lock
1163 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1164  { \
1165  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1166  if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1167  KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1168  KMP_FSYNC_ACQUIRED(ftx); \
1169  rc = TRUE; \
1170  } else { \
1171  rc = FALSE; \
1172  } \
1173  }
1174 
1175 // Fast-path release futex lock
1176 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1177  { \
1178  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1179  KMP_MB(); \
1180  KMP_FSYNC_RELEASING(ftx); \
1181  kmp_int32 poll_val = \
1182  KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1183  if (KMP_LOCK_STRIP(poll_val) & 1) { \
1184  syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1185  KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1186  } \
1187  KMP_MB(); \
1188  KMP_YIELD_OVERSUB(); \
1189  }
1190 
1191 #endif // KMP_USE_FUTEX
1192 
1193 #else // KMP_USE_DYNAMIC_LOCK
1194 
1195 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1196  ident_t const *loc,
1197  kmp_int32 gtid) {
1198  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1199 
1200  // Because of the double-check, the following load doesn't need to be volatile
1201  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1202 
1203  if (lck == NULL) {
1204  void *idx;
1205 
1206  // Allocate & initialize the lock.
1207  // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1208  lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1209  __kmp_init_user_lock_with_checks(lck);
1210  __kmp_set_user_lock_location(lck, loc);
1211 #if USE_ITT_BUILD
1212  __kmp_itt_critical_creating(lck);
1213 // __kmp_itt_critical_creating() should be called *before* the first usage
1214 // of underlying lock. It is the only place where we can guarantee it. There
1215 // are chances the lock will destroyed with no usage, but it is not a
1216 // problem, because this is not real event seen by user but rather setting
1217 // name for object (lock). See more details in kmp_itt.h.
1218 #endif /* USE_ITT_BUILD */
1219 
1220  // Use a cmpxchg instruction to slam the start of the critical section with
1221  // the lock pointer. If another thread beat us to it, deallocate the lock,
1222  // and use the lock that the other thread allocated.
1223  int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1224 
1225  if (status == 0) {
1226 // Deallocate the lock and reload the value.
1227 #if USE_ITT_BUILD
1228  __kmp_itt_critical_destroyed(lck);
1229 // Let ITT know the lock is destroyed and the same memory location may be reused
1230 // for another purpose.
1231 #endif /* USE_ITT_BUILD */
1232  __kmp_destroy_user_lock_with_checks(lck);
1233  __kmp_user_lock_free(&idx, gtid, lck);
1234  lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1235  KMP_DEBUG_ASSERT(lck != NULL);
1236  }
1237  }
1238  return lck;
1239 }
1240 
1241 #endif // KMP_USE_DYNAMIC_LOCK
1242 
1253 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1254  kmp_critical_name *crit) {
1255 #if KMP_USE_DYNAMIC_LOCK
1256 #if OMPT_SUPPORT && OMPT_OPTIONAL
1257  OMPT_STORE_RETURN_ADDRESS(global_tid);
1258 #endif // OMPT_SUPPORT
1259  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1260 #else
1261  KMP_COUNT_BLOCK(OMP_CRITICAL);
1262 #if OMPT_SUPPORT && OMPT_OPTIONAL
1263  ompt_state_t prev_state = ompt_state_undefined;
1264  ompt_thread_info_t ti;
1265 #endif
1266  kmp_user_lock_p lck;
1267 
1268  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1269  __kmp_assert_valid_gtid(global_tid);
1270 
1271  // TODO: add THR_OVHD_STATE
1272 
1273  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1274  KMP_CHECK_USER_LOCK_INIT();
1275 
1276  if ((__kmp_user_lock_kind == lk_tas) &&
1277  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1278  lck = (kmp_user_lock_p)crit;
1279  }
1280 #if KMP_USE_FUTEX
1281  else if ((__kmp_user_lock_kind == lk_futex) &&
1282  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1283  lck = (kmp_user_lock_p)crit;
1284  }
1285 #endif
1286  else { // ticket, queuing or drdpa
1287  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1288  }
1289 
1290  if (__kmp_env_consistency_check)
1291  __kmp_push_sync(global_tid, ct_critical, loc, lck);
1292 
1293  // since the critical directive binds to all threads, not just the current
1294  // team we have to check this even if we are in a serialized team.
1295  // also, even if we are the uber thread, we still have to conduct the lock,
1296  // as we have to contend with sibling threads.
1297 
1298 #if USE_ITT_BUILD
1299  __kmp_itt_critical_acquiring(lck);
1300 #endif /* USE_ITT_BUILD */
1301 #if OMPT_SUPPORT && OMPT_OPTIONAL
1302  OMPT_STORE_RETURN_ADDRESS(gtid);
1303  void *codeptr_ra = NULL;
1304  if (ompt_enabled.enabled) {
1305  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1306  /* OMPT state update */
1307  prev_state = ti.state;
1308  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1309  ti.state = ompt_state_wait_critical;
1310 
1311  /* OMPT event callback */
1312  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1313  if (ompt_enabled.ompt_callback_mutex_acquire) {
1314  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1315  ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1316  (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1317  }
1318  }
1319 #endif
1320  // Value of 'crit' should be good for using as a critical_id of the critical
1321  // section directive.
1322  __kmp_acquire_user_lock_with_checks(lck, global_tid);
1323 
1324 #if USE_ITT_BUILD
1325  __kmp_itt_critical_acquired(lck);
1326 #endif /* USE_ITT_BUILD */
1327 #if OMPT_SUPPORT && OMPT_OPTIONAL
1328  if (ompt_enabled.enabled) {
1329  /* OMPT state update */
1330  ti.state = prev_state;
1331  ti.wait_id = 0;
1332 
1333  /* OMPT event callback */
1334  if (ompt_enabled.ompt_callback_mutex_acquired) {
1335  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1336  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1337  }
1338  }
1339 #endif
1340  KMP_POP_PARTITIONED_TIMER();
1341 
1342  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1343  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1344 #endif // KMP_USE_DYNAMIC_LOCK
1345 }
1346 
1347 #if KMP_USE_DYNAMIC_LOCK
1348 
1349 // Converts the given hint to an internal lock implementation
1350 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1351 #if KMP_USE_TSX
1352 #define KMP_TSX_LOCK(seq) lockseq_##seq
1353 #else
1354 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1355 #endif
1356 
1357 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1358 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
1359 #else
1360 #define KMP_CPUINFO_RTM 0
1361 #endif
1362 
1363  // Hints that do not require further logic
1364  if (hint & kmp_lock_hint_hle)
1365  return KMP_TSX_LOCK(hle);
1366  if (hint & kmp_lock_hint_rtm)
1367  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1368  if (hint & kmp_lock_hint_adaptive)
1369  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1370 
1371  // Rule out conflicting hints first by returning the default lock
1372  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1373  return __kmp_user_lock_seq;
1374  if ((hint & omp_lock_hint_speculative) &&
1375  (hint & omp_lock_hint_nonspeculative))
1376  return __kmp_user_lock_seq;
1377 
1378  // Do not even consider speculation when it appears to be contended
1379  if (hint & omp_lock_hint_contended)
1380  return lockseq_queuing;
1381 
1382  // Uncontended lock without speculation
1383  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1384  return lockseq_tas;
1385 
1386  // Use RTM lock for speculation
1387  if (hint & omp_lock_hint_speculative)
1388  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1389 
1390  return __kmp_user_lock_seq;
1391 }
1392 
1393 #if OMPT_SUPPORT && OMPT_OPTIONAL
1394 #if KMP_USE_DYNAMIC_LOCK
1395 static kmp_mutex_impl_t
1396 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1397  if (user_lock) {
1398  switch (KMP_EXTRACT_D_TAG(user_lock)) {
1399  case 0:
1400  break;
1401 #if KMP_USE_FUTEX
1402  case locktag_futex:
1403  return kmp_mutex_impl_queuing;
1404 #endif
1405  case locktag_tas:
1406  return kmp_mutex_impl_spin;
1407 #if KMP_USE_TSX
1408  case locktag_hle:
1409  case locktag_rtm_spin:
1410  return kmp_mutex_impl_speculative;
1411 #endif
1412  default:
1413  return kmp_mutex_impl_none;
1414  }
1415  ilock = KMP_LOOKUP_I_LOCK(user_lock);
1416  }
1417  KMP_ASSERT(ilock);
1418  switch (ilock->type) {
1419 #if KMP_USE_TSX
1420  case locktag_adaptive:
1421  case locktag_rtm_queuing:
1422  return kmp_mutex_impl_speculative;
1423 #endif
1424  case locktag_nested_tas:
1425  return kmp_mutex_impl_spin;
1426 #if KMP_USE_FUTEX
1427  case locktag_nested_futex:
1428 #endif
1429  case locktag_ticket:
1430  case locktag_queuing:
1431  case locktag_drdpa:
1432  case locktag_nested_ticket:
1433  case locktag_nested_queuing:
1434  case locktag_nested_drdpa:
1435  return kmp_mutex_impl_queuing;
1436  default:
1437  return kmp_mutex_impl_none;
1438  }
1439 }
1440 #else
1441 // For locks without dynamic binding
1442 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1443  switch (__kmp_user_lock_kind) {
1444  case lk_tas:
1445  return kmp_mutex_impl_spin;
1446 #if KMP_USE_FUTEX
1447  case lk_futex:
1448 #endif
1449  case lk_ticket:
1450  case lk_queuing:
1451  case lk_drdpa:
1452  return kmp_mutex_impl_queuing;
1453 #if KMP_USE_TSX
1454  case lk_hle:
1455  case lk_rtm_queuing:
1456  case lk_rtm_spin:
1457  case lk_adaptive:
1458  return kmp_mutex_impl_speculative;
1459 #endif
1460  default:
1461  return kmp_mutex_impl_none;
1462  }
1463 }
1464 #endif // KMP_USE_DYNAMIC_LOCK
1465 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
1466 
1480 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1481  kmp_critical_name *crit, uint32_t hint) {
1482  KMP_COUNT_BLOCK(OMP_CRITICAL);
1483  kmp_user_lock_p lck;
1484 #if OMPT_SUPPORT && OMPT_OPTIONAL
1485  ompt_state_t prev_state = ompt_state_undefined;
1486  ompt_thread_info_t ti;
1487  // This is the case, if called from __kmpc_critical:
1488  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1489  if (!codeptr)
1490  codeptr = OMPT_GET_RETURN_ADDRESS(0);
1491 #endif
1492 
1493  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1494  __kmp_assert_valid_gtid(global_tid);
1495 
1496  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1497  // Check if it is initialized.
1498  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1499  kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1500  if (*lk == 0) {
1501  if (KMP_IS_D_LOCK(lockseq)) {
1502  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1503  KMP_GET_D_TAG(lockseq));
1504  } else {
1505  __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1506  }
1507  }
1508  // Branch for accessing the actual lock object and set operation. This
1509  // branching is inevitable since this lock initialization does not follow the
1510  // normal dispatch path (lock table is not used).
1511  if (KMP_EXTRACT_D_TAG(lk) != 0) {
1512  lck = (kmp_user_lock_p)lk;
1513  if (__kmp_env_consistency_check) {
1514  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1515  __kmp_map_hint_to_lock(hint));
1516  }
1517 #if USE_ITT_BUILD
1518  __kmp_itt_critical_acquiring(lck);
1519 #endif
1520 #if OMPT_SUPPORT && OMPT_OPTIONAL
1521  if (ompt_enabled.enabled) {
1522  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1523  /* OMPT state update */
1524  prev_state = ti.state;
1525  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1526  ti.state = ompt_state_wait_critical;
1527 
1528  /* OMPT event callback */
1529  if (ompt_enabled.ompt_callback_mutex_acquire) {
1530  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1531  ompt_mutex_critical, (unsigned int)hint,
1532  __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1533  codeptr);
1534  }
1535  }
1536 #endif
1537 #if KMP_USE_INLINED_TAS
1538  if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1539  KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1540  } else
1541 #elif KMP_USE_INLINED_FUTEX
1542  if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1543  KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1544  } else
1545 #endif
1546  {
1547  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1548  }
1549  } else {
1550  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1551  lck = ilk->lock;
1552  if (__kmp_env_consistency_check) {
1553  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1554  __kmp_map_hint_to_lock(hint));
1555  }
1556 #if USE_ITT_BUILD
1557  __kmp_itt_critical_acquiring(lck);
1558 #endif
1559 #if OMPT_SUPPORT && OMPT_OPTIONAL
1560  if (ompt_enabled.enabled) {
1561  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1562  /* OMPT state update */
1563  prev_state = ti.state;
1564  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1565  ti.state = ompt_state_wait_critical;
1566 
1567  /* OMPT event callback */
1568  if (ompt_enabled.ompt_callback_mutex_acquire) {
1569  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1570  ompt_mutex_critical, (unsigned int)hint,
1571  __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1572  codeptr);
1573  }
1574  }
1575 #endif
1576  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1577  }
1578  KMP_POP_PARTITIONED_TIMER();
1579 
1580 #if USE_ITT_BUILD
1581  __kmp_itt_critical_acquired(lck);
1582 #endif /* USE_ITT_BUILD */
1583 #if OMPT_SUPPORT && OMPT_OPTIONAL
1584  if (ompt_enabled.enabled) {
1585  /* OMPT state update */
1586  ti.state = prev_state;
1587  ti.wait_id = 0;
1588 
1589  /* OMPT event callback */
1590  if (ompt_enabled.ompt_callback_mutex_acquired) {
1591  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1592  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1593  }
1594  }
1595 #endif
1596 
1597  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1598  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1599 } // __kmpc_critical_with_hint
1600 
1601 #endif // KMP_USE_DYNAMIC_LOCK
1602 
1612 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1613  kmp_critical_name *crit) {
1614  kmp_user_lock_p lck;
1615 
1616  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1617 
1618 #if KMP_USE_DYNAMIC_LOCK
1619  int locktag = KMP_EXTRACT_D_TAG(crit);
1620  if (locktag) {
1621  lck = (kmp_user_lock_p)crit;
1622  KMP_ASSERT(lck != NULL);
1623  if (__kmp_env_consistency_check) {
1624  __kmp_pop_sync(global_tid, ct_critical, loc);
1625  }
1626 #if USE_ITT_BUILD
1627  __kmp_itt_critical_releasing(lck);
1628 #endif
1629 #if KMP_USE_INLINED_TAS
1630  if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1631  KMP_RELEASE_TAS_LOCK(lck, global_tid);
1632  } else
1633 #elif KMP_USE_INLINED_FUTEX
1634  if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1635  KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1636  } else
1637 #endif
1638  {
1639  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1640  }
1641  } else {
1642  kmp_indirect_lock_t *ilk =
1643  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1644  KMP_ASSERT(ilk != NULL);
1645  lck = ilk->lock;
1646  if (__kmp_env_consistency_check) {
1647  __kmp_pop_sync(global_tid, ct_critical, loc);
1648  }
1649 #if USE_ITT_BUILD
1650  __kmp_itt_critical_releasing(lck);
1651 #endif
1652  KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1653  }
1654 
1655 #else // KMP_USE_DYNAMIC_LOCK
1656 
1657  if ((__kmp_user_lock_kind == lk_tas) &&
1658  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1659  lck = (kmp_user_lock_p)crit;
1660  }
1661 #if KMP_USE_FUTEX
1662  else if ((__kmp_user_lock_kind == lk_futex) &&
1663  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1664  lck = (kmp_user_lock_p)crit;
1665  }
1666 #endif
1667  else { // ticket, queuing or drdpa
1668  lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1669  }
1670 
1671  KMP_ASSERT(lck != NULL);
1672 
1673  if (__kmp_env_consistency_check)
1674  __kmp_pop_sync(global_tid, ct_critical, loc);
1675 
1676 #if USE_ITT_BUILD
1677  __kmp_itt_critical_releasing(lck);
1678 #endif /* USE_ITT_BUILD */
1679  // Value of 'crit' should be good for using as a critical_id of the critical
1680  // section directive.
1681  __kmp_release_user_lock_with_checks(lck, global_tid);
1682 
1683 #endif // KMP_USE_DYNAMIC_LOCK
1684 
1685 #if OMPT_SUPPORT && OMPT_OPTIONAL
1686  /* OMPT release event triggers after lock is released; place here to trigger
1687  * for all #if branches */
1688  OMPT_STORE_RETURN_ADDRESS(global_tid);
1689  if (ompt_enabled.ompt_callback_mutex_released) {
1690  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1691  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1692  OMPT_LOAD_RETURN_ADDRESS(0));
1693  }
1694 #endif
1695 
1696  KMP_POP_PARTITIONED_TIMER();
1697  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1698 }
1699 
1709 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1710  int status;
1711  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1712  __kmp_assert_valid_gtid(global_tid);
1713 
1714  if (!TCR_4(__kmp_init_parallel))
1715  __kmp_parallel_initialize();
1716 
1717  __kmp_resume_if_soft_paused();
1718 
1719  if (__kmp_env_consistency_check)
1720  __kmp_check_barrier(global_tid, ct_barrier, loc);
1721 
1722 #if OMPT_SUPPORT
1723  ompt_frame_t *ompt_frame;
1724  if (ompt_enabled.enabled) {
1725  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1726  if (ompt_frame->enter_frame.ptr == NULL)
1727  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1728  }
1729  OMPT_STORE_RETURN_ADDRESS(global_tid);
1730 #endif
1731 #if USE_ITT_NOTIFY
1732  __kmp_threads[global_tid]->th.th_ident = loc;
1733 #endif
1734  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1735 #if OMPT_SUPPORT && OMPT_OPTIONAL
1736  if (ompt_enabled.enabled) {
1737  ompt_frame->enter_frame = ompt_data_none;
1738  }
1739 #endif
1740 
1741  return (status != 0) ? 0 : 1;
1742 }
1743 
1753 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1754  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1755  __kmp_assert_valid_gtid(global_tid);
1756  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1757 }
1758 
1769 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1770  kmp_int32 ret;
1771  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1772  __kmp_assert_valid_gtid(global_tid);
1773 
1774  if (!TCR_4(__kmp_init_parallel))
1775  __kmp_parallel_initialize();
1776 
1777  __kmp_resume_if_soft_paused();
1778 
1779  if (__kmp_env_consistency_check) {
1780  if (loc == 0) {
1781  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1782  }
1783  __kmp_check_barrier(global_tid, ct_barrier, loc);
1784  }
1785 
1786 #if OMPT_SUPPORT
1787  ompt_frame_t *ompt_frame;
1788  if (ompt_enabled.enabled) {
1789  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1790  if (ompt_frame->enter_frame.ptr == NULL)
1791  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1792  }
1793  OMPT_STORE_RETURN_ADDRESS(global_tid);
1794 #endif
1795 #if USE_ITT_NOTIFY
1796  __kmp_threads[global_tid]->th.th_ident = loc;
1797 #endif
1798  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1799 #if OMPT_SUPPORT && OMPT_OPTIONAL
1800  if (ompt_enabled.enabled) {
1801  ompt_frame->enter_frame = ompt_data_none;
1802  }
1803 #endif
1804 
1805  ret = __kmpc_master(loc, global_tid);
1806 
1807  if (__kmp_env_consistency_check) {
1808  /* there's no __kmpc_end_master called; so the (stats) */
1809  /* actions of __kmpc_end_master are done here */
1810  if (ret) {
1811  /* only one thread should do the pop since only */
1812  /* one did the push (see __kmpc_master()) */
1813  __kmp_pop_sync(global_tid, ct_master, loc);
1814  }
1815  }
1816 
1817  return (ret);
1818 }
1819 
1820 /* The BARRIER for a SINGLE process section is always explicit */
1832 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1833  __kmp_assert_valid_gtid(global_tid);
1834  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1835 
1836  if (rc) {
1837  // We are going to execute the single statement, so we should count it.
1838  KMP_COUNT_BLOCK(OMP_SINGLE);
1839  KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1840  }
1841 
1842 #if OMPT_SUPPORT && OMPT_OPTIONAL
1843  kmp_info_t *this_thr = __kmp_threads[global_tid];
1844  kmp_team_t *team = this_thr->th.th_team;
1845  int tid = __kmp_tid_from_gtid(global_tid);
1846 
1847  if (ompt_enabled.enabled) {
1848  if (rc) {
1849  if (ompt_enabled.ompt_callback_work) {
1850  ompt_callbacks.ompt_callback(ompt_callback_work)(
1851  ompt_work_single_executor, ompt_scope_begin,
1852  &(team->t.ompt_team_info.parallel_data),
1853  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1854  1, OMPT_GET_RETURN_ADDRESS(0));
1855  }
1856  } else {
1857  if (ompt_enabled.ompt_callback_work) {
1858  ompt_callbacks.ompt_callback(ompt_callback_work)(
1859  ompt_work_single_other, ompt_scope_begin,
1860  &(team->t.ompt_team_info.parallel_data),
1861  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1862  1, OMPT_GET_RETURN_ADDRESS(0));
1863  ompt_callbacks.ompt_callback(ompt_callback_work)(
1864  ompt_work_single_other, ompt_scope_end,
1865  &(team->t.ompt_team_info.parallel_data),
1866  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1867  1, OMPT_GET_RETURN_ADDRESS(0));
1868  }
1869  }
1870  }
1871 #endif
1872 
1873  return rc;
1874 }
1875 
1885 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1886  __kmp_assert_valid_gtid(global_tid);
1887  __kmp_exit_single(global_tid);
1888  KMP_POP_PARTITIONED_TIMER();
1889 
1890 #if OMPT_SUPPORT && OMPT_OPTIONAL
1891  kmp_info_t *this_thr = __kmp_threads[global_tid];
1892  kmp_team_t *team = this_thr->th.th_team;
1893  int tid = __kmp_tid_from_gtid(global_tid);
1894 
1895  if (ompt_enabled.ompt_callback_work) {
1896  ompt_callbacks.ompt_callback(ompt_callback_work)(
1897  ompt_work_single_executor, ompt_scope_end,
1898  &(team->t.ompt_team_info.parallel_data),
1899  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1900  OMPT_GET_RETURN_ADDRESS(0));
1901  }
1902 #endif
1903 }
1904 
1912 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1913  KMP_POP_PARTITIONED_TIMER();
1914  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1915 
1916 #if OMPT_SUPPORT && OMPT_OPTIONAL
1917  if (ompt_enabled.ompt_callback_work) {
1918  ompt_work_t ompt_work_type = ompt_work_loop;
1919  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1920  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1921  // Determine workshare type
1922  if (loc != NULL) {
1923  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1924  ompt_work_type = ompt_work_loop;
1925  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1926  ompt_work_type = ompt_work_sections;
1927  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1928  ompt_work_type = ompt_work_distribute;
1929  } else {
1930  // use default set above.
1931  // a warning about this case is provided in __kmpc_for_static_init
1932  }
1933  KMP_DEBUG_ASSERT(ompt_work_type);
1934  }
1935  ompt_callbacks.ompt_callback(ompt_callback_work)(
1936  ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1937  &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1938  }
1939 #endif
1940  if (__kmp_env_consistency_check)
1941  __kmp_pop_workshare(global_tid, ct_pdo, loc);
1942 }
1943 
1944 // User routines which take C-style arguments (call by value)
1945 // different from the Fortran equivalent routines
1946 
1947 void ompc_set_num_threads(int arg) {
1948  // !!!!! TODO: check the per-task binding
1949  __kmp_set_num_threads(arg, __kmp_entry_gtid());
1950 }
1951 
1952 void ompc_set_dynamic(int flag) {
1953  kmp_info_t *thread;
1954 
1955  /* For the thread-private implementation of the internal controls */
1956  thread = __kmp_entry_thread();
1957 
1958  __kmp_save_internal_controls(thread);
1959 
1960  set__dynamic(thread, flag ? true : false);
1961 }
1962 
1963 void ompc_set_nested(int flag) {
1964  kmp_info_t *thread;
1965 
1966  /* For the thread-private internal controls implementation */
1967  thread = __kmp_entry_thread();
1968 
1969  __kmp_save_internal_controls(thread);
1970 
1971  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
1972 }
1973 
1974 void ompc_set_max_active_levels(int max_active_levels) {
1975  /* TO DO */
1976  /* we want per-task implementation of this internal control */
1977 
1978  /* For the per-thread internal controls implementation */
1979  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1980 }
1981 
1982 void ompc_set_schedule(omp_sched_t kind, int modifier) {
1983  // !!!!! TODO: check the per-task binding
1984  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1985 }
1986 
1987 int ompc_get_ancestor_thread_num(int level) {
1988  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1989 }
1990 
1991 int ompc_get_team_size(int level) {
1992  return __kmp_get_team_size(__kmp_entry_gtid(), level);
1993 }
1994 
1995 /* OpenMP 5.0 Affinity Format API */
1996 void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
1997  if (!__kmp_init_serial) {
1998  __kmp_serial_initialize();
1999  }
2000  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
2001  format, KMP_STRLEN(format) + 1);
2002 }
2003 
2004 size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
2005  size_t format_size;
2006  if (!__kmp_init_serial) {
2007  __kmp_serial_initialize();
2008  }
2009  format_size = KMP_STRLEN(__kmp_affinity_format);
2010  if (buffer && size) {
2011  __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
2012  format_size + 1);
2013  }
2014  return format_size;
2015 }
2016 
2017 void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
2018  int gtid;
2019  if (!TCR_4(__kmp_init_middle)) {
2020  __kmp_middle_initialize();
2021  }
2022  __kmp_assign_root_init_mask();
2023  gtid = __kmp_get_gtid();
2024  __kmp_aux_display_affinity(gtid, format);
2025 }
2026 
2027 size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
2028  char const *format) {
2029  int gtid;
2030  size_t num_required;
2031  kmp_str_buf_t capture_buf;
2032  if (!TCR_4(__kmp_init_middle)) {
2033  __kmp_middle_initialize();
2034  }
2035  __kmp_assign_root_init_mask();
2036  gtid = __kmp_get_gtid();
2037  __kmp_str_buf_init(&capture_buf);
2038  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2039  if (buffer && buf_size) {
2040  __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
2041  capture_buf.used + 1);
2042  }
2043  __kmp_str_buf_free(&capture_buf);
2044  return num_required;
2045 }
2046 
2047 void kmpc_set_stacksize(int arg) {
2048  // __kmp_aux_set_stacksize initializes the library if needed
2049  __kmp_aux_set_stacksize(arg);
2050 }
2051 
2052 void kmpc_set_stacksize_s(size_t arg) {
2053  // __kmp_aux_set_stacksize initializes the library if needed
2054  __kmp_aux_set_stacksize(arg);
2055 }
2056 
2057 void kmpc_set_blocktime(int arg) {
2058  int gtid, tid;
2059  kmp_info_t *thread;
2060 
2061  gtid = __kmp_entry_gtid();
2062  tid = __kmp_tid_from_gtid(gtid);
2063  thread = __kmp_thread_from_gtid(gtid);
2064 
2065  __kmp_aux_set_blocktime(arg, thread, tid);
2066 }
2067 
2068 void kmpc_set_library(int arg) {
2069  // __kmp_user_set_library initializes the library if needed
2070  __kmp_user_set_library((enum library_type)arg);
2071 }
2072 
2073 void kmpc_set_defaults(char const *str) {
2074  // __kmp_aux_set_defaults initializes the library if needed
2075  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
2076 }
2077 
2078 void kmpc_set_disp_num_buffers(int arg) {
2079  // ignore after initialization because some teams have already
2080  // allocated dispatch buffers
2081  if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
2082  arg <= KMP_MAX_DISP_NUM_BUFF) {
2083  __kmp_dispatch_num_buffers = arg;
2084  }
2085 }
2086 
2087 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2088 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2089  return -1;
2090 #else
2091  if (!TCR_4(__kmp_init_middle)) {
2092  __kmp_middle_initialize();
2093  }
2094  __kmp_assign_root_init_mask();
2095  return __kmp_aux_set_affinity_mask_proc(proc, mask);
2096 #endif
2097 }
2098 
2099 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2100 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2101  return -1;
2102 #else
2103  if (!TCR_4(__kmp_init_middle)) {
2104  __kmp_middle_initialize();
2105  }
2106  __kmp_assign_root_init_mask();
2107  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2108 #endif
2109 }
2110 
2111 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2112 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2113  return -1;
2114 #else
2115  if (!TCR_4(__kmp_init_middle)) {
2116  __kmp_middle_initialize();
2117  }
2118  __kmp_assign_root_init_mask();
2119  return __kmp_aux_get_affinity_mask_proc(proc, mask);
2120 #endif
2121 }
2122 
2123 /* -------------------------------------------------------------------------- */
2168 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2169  void *cpy_data, void (*cpy_func)(void *, void *),
2170  kmp_int32 didit) {
2171  void **data_ptr;
2172  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2173  __kmp_assert_valid_gtid(gtid);
2174 
2175  KMP_MB();
2176 
2177  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2178 
2179  if (__kmp_env_consistency_check) {
2180  if (loc == 0) {
2181  KMP_WARNING(ConstructIdentInvalid);
2182  }
2183  }
2184 
2185  // ToDo: Optimize the following two barriers into some kind of split barrier
2186 
2187  if (didit)
2188  *data_ptr = cpy_data;
2189 
2190 #if OMPT_SUPPORT
2191  ompt_frame_t *ompt_frame;
2192  if (ompt_enabled.enabled) {
2193  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2194  if (ompt_frame->enter_frame.ptr == NULL)
2195  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2196  }
2197  OMPT_STORE_RETURN_ADDRESS(gtid);
2198 #endif
2199 /* This barrier is not a barrier region boundary */
2200 #if USE_ITT_NOTIFY
2201  __kmp_threads[gtid]->th.th_ident = loc;
2202 #endif
2203  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2204 
2205  if (!didit)
2206  (*cpy_func)(cpy_data, *data_ptr);
2207 
2208  // Consider next barrier a user-visible barrier for barrier region boundaries
2209  // Nesting checks are already handled by the single construct checks
2210  {
2211 #if OMPT_SUPPORT
2212  OMPT_STORE_RETURN_ADDRESS(gtid);
2213 #endif
2214 #if USE_ITT_NOTIFY
2215  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2216 // tasks can overwrite the location)
2217 #endif
2218  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2219 #if OMPT_SUPPORT && OMPT_OPTIONAL
2220  if (ompt_enabled.enabled) {
2221  ompt_frame->enter_frame = ompt_data_none;
2222  }
2223 #endif
2224  }
2225 }
2226 
2227 /* -------------------------------------------------------------------------- */
2228 
2229 #define INIT_LOCK __kmp_init_user_lock_with_checks
2230 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2231 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2232 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2233 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2234 #define ACQUIRE_NESTED_LOCK_TIMED \
2235  __kmp_acquire_nested_user_lock_with_checks_timed
2236 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2237 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2238 #define TEST_LOCK __kmp_test_user_lock_with_checks
2239 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2240 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2241 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2242 
2243 // TODO: Make check abort messages use location info & pass it into
2244 // with_checks routines
2245 
2246 #if KMP_USE_DYNAMIC_LOCK
2247 
2248 // internal lock initializer
2249 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2250  kmp_dyna_lockseq_t seq) {
2251  if (KMP_IS_D_LOCK(seq)) {
2252  KMP_INIT_D_LOCK(lock, seq);
2253 #if USE_ITT_BUILD
2254  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2255 #endif
2256  } else {
2257  KMP_INIT_I_LOCK(lock, seq);
2258 #if USE_ITT_BUILD
2259  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2260  __kmp_itt_lock_creating(ilk->lock, loc);
2261 #endif
2262  }
2263 }
2264 
2265 // internal nest lock initializer
2266 static __forceinline void
2267 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2268  kmp_dyna_lockseq_t seq) {
2269 #if KMP_USE_TSX
2270  // Don't have nested lock implementation for speculative locks
2271  if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2272  seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2273  seq = __kmp_user_lock_seq;
2274 #endif
2275  switch (seq) {
2276  case lockseq_tas:
2277  seq = lockseq_nested_tas;
2278  break;
2279 #if KMP_USE_FUTEX
2280  case lockseq_futex:
2281  seq = lockseq_nested_futex;
2282  break;
2283 #endif
2284  case lockseq_ticket:
2285  seq = lockseq_nested_ticket;
2286  break;
2287  case lockseq_queuing:
2288  seq = lockseq_nested_queuing;
2289  break;
2290  case lockseq_drdpa:
2291  seq = lockseq_nested_drdpa;
2292  break;
2293  default:
2294  seq = lockseq_nested_queuing;
2295  }
2296  KMP_INIT_I_LOCK(lock, seq);
2297 #if USE_ITT_BUILD
2298  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2299  __kmp_itt_lock_creating(ilk->lock, loc);
2300 #endif
2301 }
2302 
2303 /* initialize the lock with a hint */
2304 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2305  uintptr_t hint) {
2306  KMP_DEBUG_ASSERT(__kmp_init_serial);
2307  if (__kmp_env_consistency_check && user_lock == NULL) {
2308  KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2309  }
2310 
2311  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2312 
2313 #if OMPT_SUPPORT && OMPT_OPTIONAL
2314  // This is the case, if called from omp_init_lock_with_hint:
2315  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2316  if (!codeptr)
2317  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2318  if (ompt_enabled.ompt_callback_lock_init) {
2319  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2320  ompt_mutex_lock, (omp_lock_hint_t)hint,
2321  __ompt_get_mutex_impl_type(user_lock),
2322  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2323  }
2324 #endif
2325 }
2326 
2327 /* initialize the lock with a hint */
2328 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2329  void **user_lock, uintptr_t hint) {
2330  KMP_DEBUG_ASSERT(__kmp_init_serial);
2331  if (__kmp_env_consistency_check && user_lock == NULL) {
2332  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2333  }
2334 
2335  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2336 
2337 #if OMPT_SUPPORT && OMPT_OPTIONAL
2338  // This is the case, if called from omp_init_lock_with_hint:
2339  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2340  if (!codeptr)
2341  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2342  if (ompt_enabled.ompt_callback_lock_init) {
2343  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2344  ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2345  __ompt_get_mutex_impl_type(user_lock),
2346  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2347  }
2348 #endif
2349 }
2350 
2351 #endif // KMP_USE_DYNAMIC_LOCK
2352 
2353 /* initialize the lock */
2354 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2355 #if KMP_USE_DYNAMIC_LOCK
2356 
2357  KMP_DEBUG_ASSERT(__kmp_init_serial);
2358  if (__kmp_env_consistency_check && user_lock == NULL) {
2359  KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2360  }
2361  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2362 
2363 #if OMPT_SUPPORT && OMPT_OPTIONAL
2364  // This is the case, if called from omp_init_lock_with_hint:
2365  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2366  if (!codeptr)
2367  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2368  if (ompt_enabled.ompt_callback_lock_init) {
2369  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2370  ompt_mutex_lock, omp_lock_hint_none,
2371  __ompt_get_mutex_impl_type(user_lock),
2372  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2373  }
2374 #endif
2375 
2376 #else // KMP_USE_DYNAMIC_LOCK
2377 
2378  static char const *const func = "omp_init_lock";
2379  kmp_user_lock_p lck;
2380  KMP_DEBUG_ASSERT(__kmp_init_serial);
2381 
2382  if (__kmp_env_consistency_check) {
2383  if (user_lock == NULL) {
2384  KMP_FATAL(LockIsUninitialized, func);
2385  }
2386  }
2387 
2388  KMP_CHECK_USER_LOCK_INIT();
2389 
2390  if ((__kmp_user_lock_kind == lk_tas) &&
2391  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2392  lck = (kmp_user_lock_p)user_lock;
2393  }
2394 #if KMP_USE_FUTEX
2395  else if ((__kmp_user_lock_kind == lk_futex) &&
2396  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2397  lck = (kmp_user_lock_p)user_lock;
2398  }
2399 #endif
2400  else {
2401  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2402  }
2403  INIT_LOCK(lck);
2404  __kmp_set_user_lock_location(lck, loc);
2405 
2406 #if OMPT_SUPPORT && OMPT_OPTIONAL
2407  // This is the case, if called from omp_init_lock_with_hint:
2408  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2409  if (!codeptr)
2410  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2411  if (ompt_enabled.ompt_callback_lock_init) {
2412  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2413  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2414  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2415  }
2416 #endif
2417 
2418 #if USE_ITT_BUILD
2419  __kmp_itt_lock_creating(lck);
2420 #endif /* USE_ITT_BUILD */
2421 
2422 #endif // KMP_USE_DYNAMIC_LOCK
2423 } // __kmpc_init_lock
2424 
2425 /* initialize the lock */
2426 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2427 #if KMP_USE_DYNAMIC_LOCK
2428 
2429  KMP_DEBUG_ASSERT(__kmp_init_serial);
2430  if (__kmp_env_consistency_check && user_lock == NULL) {
2431  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2432  }
2433  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2434 
2435 #if OMPT_SUPPORT && OMPT_OPTIONAL
2436  // This is the case, if called from omp_init_lock_with_hint:
2437  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2438  if (!codeptr)
2439  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2440  if (ompt_enabled.ompt_callback_lock_init) {
2441  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2442  ompt_mutex_nest_lock, omp_lock_hint_none,
2443  __ompt_get_mutex_impl_type(user_lock),
2444  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2445  }
2446 #endif
2447 
2448 #else // KMP_USE_DYNAMIC_LOCK
2449 
2450  static char const *const func = "omp_init_nest_lock";
2451  kmp_user_lock_p lck;
2452  KMP_DEBUG_ASSERT(__kmp_init_serial);
2453 
2454  if (__kmp_env_consistency_check) {
2455  if (user_lock == NULL) {
2456  KMP_FATAL(LockIsUninitialized, func);
2457  }
2458  }
2459 
2460  KMP_CHECK_USER_LOCK_INIT();
2461 
2462  if ((__kmp_user_lock_kind == lk_tas) &&
2463  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2464  OMP_NEST_LOCK_T_SIZE)) {
2465  lck = (kmp_user_lock_p)user_lock;
2466  }
2467 #if KMP_USE_FUTEX
2468  else if ((__kmp_user_lock_kind == lk_futex) &&
2469  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2470  OMP_NEST_LOCK_T_SIZE)) {
2471  lck = (kmp_user_lock_p)user_lock;
2472  }
2473 #endif
2474  else {
2475  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2476  }
2477 
2478  INIT_NESTED_LOCK(lck);
2479  __kmp_set_user_lock_location(lck, loc);
2480 
2481 #if OMPT_SUPPORT && OMPT_OPTIONAL
2482  // This is the case, if called from omp_init_lock_with_hint:
2483  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2484  if (!codeptr)
2485  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2486  if (ompt_enabled.ompt_callback_lock_init) {
2487  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2488  ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2489  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2490  }
2491 #endif
2492 
2493 #if USE_ITT_BUILD
2494  __kmp_itt_lock_creating(lck);
2495 #endif /* USE_ITT_BUILD */
2496 
2497 #endif // KMP_USE_DYNAMIC_LOCK
2498 } // __kmpc_init_nest_lock
2499 
2500 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2501 #if KMP_USE_DYNAMIC_LOCK
2502 
2503 #if USE_ITT_BUILD
2504  kmp_user_lock_p lck;
2505  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2506  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2507  } else {
2508  lck = (kmp_user_lock_p)user_lock;
2509  }
2510  __kmp_itt_lock_destroyed(lck);
2511 #endif
2512 #if OMPT_SUPPORT && OMPT_OPTIONAL
2513  // This is the case, if called from omp_init_lock_with_hint:
2514  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2515  if (!codeptr)
2516  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2517  if (ompt_enabled.ompt_callback_lock_destroy) {
2518  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2519  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2520  }
2521 #endif
2522  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2523 #else
2524  kmp_user_lock_p lck;
2525 
2526  if ((__kmp_user_lock_kind == lk_tas) &&
2527  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2528  lck = (kmp_user_lock_p)user_lock;
2529  }
2530 #if KMP_USE_FUTEX
2531  else if ((__kmp_user_lock_kind == lk_futex) &&
2532  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2533  lck = (kmp_user_lock_p)user_lock;
2534  }
2535 #endif
2536  else {
2537  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2538  }
2539 
2540 #if OMPT_SUPPORT && OMPT_OPTIONAL
2541  // This is the case, if called from omp_init_lock_with_hint:
2542  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2543  if (!codeptr)
2544  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2545  if (ompt_enabled.ompt_callback_lock_destroy) {
2546  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2547  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2548  }
2549 #endif
2550 
2551 #if USE_ITT_BUILD
2552  __kmp_itt_lock_destroyed(lck);
2553 #endif /* USE_ITT_BUILD */
2554  DESTROY_LOCK(lck);
2555 
2556  if ((__kmp_user_lock_kind == lk_tas) &&
2557  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2558  ;
2559  }
2560 #if KMP_USE_FUTEX
2561  else if ((__kmp_user_lock_kind == lk_futex) &&
2562  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2563  ;
2564  }
2565 #endif
2566  else {
2567  __kmp_user_lock_free(user_lock, gtid, lck);
2568  }
2569 #endif // KMP_USE_DYNAMIC_LOCK
2570 } // __kmpc_destroy_lock
2571 
2572 /* destroy the lock */
2573 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2574 #if KMP_USE_DYNAMIC_LOCK
2575 
2576 #if USE_ITT_BUILD
2577  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2578  __kmp_itt_lock_destroyed(ilk->lock);
2579 #endif
2580 #if OMPT_SUPPORT && OMPT_OPTIONAL
2581  // This is the case, if called from omp_init_lock_with_hint:
2582  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2583  if (!codeptr)
2584  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2585  if (ompt_enabled.ompt_callback_lock_destroy) {
2586  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2587  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2588  }
2589 #endif
2590  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2591 
2592 #else // KMP_USE_DYNAMIC_LOCK
2593 
2594  kmp_user_lock_p lck;
2595 
2596  if ((__kmp_user_lock_kind == lk_tas) &&
2597  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2598  OMP_NEST_LOCK_T_SIZE)) {
2599  lck = (kmp_user_lock_p)user_lock;
2600  }
2601 #if KMP_USE_FUTEX
2602  else if ((__kmp_user_lock_kind == lk_futex) &&
2603  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2604  OMP_NEST_LOCK_T_SIZE)) {
2605  lck = (kmp_user_lock_p)user_lock;
2606  }
2607 #endif
2608  else {
2609  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2610  }
2611 
2612 #if OMPT_SUPPORT && OMPT_OPTIONAL
2613  // This is the case, if called from omp_init_lock_with_hint:
2614  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2615  if (!codeptr)
2616  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2617  if (ompt_enabled.ompt_callback_lock_destroy) {
2618  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2619  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2620  }
2621 #endif
2622 
2623 #if USE_ITT_BUILD
2624  __kmp_itt_lock_destroyed(lck);
2625 #endif /* USE_ITT_BUILD */
2626 
2627  DESTROY_NESTED_LOCK(lck);
2628 
2629  if ((__kmp_user_lock_kind == lk_tas) &&
2630  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2631  OMP_NEST_LOCK_T_SIZE)) {
2632  ;
2633  }
2634 #if KMP_USE_FUTEX
2635  else if ((__kmp_user_lock_kind == lk_futex) &&
2636  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2637  OMP_NEST_LOCK_T_SIZE)) {
2638  ;
2639  }
2640 #endif
2641  else {
2642  __kmp_user_lock_free(user_lock, gtid, lck);
2643  }
2644 #endif // KMP_USE_DYNAMIC_LOCK
2645 } // __kmpc_destroy_nest_lock
2646 
2647 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2648  KMP_COUNT_BLOCK(OMP_set_lock);
2649 #if KMP_USE_DYNAMIC_LOCK
2650  int tag = KMP_EXTRACT_D_TAG(user_lock);
2651 #if USE_ITT_BUILD
2652  __kmp_itt_lock_acquiring(
2653  (kmp_user_lock_p)
2654  user_lock); // itt function will get to the right lock object.
2655 #endif
2656 #if OMPT_SUPPORT && OMPT_OPTIONAL
2657  // This is the case, if called from omp_init_lock_with_hint:
2658  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2659  if (!codeptr)
2660  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2661  if (ompt_enabled.ompt_callback_mutex_acquire) {
2662  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2663  ompt_mutex_lock, omp_lock_hint_none,
2664  __ompt_get_mutex_impl_type(user_lock),
2665  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2666  }
2667 #endif
2668 #if KMP_USE_INLINED_TAS
2669  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2670  KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2671  } else
2672 #elif KMP_USE_INLINED_FUTEX
2673  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2674  KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2675  } else
2676 #endif
2677  {
2678  __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2679  }
2680 #if USE_ITT_BUILD
2681  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2682 #endif
2683 #if OMPT_SUPPORT && OMPT_OPTIONAL
2684  if (ompt_enabled.ompt_callback_mutex_acquired) {
2685  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2686  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2687  }
2688 #endif
2689 
2690 #else // KMP_USE_DYNAMIC_LOCK
2691 
2692  kmp_user_lock_p lck;
2693 
2694  if ((__kmp_user_lock_kind == lk_tas) &&
2695  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2696  lck = (kmp_user_lock_p)user_lock;
2697  }
2698 #if KMP_USE_FUTEX
2699  else if ((__kmp_user_lock_kind == lk_futex) &&
2700  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2701  lck = (kmp_user_lock_p)user_lock;
2702  }
2703 #endif
2704  else {
2705  lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2706  }
2707 
2708 #if USE_ITT_BUILD
2709  __kmp_itt_lock_acquiring(lck);
2710 #endif /* USE_ITT_BUILD */
2711 #if OMPT_SUPPORT && OMPT_OPTIONAL
2712  // This is the case, if called from omp_init_lock_with_hint:
2713  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2714  if (!codeptr)
2715  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2716  if (ompt_enabled.ompt_callback_mutex_acquire) {
2717  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2718  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2719  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2720  }
2721 #endif
2722 
2723  ACQUIRE_LOCK(lck, gtid);
2724 
2725 #if USE_ITT_BUILD
2726  __kmp_itt_lock_acquired(lck);
2727 #endif /* USE_ITT_BUILD */
2728 
2729 #if OMPT_SUPPORT && OMPT_OPTIONAL
2730  if (ompt_enabled.ompt_callback_mutex_acquired) {
2731  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2732  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2733  }
2734 #endif
2735 
2736 #endif // KMP_USE_DYNAMIC_LOCK
2737 }
2738 
2739 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2740 #if KMP_USE_DYNAMIC_LOCK
2741 
2742 #if USE_ITT_BUILD
2743  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2744 #endif
2745 #if OMPT_SUPPORT && OMPT_OPTIONAL
2746  // This is the case, if called from omp_init_lock_with_hint:
2747  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2748  if (!codeptr)
2749  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2750  if (ompt_enabled.enabled) {
2751  if (ompt_enabled.ompt_callback_mutex_acquire) {
2752  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2753  ompt_mutex_nest_lock, omp_lock_hint_none,
2754  __ompt_get_mutex_impl_type(user_lock),
2755  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2756  }
2757  }
2758 #endif
2759  int acquire_status =
2760  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2761  (void)acquire_status;
2762 #if USE_ITT_BUILD
2763  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2764 #endif
2765 
2766 #if OMPT_SUPPORT && OMPT_OPTIONAL
2767  if (ompt_enabled.enabled) {
2768  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2769  if (ompt_enabled.ompt_callback_mutex_acquired) {
2770  // lock_first
2771  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2772  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2773  codeptr);
2774  }
2775  } else {
2776  if (ompt_enabled.ompt_callback_nest_lock) {
2777  // lock_next
2778  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2779  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2780  }
2781  }
2782  }
2783 #endif
2784 
2785 #else // KMP_USE_DYNAMIC_LOCK
2786  int acquire_status;
2787  kmp_user_lock_p lck;
2788 
2789  if ((__kmp_user_lock_kind == lk_tas) &&
2790  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2791  OMP_NEST_LOCK_T_SIZE)) {
2792  lck = (kmp_user_lock_p)user_lock;
2793  }
2794 #if KMP_USE_FUTEX
2795  else if ((__kmp_user_lock_kind == lk_futex) &&
2796  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2797  OMP_NEST_LOCK_T_SIZE)) {
2798  lck = (kmp_user_lock_p)user_lock;
2799  }
2800 #endif
2801  else {
2802  lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2803  }
2804 
2805 #if USE_ITT_BUILD
2806  __kmp_itt_lock_acquiring(lck);
2807 #endif /* USE_ITT_BUILD */
2808 #if OMPT_SUPPORT && OMPT_OPTIONAL
2809  // This is the case, if called from omp_init_lock_with_hint:
2810  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2811  if (!codeptr)
2812  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2813  if (ompt_enabled.enabled) {
2814  if (ompt_enabled.ompt_callback_mutex_acquire) {
2815  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2816  ompt_mutex_nest_lock, omp_lock_hint_none,
2817  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2818  codeptr);
2819  }
2820  }
2821 #endif
2822 
2823  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2824 
2825 #if USE_ITT_BUILD
2826  __kmp_itt_lock_acquired(lck);
2827 #endif /* USE_ITT_BUILD */
2828 
2829 #if OMPT_SUPPORT && OMPT_OPTIONAL
2830  if (ompt_enabled.enabled) {
2831  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2832  if (ompt_enabled.ompt_callback_mutex_acquired) {
2833  // lock_first
2834  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2835  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2836  }
2837  } else {
2838  if (ompt_enabled.ompt_callback_nest_lock) {
2839  // lock_next
2840  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2841  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2842  }
2843  }
2844  }
2845 #endif
2846 
2847 #endif // KMP_USE_DYNAMIC_LOCK
2848 }
2849 
2850 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2851 #if KMP_USE_DYNAMIC_LOCK
2852 
2853  int tag = KMP_EXTRACT_D_TAG(user_lock);
2854 #if USE_ITT_BUILD
2855  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2856 #endif
2857 #if KMP_USE_INLINED_TAS
2858  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2859  KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2860  } else
2861 #elif KMP_USE_INLINED_FUTEX
2862  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2863  KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2864  } else
2865 #endif
2866  {
2867  __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2868  }
2869 
2870 #if OMPT_SUPPORT && OMPT_OPTIONAL
2871  // This is the case, if called from omp_init_lock_with_hint:
2872  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2873  if (!codeptr)
2874  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2875  if (ompt_enabled.ompt_callback_mutex_released) {
2876  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2877  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2878  }
2879 #endif
2880 
2881 #else // KMP_USE_DYNAMIC_LOCK
2882 
2883  kmp_user_lock_p lck;
2884 
2885  /* Can't use serial interval since not block structured */
2886  /* release the lock */
2887 
2888  if ((__kmp_user_lock_kind == lk_tas) &&
2889  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2890 #if KMP_OS_LINUX && \
2891  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2892 // "fast" path implemented to fix customer performance issue
2893 #if USE_ITT_BUILD
2894  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2895 #endif /* USE_ITT_BUILD */
2896  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2897  KMP_MB();
2898 
2899 #if OMPT_SUPPORT && OMPT_OPTIONAL
2900  // This is the case, if called from omp_init_lock_with_hint:
2901  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2902  if (!codeptr)
2903  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2904  if (ompt_enabled.ompt_callback_mutex_released) {
2905  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2906  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2907  }
2908 #endif
2909 
2910  return;
2911 #else
2912  lck = (kmp_user_lock_p)user_lock;
2913 #endif
2914  }
2915 #if KMP_USE_FUTEX
2916  else if ((__kmp_user_lock_kind == lk_futex) &&
2917  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2918  lck = (kmp_user_lock_p)user_lock;
2919  }
2920 #endif
2921  else {
2922  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2923  }
2924 
2925 #if USE_ITT_BUILD
2926  __kmp_itt_lock_releasing(lck);
2927 #endif /* USE_ITT_BUILD */
2928 
2929  RELEASE_LOCK(lck, gtid);
2930 
2931 #if OMPT_SUPPORT && OMPT_OPTIONAL
2932  // This is the case, if called from omp_init_lock_with_hint:
2933  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2934  if (!codeptr)
2935  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2936  if (ompt_enabled.ompt_callback_mutex_released) {
2937  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2938  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2939  }
2940 #endif
2941 
2942 #endif // KMP_USE_DYNAMIC_LOCK
2943 }
2944 
2945 /* release the lock */
2946 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2947 #if KMP_USE_DYNAMIC_LOCK
2948 
2949 #if USE_ITT_BUILD
2950  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2951 #endif
2952  int release_status =
2953  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2954  (void)release_status;
2955 
2956 #if OMPT_SUPPORT && OMPT_OPTIONAL
2957  // This is the case, if called from omp_init_lock_with_hint:
2958  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2959  if (!codeptr)
2960  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2961  if (ompt_enabled.enabled) {
2962  if (release_status == KMP_LOCK_RELEASED) {
2963  if (ompt_enabled.ompt_callback_mutex_released) {
2964  // release_lock_last
2965  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2966  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2967  codeptr);
2968  }
2969  } else if (ompt_enabled.ompt_callback_nest_lock) {
2970  // release_lock_prev
2971  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2972  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2973  }
2974  }
2975 #endif
2976 
2977 #else // KMP_USE_DYNAMIC_LOCK
2978 
2979  kmp_user_lock_p lck;
2980 
2981  /* Can't use serial interval since not block structured */
2982 
2983  if ((__kmp_user_lock_kind == lk_tas) &&
2984  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2985  OMP_NEST_LOCK_T_SIZE)) {
2986 #if KMP_OS_LINUX && \
2987  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2988  // "fast" path implemented to fix customer performance issue
2989  kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2990 #if USE_ITT_BUILD
2991  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2992 #endif /* USE_ITT_BUILD */
2993 
2994 #if OMPT_SUPPORT && OMPT_OPTIONAL
2995  int release_status = KMP_LOCK_STILL_HELD;
2996 #endif
2997 
2998  if (--(tl->lk.depth_locked) == 0) {
2999  TCW_4(tl->lk.poll, 0);
3000 #if OMPT_SUPPORT && OMPT_OPTIONAL
3001  release_status = KMP_LOCK_RELEASED;
3002 #endif
3003  }
3004  KMP_MB();
3005 
3006 #if OMPT_SUPPORT && OMPT_OPTIONAL
3007  // This is the case, if called from omp_init_lock_with_hint:
3008  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3009  if (!codeptr)
3010  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3011  if (ompt_enabled.enabled) {
3012  if (release_status == KMP_LOCK_RELEASED) {
3013  if (ompt_enabled.ompt_callback_mutex_released) {
3014  // release_lock_last
3015  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3016  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3017  }
3018  } else if (ompt_enabled.ompt_callback_nest_lock) {
3019  // release_lock_previous
3020  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3021  ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3022  }
3023  }
3024 #endif
3025 
3026  return;
3027 #else
3028  lck = (kmp_user_lock_p)user_lock;
3029 #endif
3030  }
3031 #if KMP_USE_FUTEX
3032  else if ((__kmp_user_lock_kind == lk_futex) &&
3033  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3034  OMP_NEST_LOCK_T_SIZE)) {
3035  lck = (kmp_user_lock_p)user_lock;
3036  }
3037 #endif
3038  else {
3039  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3040  }
3041 
3042 #if USE_ITT_BUILD
3043  __kmp_itt_lock_releasing(lck);
3044 #endif /* USE_ITT_BUILD */
3045 
3046  int release_status;
3047  release_status = RELEASE_NESTED_LOCK(lck, gtid);
3048 #if OMPT_SUPPORT && OMPT_OPTIONAL
3049  // This is the case, if called from omp_init_lock_with_hint:
3050  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3051  if (!codeptr)
3052  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3053  if (ompt_enabled.enabled) {
3054  if (release_status == KMP_LOCK_RELEASED) {
3055  if (ompt_enabled.ompt_callback_mutex_released) {
3056  // release_lock_last
3057  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3058  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3059  }
3060  } else if (ompt_enabled.ompt_callback_nest_lock) {
3061  // release_lock_previous
3062  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3063  ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3064  }
3065  }
3066 #endif
3067 
3068 #endif // KMP_USE_DYNAMIC_LOCK
3069 }
3070 
3071 /* try to acquire the lock */
3072 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3073  KMP_COUNT_BLOCK(OMP_test_lock);
3074 
3075 #if KMP_USE_DYNAMIC_LOCK
3076  int rc;
3077  int tag = KMP_EXTRACT_D_TAG(user_lock);
3078 #if USE_ITT_BUILD
3079  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3080 #endif
3081 #if OMPT_SUPPORT && OMPT_OPTIONAL
3082  // This is the case, if called from omp_init_lock_with_hint:
3083  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3084  if (!codeptr)
3085  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3086  if (ompt_enabled.ompt_callback_mutex_acquire) {
3087  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3088  ompt_mutex_lock, omp_lock_hint_none,
3089  __ompt_get_mutex_impl_type(user_lock),
3090  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3091  }
3092 #endif
3093 #if KMP_USE_INLINED_TAS
3094  if (tag == locktag_tas && !__kmp_env_consistency_check) {
3095  KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3096  } else
3097 #elif KMP_USE_INLINED_FUTEX
3098  if (tag == locktag_futex && !__kmp_env_consistency_check) {
3099  KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3100  } else
3101 #endif
3102  {
3103  rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3104  }
3105  if (rc) {
3106 #if USE_ITT_BUILD
3107  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3108 #endif
3109 #if OMPT_SUPPORT && OMPT_OPTIONAL
3110  if (ompt_enabled.ompt_callback_mutex_acquired) {
3111  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3112  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3113  }
3114 #endif
3115  return FTN_TRUE;
3116  } else {
3117 #if USE_ITT_BUILD
3118  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3119 #endif
3120  return FTN_FALSE;
3121  }
3122 
3123 #else // KMP_USE_DYNAMIC_LOCK
3124 
3125  kmp_user_lock_p lck;
3126  int rc;
3127 
3128  if ((__kmp_user_lock_kind == lk_tas) &&
3129  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3130  lck = (kmp_user_lock_p)user_lock;
3131  }
3132 #if KMP_USE_FUTEX
3133  else if ((__kmp_user_lock_kind == lk_futex) &&
3134  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3135  lck = (kmp_user_lock_p)user_lock;
3136  }
3137 #endif
3138  else {
3139  lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3140  }
3141 
3142 #if USE_ITT_BUILD
3143  __kmp_itt_lock_acquiring(lck);
3144 #endif /* USE_ITT_BUILD */
3145 #if OMPT_SUPPORT && OMPT_OPTIONAL
3146  // This is the case, if called from omp_init_lock_with_hint:
3147  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3148  if (!codeptr)
3149  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3150  if (ompt_enabled.ompt_callback_mutex_acquire) {
3151  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3152  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3153  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3154  }
3155 #endif
3156 
3157  rc = TEST_LOCK(lck, gtid);
3158 #if USE_ITT_BUILD
3159  if (rc) {
3160  __kmp_itt_lock_acquired(lck);
3161  } else {
3162  __kmp_itt_lock_cancelled(lck);
3163  }
3164 #endif /* USE_ITT_BUILD */
3165 #if OMPT_SUPPORT && OMPT_OPTIONAL
3166  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3167  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3168  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3169  }
3170 #endif
3171 
3172  return (rc ? FTN_TRUE : FTN_FALSE);
3173 
3174  /* Can't use serial interval since not block structured */
3175 
3176 #endif // KMP_USE_DYNAMIC_LOCK
3177 }
3178 
3179 /* try to acquire the lock */
3180 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3181 #if KMP_USE_DYNAMIC_LOCK
3182  int rc;
3183 #if USE_ITT_BUILD
3184  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3185 #endif
3186 #if OMPT_SUPPORT && OMPT_OPTIONAL
3187  // This is the case, if called from omp_init_lock_with_hint:
3188  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3189  if (!codeptr)
3190  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3191  if (ompt_enabled.ompt_callback_mutex_acquire) {
3192  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3193  ompt_mutex_nest_lock, omp_lock_hint_none,
3194  __ompt_get_mutex_impl_type(user_lock),
3195  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3196  }
3197 #endif
3198  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3199 #if USE_ITT_BUILD
3200  if (rc) {
3201  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3202  } else {
3203  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3204  }
3205 #endif
3206 #if OMPT_SUPPORT && OMPT_OPTIONAL
3207  if (ompt_enabled.enabled && rc) {
3208  if (rc == 1) {
3209  if (ompt_enabled.ompt_callback_mutex_acquired) {
3210  // lock_first
3211  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3212  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3213  codeptr);
3214  }
3215  } else {
3216  if (ompt_enabled.ompt_callback_nest_lock) {
3217  // lock_next
3218  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3219  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3220  }
3221  }
3222  }
3223 #endif
3224  return rc;
3225 
3226 #else // KMP_USE_DYNAMIC_LOCK
3227 
3228  kmp_user_lock_p lck;
3229  int rc;
3230 
3231  if ((__kmp_user_lock_kind == lk_tas) &&
3232  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3233  OMP_NEST_LOCK_T_SIZE)) {
3234  lck = (kmp_user_lock_p)user_lock;
3235  }
3236 #if KMP_USE_FUTEX
3237  else if ((__kmp_user_lock_kind == lk_futex) &&
3238  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3239  OMP_NEST_LOCK_T_SIZE)) {
3240  lck = (kmp_user_lock_p)user_lock;
3241  }
3242 #endif
3243  else {
3244  lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3245  }
3246 
3247 #if USE_ITT_BUILD
3248  __kmp_itt_lock_acquiring(lck);
3249 #endif /* USE_ITT_BUILD */
3250 
3251 #if OMPT_SUPPORT && OMPT_OPTIONAL
3252  // This is the case, if called from omp_init_lock_with_hint:
3253  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3254  if (!codeptr)
3255  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3256  if (ompt_enabled.enabled) &&
3257  ompt_enabled.ompt_callback_mutex_acquire) {
3258  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3259  ompt_mutex_nest_lock, omp_lock_hint_none,
3260  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3261  codeptr);
3262  }
3263 #endif
3264 
3265  rc = TEST_NESTED_LOCK(lck, gtid);
3266 #if USE_ITT_BUILD
3267  if (rc) {
3268  __kmp_itt_lock_acquired(lck);
3269  } else {
3270  __kmp_itt_lock_cancelled(lck);
3271  }
3272 #endif /* USE_ITT_BUILD */
3273 #if OMPT_SUPPORT && OMPT_OPTIONAL
3274  if (ompt_enabled.enabled && rc) {
3275  if (rc == 1) {
3276  if (ompt_enabled.ompt_callback_mutex_acquired) {
3277  // lock_first
3278  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3279  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3280  }
3281  } else {
3282  if (ompt_enabled.ompt_callback_nest_lock) {
3283  // lock_next
3284  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3285  ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3286  }
3287  }
3288  }
3289 #endif
3290  return rc;
3291 
3292  /* Can't use serial interval since not block structured */
3293 
3294 #endif // KMP_USE_DYNAMIC_LOCK
3295 }
3296 
3297 // Interface to fast scalable reduce methods routines
3298 
3299 // keep the selected method in a thread local structure for cross-function
3300 // usage: will be used in __kmpc_end_reduce* functions;
3301 // another solution: to re-determine the method one more time in
3302 // __kmpc_end_reduce* functions (new prototype required then)
3303 // AT: which solution is better?
3304 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3305  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3306 
3307 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3308  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3309 
3310 // description of the packed_reduction_method variable: look at the macros in
3311 // kmp.h
3312 
3313 // used in a critical section reduce block
3314 static __forceinline void
3315 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3316  kmp_critical_name *crit) {
3317 
3318  // this lock was visible to a customer and to the threading profile tool as a
3319  // serial overhead span (although it's used for an internal purpose only)
3320  // why was it visible in previous implementation?
3321  // should we keep it visible in new reduce block?
3322  kmp_user_lock_p lck;
3323 
3324 #if KMP_USE_DYNAMIC_LOCK
3325 
3326  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3327  // Check if it is initialized.
3328  if (*lk == 0) {
3329  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3330  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3331  KMP_GET_D_TAG(__kmp_user_lock_seq));
3332  } else {
3333  __kmp_init_indirect_csptr(crit, loc, global_tid,
3334  KMP_GET_I_TAG(__kmp_user_lock_seq));
3335  }
3336  }
3337  // Branch for accessing the actual lock object and set operation. This
3338  // branching is inevitable since this lock initialization does not follow the
3339  // normal dispatch path (lock table is not used).
3340  if (KMP_EXTRACT_D_TAG(lk) != 0) {
3341  lck = (kmp_user_lock_p)lk;
3342  KMP_DEBUG_ASSERT(lck != NULL);
3343  if (__kmp_env_consistency_check) {
3344  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3345  }
3346  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3347  } else {
3348  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3349  lck = ilk->lock;
3350  KMP_DEBUG_ASSERT(lck != NULL);
3351  if (__kmp_env_consistency_check) {
3352  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3353  }
3354  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3355  }
3356 
3357 #else // KMP_USE_DYNAMIC_LOCK
3358 
3359  // We know that the fast reduction code is only emitted by Intel compilers
3360  // with 32 byte critical sections. If there isn't enough space, then we
3361  // have to use a pointer.
3362  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3363  lck = (kmp_user_lock_p)crit;
3364  } else {
3365  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3366  }
3367  KMP_DEBUG_ASSERT(lck != NULL);
3368 
3369  if (__kmp_env_consistency_check)
3370  __kmp_push_sync(global_tid, ct_critical, loc, lck);
3371 
3372  __kmp_acquire_user_lock_with_checks(lck, global_tid);
3373 
3374 #endif // KMP_USE_DYNAMIC_LOCK
3375 }
3376 
3377 // used in a critical section reduce block
3378 static __forceinline void
3379 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3380  kmp_critical_name *crit) {
3381 
3382  kmp_user_lock_p lck;
3383 
3384 #if KMP_USE_DYNAMIC_LOCK
3385 
3386  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3387  lck = (kmp_user_lock_p)crit;
3388  if (__kmp_env_consistency_check)
3389  __kmp_pop_sync(global_tid, ct_critical, loc);
3390  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3391  } else {
3392  kmp_indirect_lock_t *ilk =
3393  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3394  if (__kmp_env_consistency_check)
3395  __kmp_pop_sync(global_tid, ct_critical, loc);
3396  KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3397  }
3398 
3399 #else // KMP_USE_DYNAMIC_LOCK
3400 
3401  // We know that the fast reduction code is only emitted by Intel compilers
3402  // with 32 byte critical sections. If there isn't enough space, then we have
3403  // to use a pointer.
3404  if (__kmp_base_user_lock_size > 32) {
3405  lck = *((kmp_user_lock_p *)crit);
3406  KMP_ASSERT(lck != NULL);
3407  } else {
3408  lck = (kmp_user_lock_p)crit;
3409  }
3410 
3411  if (__kmp_env_consistency_check)
3412  __kmp_pop_sync(global_tid, ct_critical, loc);
3413 
3414  __kmp_release_user_lock_with_checks(lck, global_tid);
3415 
3416 #endif // KMP_USE_DYNAMIC_LOCK
3417 } // __kmp_end_critical_section_reduce_block
3418 
3419 static __forceinline int
3420 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3421  int *task_state) {
3422  kmp_team_t *team;
3423 
3424  // Check if we are inside the teams construct?
3425  if (th->th.th_teams_microtask) {
3426  *team_p = team = th->th.th_team;
3427  if (team->t.t_level == th->th.th_teams_level) {
3428  // This is reduction at teams construct.
3429  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3430  // Let's swap teams temporarily for the reduction.
3431  th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3432  th->th.th_team = team->t.t_parent;
3433  th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3434  th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3435  *task_state = th->th.th_task_state;
3436  th->th.th_task_state = 0;
3437 
3438  return 1;
3439  }
3440  }
3441  return 0;
3442 }
3443 
3444 static __forceinline void
3445 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3446  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3447  th->th.th_info.ds.ds_tid = 0;
3448  th->th.th_team = team;
3449  th->th.th_team_nproc = team->t.t_nproc;
3450  th->th.th_task_team = team->t.t_task_team[task_state];
3451  __kmp_type_convert(task_state, &(th->th.th_task_state));
3452 }
3453 
3454 /* 2.a.i. Reduce Block without a terminating barrier */
3470 kmp_int32
3471 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3472  size_t reduce_size, void *reduce_data,
3473  void (*reduce_func)(void *lhs_data, void *rhs_data),
3474  kmp_critical_name *lck) {
3475 
3476  KMP_COUNT_BLOCK(REDUCE_nowait);
3477  int retval = 0;
3478  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3479  kmp_info_t *th;
3480  kmp_team_t *team;
3481  int teams_swapped = 0, task_state;
3482  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3483  __kmp_assert_valid_gtid(global_tid);
3484 
3485  // why do we need this initialization here at all?
3486  // Reduction clause can not be used as a stand-alone directive.
3487 
3488  // do not call __kmp_serial_initialize(), it will be called by
3489  // __kmp_parallel_initialize() if needed
3490  // possible detection of false-positive race by the threadchecker ???
3491  if (!TCR_4(__kmp_init_parallel))
3492  __kmp_parallel_initialize();
3493 
3494  __kmp_resume_if_soft_paused();
3495 
3496 // check correctness of reduce block nesting
3497 #if KMP_USE_DYNAMIC_LOCK
3498  if (__kmp_env_consistency_check)
3499  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3500 #else
3501  if (__kmp_env_consistency_check)
3502  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3503 #endif
3504 
3505  th = __kmp_thread_from_gtid(global_tid);
3506  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3507 
3508  // packed_reduction_method value will be reused by __kmp_end_reduce* function,
3509  // the value should be kept in a variable
3510  // the variable should be either a construct-specific or thread-specific
3511  // property, not a team specific property
3512  // (a thread can reach the next reduce block on the next construct, reduce
3513  // method may differ on the next construct)
3514  // an ident_t "loc" parameter could be used as a construct-specific property
3515  // (what if loc == 0?)
3516  // (if both construct-specific and team-specific variables were shared,
3517  // then unness extra syncs should be needed)
3518  // a thread-specific variable is better regarding two issues above (next
3519  // construct and extra syncs)
3520  // a thread-specific "th_local.reduction_method" variable is used currently
3521  // each thread executes 'determine' and 'set' lines (no need to execute by one
3522  // thread, to avoid unness extra syncs)
3523 
3524  packed_reduction_method = __kmp_determine_reduction_method(
3525  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3526  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3527 
3528  OMPT_REDUCTION_DECL(th, global_tid);
3529  if (packed_reduction_method == critical_reduce_block) {
3530 
3531  OMPT_REDUCTION_BEGIN;
3532 
3533  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3534  retval = 1;
3535 
3536  } else if (packed_reduction_method == empty_reduce_block) {
3537 
3538  OMPT_REDUCTION_BEGIN;
3539 
3540  // usage: if team size == 1, no synchronization is required ( Intel
3541  // platforms only )
3542  retval = 1;
3543 
3544  } else if (packed_reduction_method == atomic_reduce_block) {
3545 
3546  retval = 2;
3547 
3548  // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3549  // won't be called by the code gen)
3550  // (it's not quite good, because the checking block has been closed by
3551  // this 'pop',
3552  // but atomic operation has not been executed yet, will be executed
3553  // slightly later, literally on next instruction)
3554  if (__kmp_env_consistency_check)
3555  __kmp_pop_sync(global_tid, ct_reduce, loc);
3556 
3557  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3558  tree_reduce_block)) {
3559 
3560 // AT: performance issue: a real barrier here
3561 // AT: (if primary thread is slow, other threads are blocked here waiting for
3562 // the primary thread to come and release them)
3563 // AT: (it's not what a customer might expect specifying NOWAIT clause)
3564 // AT: (specifying NOWAIT won't result in improvement of performance, it'll
3565 // be confusing to a customer)
3566 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3567 // might go faster and be more in line with sense of NOWAIT
3568 // AT: TO DO: do epcc test and compare times
3569 
3570 // this barrier should be invisible to a customer and to the threading profile
3571 // tool (it's neither a terminating barrier nor customer's code, it's
3572 // used for an internal purpose)
3573 #if OMPT_SUPPORT
3574  // JP: can this barrier potentially leed to task scheduling?
3575  // JP: as long as there is a barrier in the implementation, OMPT should and
3576  // will provide the barrier events
3577  // so we set-up the necessary frame/return addresses.
3578  ompt_frame_t *ompt_frame;
3579  if (ompt_enabled.enabled) {
3580  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3581  if (ompt_frame->enter_frame.ptr == NULL)
3582  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3583  }
3584  OMPT_STORE_RETURN_ADDRESS(global_tid);
3585 #endif
3586 #if USE_ITT_NOTIFY
3587  __kmp_threads[global_tid]->th.th_ident = loc;
3588 #endif
3589  retval =
3590  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3591  global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3592  retval = (retval != 0) ? (0) : (1);
3593 #if OMPT_SUPPORT && OMPT_OPTIONAL
3594  if (ompt_enabled.enabled) {
3595  ompt_frame->enter_frame = ompt_data_none;
3596  }
3597 #endif
3598 
3599  // all other workers except primary thread should do this pop here
3600  // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3601  if (__kmp_env_consistency_check) {
3602  if (retval == 0) {
3603  __kmp_pop_sync(global_tid, ct_reduce, loc);
3604  }
3605  }
3606 
3607  } else {
3608 
3609  // should never reach this block
3610  KMP_ASSERT(0); // "unexpected method"
3611  }
3612  if (teams_swapped) {
3613  __kmp_restore_swapped_teams(th, team, task_state);
3614  }
3615  KA_TRACE(
3616  10,
3617  ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3618  global_tid, packed_reduction_method, retval));
3619 
3620  return retval;
3621 }
3622 
3631 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3632  kmp_critical_name *lck) {
3633 
3634  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3635 
3636  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3637  __kmp_assert_valid_gtid(global_tid);
3638 
3639  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3640 
3641  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3642 
3643  if (packed_reduction_method == critical_reduce_block) {
3644 
3645  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3646  OMPT_REDUCTION_END;
3647 
3648  } else if (packed_reduction_method == empty_reduce_block) {
3649 
3650  // usage: if team size == 1, no synchronization is required ( on Intel
3651  // platforms only )
3652 
3653  OMPT_REDUCTION_END;
3654 
3655  } else if (packed_reduction_method == atomic_reduce_block) {
3656 
3657  // neither primary thread nor other workers should get here
3658  // (code gen does not generate this call in case 2: atomic reduce block)
3659  // actually it's better to remove this elseif at all;
3660  // after removal this value will checked by the 'else' and will assert
3661 
3662  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3663  tree_reduce_block)) {
3664 
3665  // only primary thread gets here
3666  // OMPT: tree reduction is annotated in the barrier code
3667 
3668  } else {
3669 
3670  // should never reach this block
3671  KMP_ASSERT(0); // "unexpected method"
3672  }
3673 
3674  if (__kmp_env_consistency_check)
3675  __kmp_pop_sync(global_tid, ct_reduce, loc);
3676 
3677  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3678  global_tid, packed_reduction_method));
3679 
3680  return;
3681 }
3682 
3683 /* 2.a.ii. Reduce Block with a terminating barrier */
3684 
3700 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3701  size_t reduce_size, void *reduce_data,
3702  void (*reduce_func)(void *lhs_data, void *rhs_data),
3703  kmp_critical_name *lck) {
3704  KMP_COUNT_BLOCK(REDUCE_wait);
3705  int retval = 0;
3706  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3707  kmp_info_t *th;
3708  kmp_team_t *team;
3709  int teams_swapped = 0, task_state;
3710 
3711  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3712  __kmp_assert_valid_gtid(global_tid);
3713 
3714  // why do we need this initialization here at all?
3715  // Reduction clause can not be a stand-alone directive.
3716 
3717  // do not call __kmp_serial_initialize(), it will be called by
3718  // __kmp_parallel_initialize() if needed
3719  // possible detection of false-positive race by the threadchecker ???
3720  if (!TCR_4(__kmp_init_parallel))
3721  __kmp_parallel_initialize();
3722 
3723  __kmp_resume_if_soft_paused();
3724 
3725 // check correctness of reduce block nesting
3726 #if KMP_USE_DYNAMIC_LOCK
3727  if (__kmp_env_consistency_check)
3728  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3729 #else
3730  if (__kmp_env_consistency_check)
3731  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3732 #endif
3733 
3734  th = __kmp_thread_from_gtid(global_tid);
3735  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3736 
3737  packed_reduction_method = __kmp_determine_reduction_method(
3738  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3739  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3740 
3741  OMPT_REDUCTION_DECL(th, global_tid);
3742 
3743  if (packed_reduction_method == critical_reduce_block) {
3744 
3745  OMPT_REDUCTION_BEGIN;
3746  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3747  retval = 1;
3748 
3749  } else if (packed_reduction_method == empty_reduce_block) {
3750 
3751  OMPT_REDUCTION_BEGIN;
3752  // usage: if team size == 1, no synchronization is required ( Intel
3753  // platforms only )
3754  retval = 1;
3755 
3756  } else if (packed_reduction_method == atomic_reduce_block) {
3757 
3758  retval = 2;
3759 
3760  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3761  tree_reduce_block)) {
3762 
3763 // case tree_reduce_block:
3764 // this barrier should be visible to a customer and to the threading profile
3765 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3766 #if OMPT_SUPPORT
3767  ompt_frame_t *ompt_frame;
3768  if (ompt_enabled.enabled) {
3769  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3770  if (ompt_frame->enter_frame.ptr == NULL)
3771  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3772  }
3773  OMPT_STORE_RETURN_ADDRESS(global_tid);
3774 #endif
3775 #if USE_ITT_NOTIFY
3776  __kmp_threads[global_tid]->th.th_ident =
3777  loc; // needed for correct notification of frames
3778 #endif
3779  retval =
3780  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3781  global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3782  retval = (retval != 0) ? (0) : (1);
3783 #if OMPT_SUPPORT && OMPT_OPTIONAL
3784  if (ompt_enabled.enabled) {
3785  ompt_frame->enter_frame = ompt_data_none;
3786  }
3787 #endif
3788 
3789  // all other workers except primary thread should do this pop here
3790  // (none of other workers except primary will enter __kmpc_end_reduce())
3791  if (__kmp_env_consistency_check) {
3792  if (retval == 0) { // 0: all other workers; 1: primary thread
3793  __kmp_pop_sync(global_tid, ct_reduce, loc);
3794  }
3795  }
3796 
3797  } else {
3798 
3799  // should never reach this block
3800  KMP_ASSERT(0); // "unexpected method"
3801  }
3802  if (teams_swapped) {
3803  __kmp_restore_swapped_teams(th, team, task_state);
3804  }
3805 
3806  KA_TRACE(10,
3807  ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3808  global_tid, packed_reduction_method, retval));
3809  return retval;
3810 }
3811 
3822 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3823  kmp_critical_name *lck) {
3824 
3825  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3826  kmp_info_t *th;
3827  kmp_team_t *team;
3828  int teams_swapped = 0, task_state;
3829 
3830  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3831  __kmp_assert_valid_gtid(global_tid);
3832 
3833  th = __kmp_thread_from_gtid(global_tid);
3834  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3835 
3836  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3837 
3838  // this barrier should be visible to a customer and to the threading profile
3839  // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3840  OMPT_REDUCTION_DECL(th, global_tid);
3841 
3842  if (packed_reduction_method == critical_reduce_block) {
3843  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3844 
3845  OMPT_REDUCTION_END;
3846 
3847 // TODO: implicit barrier: should be exposed
3848 #if OMPT_SUPPORT
3849  ompt_frame_t *ompt_frame;
3850  if (ompt_enabled.enabled) {
3851  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3852  if (ompt_frame->enter_frame.ptr == NULL)
3853  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3854  }
3855  OMPT_STORE_RETURN_ADDRESS(global_tid);
3856 #endif
3857 #if USE_ITT_NOTIFY
3858  __kmp_threads[global_tid]->th.th_ident = loc;
3859 #endif
3860  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3861 #if OMPT_SUPPORT && OMPT_OPTIONAL
3862  if (ompt_enabled.enabled) {
3863  ompt_frame->enter_frame = ompt_data_none;
3864  }
3865 #endif
3866 
3867  } else if (packed_reduction_method == empty_reduce_block) {
3868 
3869  OMPT_REDUCTION_END;
3870 
3871 // usage: if team size==1, no synchronization is required (Intel platforms only)
3872 
3873 // TODO: implicit barrier: should be exposed
3874 #if OMPT_SUPPORT
3875  ompt_frame_t *ompt_frame;
3876  if (ompt_enabled.enabled) {
3877  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3878  if (ompt_frame->enter_frame.ptr == NULL)
3879  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3880  }
3881  OMPT_STORE_RETURN_ADDRESS(global_tid);
3882 #endif
3883 #if USE_ITT_NOTIFY
3884  __kmp_threads[global_tid]->th.th_ident = loc;
3885 #endif
3886  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3887 #if OMPT_SUPPORT && OMPT_OPTIONAL
3888  if (ompt_enabled.enabled) {
3889  ompt_frame->enter_frame = ompt_data_none;
3890  }
3891 #endif
3892 
3893  } else if (packed_reduction_method == atomic_reduce_block) {
3894 
3895 #if OMPT_SUPPORT
3896  ompt_frame_t *ompt_frame;
3897  if (ompt_enabled.enabled) {
3898  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3899  if (ompt_frame->enter_frame.ptr == NULL)
3900  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3901  }
3902  OMPT_STORE_RETURN_ADDRESS(global_tid);
3903 #endif
3904 // TODO: implicit barrier: should be exposed
3905 #if USE_ITT_NOTIFY
3906  __kmp_threads[global_tid]->th.th_ident = loc;
3907 #endif
3908  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3909 #if OMPT_SUPPORT && OMPT_OPTIONAL
3910  if (ompt_enabled.enabled) {
3911  ompt_frame->enter_frame = ompt_data_none;
3912  }
3913 #endif
3914 
3915  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3916  tree_reduce_block)) {
3917 
3918  // only primary thread executes here (primary releases all other workers)
3919  __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3920  global_tid);
3921 
3922  } else {
3923 
3924  // should never reach this block
3925  KMP_ASSERT(0); // "unexpected method"
3926  }
3927  if (teams_swapped) {
3928  __kmp_restore_swapped_teams(th, team, task_state);
3929  }
3930 
3931  if (__kmp_env_consistency_check)
3932  __kmp_pop_sync(global_tid, ct_reduce, loc);
3933 
3934  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3935  global_tid, packed_reduction_method));
3936 
3937  return;
3938 }
3939 
3940 #undef __KMP_GET_REDUCTION_METHOD
3941 #undef __KMP_SET_REDUCTION_METHOD
3942 
3943 /* end of interface to fast scalable reduce routines */
3944 
3945 kmp_uint64 __kmpc_get_taskid() {
3946 
3947  kmp_int32 gtid;
3948  kmp_info_t *thread;
3949 
3950  gtid = __kmp_get_gtid();
3951  if (gtid < 0) {
3952  return 0;
3953  }
3954  thread = __kmp_thread_from_gtid(gtid);
3955  return thread->th.th_current_task->td_task_id;
3956 
3957 } // __kmpc_get_taskid
3958 
3959 kmp_uint64 __kmpc_get_parent_taskid() {
3960 
3961  kmp_int32 gtid;
3962  kmp_info_t *thread;
3963  kmp_taskdata_t *parent_task;
3964 
3965  gtid = __kmp_get_gtid();
3966  if (gtid < 0) {
3967  return 0;
3968  }
3969  thread = __kmp_thread_from_gtid(gtid);
3970  parent_task = thread->th.th_current_task->td_parent;
3971  return (parent_task == NULL ? 0 : parent_task->td_task_id);
3972 
3973 } // __kmpc_get_parent_taskid
3974 
3986 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3987  const struct kmp_dim *dims) {
3988  __kmp_assert_valid_gtid(gtid);
3989  int j, idx;
3990  kmp_int64 last, trace_count;
3991  kmp_info_t *th = __kmp_threads[gtid];
3992  kmp_team_t *team = th->th.th_team;
3993  kmp_uint32 *flags;
3994  kmp_disp_t *pr_buf = th->th.th_dispatch;
3995  dispatch_shared_info_t *sh_buf;
3996 
3997  KA_TRACE(
3998  20,
3999  ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4000  gtid, num_dims, !team->t.t_serialized));
4001  KMP_DEBUG_ASSERT(dims != NULL);
4002  KMP_DEBUG_ASSERT(num_dims > 0);
4003 
4004  if (team->t.t_serialized) {
4005  KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4006  return; // no dependencies if team is serialized
4007  }
4008  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4009  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
4010  // the next loop
4011  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4012 
4013  // Save bounds info into allocated private buffer
4014  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4015  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4016  th, sizeof(kmp_int64) * (4 * num_dims + 1));
4017  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4018  pr_buf->th_doacross_info[0] =
4019  (kmp_int64)num_dims; // first element is number of dimensions
4020  // Save also address of num_done in order to access it later without knowing
4021  // the buffer index
4022  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4023  pr_buf->th_doacross_info[2] = dims[0].lo;
4024  pr_buf->th_doacross_info[3] = dims[0].up;
4025  pr_buf->th_doacross_info[4] = dims[0].st;
4026  last = 5;
4027  for (j = 1; j < num_dims; ++j) {
4028  kmp_int64
4029  range_length; // To keep ranges of all dimensions but the first dims[0]
4030  if (dims[j].st == 1) { // most common case
4031  // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
4032  range_length = dims[j].up - dims[j].lo + 1;
4033  } else {
4034  if (dims[j].st > 0) {
4035  KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4036  range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4037  } else { // negative increment
4038  KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4039  range_length =
4040  (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4041  }
4042  }
4043  pr_buf->th_doacross_info[last++] = range_length;
4044  pr_buf->th_doacross_info[last++] = dims[j].lo;
4045  pr_buf->th_doacross_info[last++] = dims[j].up;
4046  pr_buf->th_doacross_info[last++] = dims[j].st;
4047  }
4048 
4049  // Compute total trip count.
4050  // Start with range of dims[0] which we don't need to keep in the buffer.
4051  if (dims[0].st == 1) { // most common case
4052  trace_count = dims[0].up - dims[0].lo + 1;
4053  } else if (dims[0].st > 0) {
4054  KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4055  trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4056  } else { // negative increment
4057  KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4058  trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4059  }
4060  for (j = 1; j < num_dims; ++j) {
4061  trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
4062  }
4063  KMP_DEBUG_ASSERT(trace_count > 0);
4064 
4065  // Check if shared buffer is not occupied by other loop (idx -
4066  // __kmp_dispatch_num_buffers)
4067  if (idx != sh_buf->doacross_buf_idx) {
4068  // Shared buffer is occupied, wait for it to be free
4069  __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4070  __kmp_eq_4, NULL);
4071  }
4072 #if KMP_32_BIT_ARCH
4073  // Check if we are the first thread. After the CAS the first thread gets 0,
4074  // others get 1 if initialization is in progress, allocated pointer otherwise.
4075  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
4076  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4077  (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4078 #else
4079  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4080  (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4081 #endif
4082  if (flags == NULL) {
4083  // we are the first thread, allocate the array of flags
4084  size_t size =
4085  (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
4086  flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4087  KMP_MB();
4088  sh_buf->doacross_flags = flags;
4089  } else if (flags == (kmp_uint32 *)1) {
4090 #if KMP_32_BIT_ARCH
4091  // initialization is still in progress, need to wait
4092  while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4093 #else
4094  while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4095 #endif
4096  KMP_YIELD(TRUE);
4097  KMP_MB();
4098  } else {
4099  KMP_MB();
4100  }
4101  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4102  pr_buf->th_doacross_flags =
4103  sh_buf->doacross_flags; // save private copy in order to not
4104  // touch shared buffer on each iteration
4105  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4106 }
4107 
4108 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4109  __kmp_assert_valid_gtid(gtid);
4110  kmp_int64 shft;
4111  size_t num_dims, i;
4112  kmp_uint32 flag;
4113  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4114  kmp_info_t *th = __kmp_threads[gtid];
4115  kmp_team_t *team = th->th.th_team;
4116  kmp_disp_t *pr_buf;
4117  kmp_int64 lo, up, st;
4118 
4119  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4120  if (team->t.t_serialized) {
4121  KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4122  return; // no dependencies if team is serialized
4123  }
4124 
4125  // calculate sequential iteration number and check out-of-bounds condition
4126  pr_buf = th->th.th_dispatch;
4127  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4128  num_dims = (size_t)pr_buf->th_doacross_info[0];
4129  lo = pr_buf->th_doacross_info[2];
4130  up = pr_buf->th_doacross_info[3];
4131  st = pr_buf->th_doacross_info[4];
4132 #if OMPT_SUPPORT && OMPT_OPTIONAL
4133  ompt_dependence_t deps[num_dims];
4134 #endif
4135  if (st == 1) { // most common case
4136  if (vec[0] < lo || vec[0] > up) {
4137  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4138  "bounds [%lld,%lld]\n",
4139  gtid, vec[0], lo, up));
4140  return;
4141  }
4142  iter_number = vec[0] - lo;
4143  } else if (st > 0) {
4144  if (vec[0] < lo || vec[0] > up) {
4145  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4146  "bounds [%lld,%lld]\n",
4147  gtid, vec[0], lo, up));
4148  return;
4149  }
4150  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4151  } else { // negative increment
4152  if (vec[0] > lo || vec[0] < up) {
4153  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4154  "bounds [%lld,%lld]\n",
4155  gtid, vec[0], lo, up));
4156  return;
4157  }
4158  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4159  }
4160 #if OMPT_SUPPORT && OMPT_OPTIONAL
4161  deps[0].variable.value = iter_number;
4162  deps[0].dependence_type = ompt_dependence_type_sink;
4163 #endif
4164  for (i = 1; i < num_dims; ++i) {
4165  kmp_int64 iter, ln;
4166  size_t j = i * 4;
4167  ln = pr_buf->th_doacross_info[j + 1];
4168  lo = pr_buf->th_doacross_info[j + 2];
4169  up = pr_buf->th_doacross_info[j + 3];
4170  st = pr_buf->th_doacross_info[j + 4];
4171  if (st == 1) {
4172  if (vec[i] < lo || vec[i] > up) {
4173  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4174  "bounds [%lld,%lld]\n",
4175  gtid, vec[i], lo, up));
4176  return;
4177  }
4178  iter = vec[i] - lo;
4179  } else if (st > 0) {
4180  if (vec[i] < lo || vec[i] > up) {
4181  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4182  "bounds [%lld,%lld]\n",
4183  gtid, vec[i], lo, up));
4184  return;
4185  }
4186  iter = (kmp_uint64)(vec[i] - lo) / st;
4187  } else { // st < 0
4188  if (vec[i] > lo || vec[i] < up) {
4189  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4190  "bounds [%lld,%lld]\n",
4191  gtid, vec[i], lo, up));
4192  return;
4193  }
4194  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4195  }
4196  iter_number = iter + ln * iter_number;
4197 #if OMPT_SUPPORT && OMPT_OPTIONAL
4198  deps[i].variable.value = iter;
4199  deps[i].dependence_type = ompt_dependence_type_sink;
4200 #endif
4201  }
4202  shft = iter_number % 32; // use 32-bit granularity
4203  iter_number >>= 5; // divided by 32
4204  flag = 1 << shft;
4205  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4206  KMP_YIELD(TRUE);
4207  }
4208  KMP_MB();
4209 #if OMPT_SUPPORT && OMPT_OPTIONAL
4210  if (ompt_enabled.ompt_callback_dependences) {
4211  ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4212  &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4213  }
4214 #endif
4215  KA_TRACE(20,
4216  ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4217  gtid, (iter_number << 5) + shft));
4218 }
4219 
4220 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4221  __kmp_assert_valid_gtid(gtid);
4222  kmp_int64 shft;
4223  size_t num_dims, i;
4224  kmp_uint32 flag;
4225  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4226  kmp_info_t *th = __kmp_threads[gtid];
4227  kmp_team_t *team = th->th.th_team;
4228  kmp_disp_t *pr_buf;
4229  kmp_int64 lo, st;
4230 
4231  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4232  if (team->t.t_serialized) {
4233  KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4234  return; // no dependencies if team is serialized
4235  }
4236 
4237  // calculate sequential iteration number (same as in "wait" but no
4238  // out-of-bounds checks)
4239  pr_buf = th->th.th_dispatch;
4240  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4241  num_dims = (size_t)pr_buf->th_doacross_info[0];
4242  lo = pr_buf->th_doacross_info[2];
4243  st = pr_buf->th_doacross_info[4];
4244 #if OMPT_SUPPORT && OMPT_OPTIONAL
4245  ompt_dependence_t deps[num_dims];
4246 #endif
4247  if (st == 1) { // most common case
4248  iter_number = vec[0] - lo;
4249  } else if (st > 0) {
4250  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4251  } else { // negative increment
4252  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4253  }
4254 #if OMPT_SUPPORT && OMPT_OPTIONAL
4255  deps[0].variable.value = iter_number;
4256  deps[0].dependence_type = ompt_dependence_type_source;
4257 #endif
4258  for (i = 1; i < num_dims; ++i) {
4259  kmp_int64 iter, ln;
4260  size_t j = i * 4;
4261  ln = pr_buf->th_doacross_info[j + 1];
4262  lo = pr_buf->th_doacross_info[j + 2];
4263  st = pr_buf->th_doacross_info[j + 4];
4264  if (st == 1) {
4265  iter = vec[i] - lo;
4266  } else if (st > 0) {
4267  iter = (kmp_uint64)(vec[i] - lo) / st;
4268  } else { // st < 0
4269  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4270  }
4271  iter_number = iter + ln * iter_number;
4272 #if OMPT_SUPPORT && OMPT_OPTIONAL
4273  deps[i].variable.value = iter;
4274  deps[i].dependence_type = ompt_dependence_type_source;
4275 #endif
4276  }
4277 #if OMPT_SUPPORT && OMPT_OPTIONAL
4278  if (ompt_enabled.ompt_callback_dependences) {
4279  ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4280  &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4281  }
4282 #endif
4283  shft = iter_number % 32; // use 32-bit granularity
4284  iter_number >>= 5; // divided by 32
4285  flag = 1 << shft;
4286  KMP_MB();
4287  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4288  KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4289  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4290  (iter_number << 5) + shft));
4291 }
4292 
4293 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4294  __kmp_assert_valid_gtid(gtid);
4295  kmp_int32 num_done;
4296  kmp_info_t *th = __kmp_threads[gtid];
4297  kmp_team_t *team = th->th.th_team;
4298  kmp_disp_t *pr_buf = th->th.th_dispatch;
4299 
4300  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4301  if (team->t.t_serialized) {
4302  KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4303  return; // nothing to do
4304  }
4305  num_done =
4306  KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4307  if (num_done == th->th.th_team_nproc) {
4308  // we are the last thread, need to free shared resources
4309  int idx = pr_buf->th_doacross_buf_idx - 1;
4310  dispatch_shared_info_t *sh_buf =
4311  &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4312  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4313  (kmp_int64)&sh_buf->doacross_num_done);
4314  KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4315  KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4316  __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4317  sh_buf->doacross_flags = NULL;
4318  sh_buf->doacross_num_done = 0;
4319  sh_buf->doacross_buf_idx +=
4320  __kmp_dispatch_num_buffers; // free buffer for future re-use
4321  }
4322  // free private resources (need to keep buffer index forever)
4323  pr_buf->th_doacross_flags = NULL;
4324  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4325  pr_buf->th_doacross_info = NULL;
4326  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4327 }
4328 
4329 /* OpenMP 5.1 Memory Management routines */
4330 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4331  return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
4332 }
4333 
4334 void *omp_aligned_alloc(size_t align, size_t size,
4335  omp_allocator_handle_t allocator) {
4336  return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
4337 }
4338 
4339 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4340  return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
4341 }
4342 
4343 void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4344  omp_allocator_handle_t allocator) {
4345  return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
4346 }
4347 
4348 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4349  omp_allocator_handle_t free_allocator) {
4350  return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
4351  free_allocator);
4352 }
4353 
4354 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4355  ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4356 }
4357 /* end of OpenMP 5.1 Memory Management routines */
4358 
4359 int __kmpc_get_target_offload(void) {
4360  if (!__kmp_init_serial) {
4361  __kmp_serial_initialize();
4362  }
4363  return __kmp_target_offload;
4364 }
4365 
4366 int __kmpc_pause_resource(kmp_pause_status_t level) {
4367  if (!__kmp_init_serial) {
4368  return 1; // Can't pause if runtime is not initialized
4369  }
4370  return __kmp_pause_resource(level);
4371 }
4372 
4373 void __kmpc_error(ident_t *loc, int severity, const char *message) {
4374  if (!__kmp_init_serial)
4375  __kmp_serial_initialize();
4376 
4377  KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4378 
4379 #if OMPT_SUPPORT
4380  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4381  ompt_callbacks.ompt_callback(ompt_callback_error)(
4382  (ompt_severity_t)severity, message, KMP_STRLEN(message),
4383  OMPT_GET_RETURN_ADDRESS(0));
4384  }
4385 #endif // OMPT_SUPPORT
4386 
4387  char *src_loc;
4388  if (loc && loc->psource) {
4389  kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4390  src_loc =
4391  __kmp_str_format("%s:%s:%s", str_loc.file, str_loc.line, str_loc.col);
4392  __kmp_str_loc_free(&str_loc);
4393  } else {
4394  src_loc = __kmp_str_format("unknown");
4395  }
4396 
4397  if (severity == severity_warning)
4398  KMP_WARNING(UserDirectedWarning, src_loc, message);
4399  else
4400  KMP_FATAL(UserDirectedError, src_loc, message);
4401 
4402  __kmp_str_free(&src_loc);
4403 }
4404 
4405 // Mark begin of scope directive.
4406 void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4407 // reserved is for extension of scope directive and not used.
4408 #if OMPT_SUPPORT && OMPT_OPTIONAL
4409  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4410  kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4411  int tid = __kmp_tid_from_gtid(gtid);
4412  ompt_callbacks.ompt_callback(ompt_callback_work)(
4413  ompt_work_scope, ompt_scope_begin,
4414  &(team->t.ompt_team_info.parallel_data),
4415  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4416  OMPT_GET_RETURN_ADDRESS(0));
4417  }
4418 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
4419 }
4420 
4421 // Mark end of scope directive
4422 void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4423 // reserved is for extension of scope directive and not used.
4424 #if OMPT_SUPPORT && OMPT_OPTIONAL
4425  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4426  kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4427  int tid = __kmp_tid_from_gtid(gtid);
4428  ompt_callbacks.ompt_callback(ompt_callback_work)(
4429  ompt_work_scope, ompt_scope_end,
4430  &(team->t.ompt_team_info.parallel_data),
4431  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4432  OMPT_GET_RETURN_ADDRESS(0));
4433  }
4434 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
4435 }
4436 
4437 #ifdef KMP_USE_VERSION_SYMBOLS
4438 // For GOMP compatibility there are two versions of each omp_* API.
4439 // One is the plain C symbol and one is the Fortran symbol with an appended
4440 // underscore. When we implement a specific ompc_* version of an omp_*
4441 // function, we want the plain GOMP versioned symbol to alias the ompc_* version
4442 // instead of the Fortran versions in kmp_ftn_entry.h
4443 extern "C" {
4444 // Have to undef these from omp.h so they aren't translated into
4445 // their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
4446 #ifdef omp_set_affinity_format
4447 #undef omp_set_affinity_format
4448 #endif
4449 #ifdef omp_get_affinity_format
4450 #undef omp_get_affinity_format
4451 #endif
4452 #ifdef omp_display_affinity
4453 #undef omp_display_affinity
4454 #endif
4455 #ifdef omp_capture_affinity
4456 #undef omp_capture_affinity
4457 #endif
4458 KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4459  "OMP_5.0");
4460 KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4461  "OMP_5.0");
4462 KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4463  "OMP_5.0");
4464 KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
4465  "OMP_5.0");
4466 } // extern "C"
4467 #endif
@ KMP_IDENT_WORK_LOOP
Definition: kmp.h:214
@ KMP_IDENT_WORK_SECTIONS
Definition: kmp.h:216
@ KMP_IDENT_AUTOPAR
Definition: kmp.h:199
@ KMP_IDENT_WORK_DISTRIBUTE
Definition: kmp.h:218
kmp_int32 __kmpc_ok_to_fork(ident_t *loc)
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,...)
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads)
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,...)
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
void(* kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
Definition: kmp.h:1595
void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams_lb, kmp_int32 num_teams_ub, kmp_int32 num_threads)
void __kmpc_begin(ident_t *loc, kmp_int32 flags)
void __kmpc_end(ident_t *loc)
#define KMP_COUNT_BLOCK(name)
Increments specified counter (name).
Definition: kmp_stats.h:908
stats_state_e
the states which a thread can be in
Definition: kmp_stats.h:63
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck)
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid)
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid)
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck)
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void(*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck)
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid)
void __kmpc_flush(ident_t *loc)
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid)
kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void(*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck)
void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), kmp_int32 didit)
kmp_int32 __kmpc_global_num_threads(ident_t *loc)
kmp_int32 __kmpc_global_thread_num(ident_t *loc)
kmp_int32 __kmpc_in_parallel(ident_t *loc)
kmp_int32 __kmpc_bound_thread_num(ident_t *loc)
kmp_int32 __kmpc_bound_num_threads(ident_t *loc)
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid)
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid)
void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid)
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid)
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit, uint32_t hint)
kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid)
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, const struct kmp_dim *dims)
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid)
kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter)
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
Definition: kmp.h:234
char const * psource
Definition: kmp.h:244
kmp_int32 flags
Definition: kmp.h:236