LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1 /*
2  * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // The LLVM Compiler Infrastructure
8 //
9 // This file is dual licensed under the MIT and the University of Illinois Open
10 // Source Licenses. See LICENSE.txt for details.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #define __KMP_IMP
15 #include "omp.h" /* extern "C" declarations of user-visible routines */
16 #include "kmp.h"
17 #include "kmp_error.h"
18 #include "kmp_i18n.h"
19 #include "kmp_itt.h"
20 #include "kmp_lock.h"
21 #include "kmp_stats.h"
22 
23 #if OMPT_SUPPORT
24 #include "ompt-specific.h"
25 #endif
26 
27 #define MAX_MESSAGE 512
28 
29 // flags will be used in future, e.g. to implement openmp_strict library
30 // restrictions
31 
40 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
41  // By default __kmpc_begin() is no-op.
42  char *env;
43  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
44  __kmp_str_match_true(env)) {
45  __kmp_middle_initialize();
46  KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
47  } else if (__kmp_ignore_mppbeg() == FALSE) {
48  // By default __kmp_ignore_mppbeg() returns TRUE.
49  __kmp_internal_begin();
50  KC_TRACE(10, ("__kmpc_begin: called\n"));
51  }
52 }
53 
62 void __kmpc_end(ident_t *loc) {
63  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
64  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
65  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
66  // returns FALSE and __kmpc_end() will unregister this root (it can cause
67  // library shut down).
68  if (__kmp_ignore_mppend() == FALSE) {
69  KC_TRACE(10, ("__kmpc_end: called\n"));
70  KA_TRACE(30, ("__kmpc_end\n"));
71 
72  __kmp_internal_end_thread(-1);
73  }
74 #if KMP_OS_WINDOWS && OMPT_SUPPORT
75  // Normal exit process on Windows does not allow worker threads of the final
76  // parallel region to finish reporting their events, so shutting down the
77  // library here fixes the issue at least for the cases where __kmpc_end() is
78  // placed properly.
79  if (ompt_enabled.enabled)
80  __kmp_internal_end_library(__kmp_gtid_get_specific());
81 #endif
82 }
83 
102 kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
103  kmp_int32 gtid = __kmp_entry_gtid();
104 
105  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
106 
107  return gtid;
108 }
109 
124 kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
125  KC_TRACE(10,
126  ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
127 
128  return TCR_4(__kmp_all_nth);
129 }
130 
137 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
138  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
139  return __kmp_tid_from_gtid(__kmp_entry_gtid());
140 }
141 
147 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
148  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
149 
150  return __kmp_entry_thread()->th.th_team->t.t_nproc;
151 }
152 
159 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
160 #ifndef KMP_DEBUG
161 
162  return TRUE;
163 
164 #else
165 
166  const char *semi2;
167  const char *semi3;
168  int line_no;
169 
170  if (__kmp_par_range == 0) {
171  return TRUE;
172  }
173  semi2 = loc->psource;
174  if (semi2 == NULL) {
175  return TRUE;
176  }
177  semi2 = strchr(semi2, ';');
178  if (semi2 == NULL) {
179  return TRUE;
180  }
181  semi2 = strchr(semi2 + 1, ';');
182  if (semi2 == NULL) {
183  return TRUE;
184  }
185  if (__kmp_par_range_filename[0]) {
186  const char *name = semi2 - 1;
187  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
188  name--;
189  }
190  if ((*name == '/') || (*name == ';')) {
191  name++;
192  }
193  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
194  return __kmp_par_range < 0;
195  }
196  }
197  semi3 = strchr(semi2 + 1, ';');
198  if (__kmp_par_range_routine[0]) {
199  if ((semi3 != NULL) && (semi3 > semi2) &&
200  (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
201  return __kmp_par_range < 0;
202  }
203  }
204  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
205  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
206  return __kmp_par_range > 0;
207  }
208  return __kmp_par_range < 0;
209  }
210  return TRUE;
211 
212 #endif /* KMP_DEBUG */
213 }
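// The filtering above relies on loc->psource having the usual
// semicolon-separated shape produced by the compiler, roughly
// ";file;routine;first_line;...;;". A hypothetical example of how the fields
// line up with the parsing above (the literal values are illustrative only,
// not something this file defines):
#if 0 // illustration only, not compiled
static ident_t example_loc = {
    0, KMP_IDENT_KMPC, 0, 0,
    // field 1 empty, then file, routine, and the line used by KMP_PAR_RANGE:
    ";src/solver.c;compute_kernel;42;17;;"};
// semi2 ends up at the ';' after "src/solver.c"; the backwards scan reduces
// the file field to its basename "solver.c"; semi3 is the ';' after
// "compute_kernel"; KMP_SSCANF then reads 42 as line_no.
#endif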
214 
221 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
222  return __kmp_entry_thread()->th.th_root->r.r_active;
223 }
224 
234 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
235  kmp_int32 num_threads) {
236  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
237  global_tid, num_threads));
238 
239  __kmp_push_num_threads(loc, global_tid, num_threads);
240 }
241 
242 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
243  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
244 
245  /* the num_threads are automatically popped */
246 }
247 
248 #if OMP_40_ENABLED
249 
250 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
251  kmp_int32 proc_bind) {
252  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
253  proc_bind));
254 
255  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
256 }
257 
258 #endif /* OMP_40_ENABLED */
259 
270 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
271  int gtid = __kmp_entry_gtid();
272 
273 #if (KMP_STATS_ENABLED)
274  // If we were in a serial region, then stop the serial timer, record
275  // the event, and start parallel region timer
276  stats_state_e previous_state = KMP_GET_THREAD_STATE();
277  if (previous_state == stats_state_e::SERIAL_REGION) {
278  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
279  } else {
280  KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
281  }
282  int inParallel = __kmpc_in_parallel(loc);
283  if (inParallel) {
284  KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
285  } else {
286  KMP_COUNT_BLOCK(OMP_PARALLEL);
287  }
288 #endif
289 
290  // maybe saving thr_state is enough here
291  {
292  va_list ap;
293  va_start(ap, microtask);
294 
295 #if OMPT_SUPPORT
296  ompt_frame_t *ompt_frame;
297  if (ompt_enabled.enabled) {
298  kmp_info_t *master_th = __kmp_threads[gtid];
299  kmp_team_t *parent_team = master_th->th.th_team;
300  ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
301  if (lwt)
302  ompt_frame = &(lwt->ompt_task_info.frame);
303  else {
304  int tid = __kmp_tid_from_gtid(gtid);
305  ompt_frame = &(
306  parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
307  }
308  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
309  OMPT_STORE_RETURN_ADDRESS(gtid);
310  }
311 #endif
312 
313 #if INCLUDE_SSC_MARKS
314  SSC_MARK_FORKING();
315 #endif
316  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
317  VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
318  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
319 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
320 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
321  &ap
322 #else
323  ap
324 #endif
325  );
326 #if INCLUDE_SSC_MARKS
327  SSC_MARK_JOINING();
328 #endif
329  __kmp_join_call(loc, gtid
330 #if OMPT_SUPPORT
331  ,
332  fork_context_intel
333 #endif
334  );
335 
336  va_end(ap);
337  }
338 
339 #if KMP_STATS_ENABLED
340  if (previous_state == stats_state_e::SERIAL_REGION) {
341  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
342  } else {
343  KMP_POP_PARTITIONED_TIMER();
344  }
345 #endif // KMP_STATS_ENABLED
346 }
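// A rough sketch of how a compiler typically lowers "#pragma omp parallel"
// onto this entry point: the region body is outlined into a microtask with
// the kmpc_micro signature (global and bound thread ids passed by pointer),
// and the shared variables travel through the varargs. All names and the
// ident_t initializer below are illustrative only:
#if 0 // illustration only, not compiled
static ident_t loc_par = {0, KMP_IDENT_KMPC, 0, 0, ";demo.c;demo_parallel;10;1;;"};

// Outlined body of the parallel region.
static void outlined_parallel(kmp_int32 *gtid, kmp_int32 *btid, int *shared_n) {
  if (*btid == 0)
    *shared_n += 1; // only the team master touches the shared variable here
}

void demo_parallel(void) {
  int n = 41;
  // argc == 1: one shared argument, forwarded by the runtime to the microtask.
  __kmpc_fork_call(&loc_par, 1, (kmpc_micro)outlined_parallel, &n);
}
#endif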
347 
348 #if OMP_40_ENABLED
349 
360 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
361  kmp_int32 num_teams, kmp_int32 num_threads) {
362  KA_TRACE(20,
363  ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
364  global_tid, num_teams, num_threads));
365 
366  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
367 }
368 
379 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
380  ...) {
381  int gtid = __kmp_entry_gtid();
382  kmp_info_t *this_thr = __kmp_threads[gtid];
383  va_list ap;
384  va_start(ap, microtask);
385 
386  KMP_COUNT_BLOCK(OMP_TEAMS);
387 
388  // remember teams entry point and nesting level
389  this_thr->th.th_teams_microtask = microtask;
390  this_thr->th.th_teams_level =
391  this_thr->th.th_team->t.t_level; // AC: can be >0 on host
392 
393 #if OMPT_SUPPORT
394  kmp_team_t *parent_team = this_thr->th.th_team;
395  int tid = __kmp_tid_from_gtid(gtid);
396  if (ompt_enabled.enabled) {
397  parent_team->t.t_implicit_task_taskdata[tid]
398  .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
399  }
400  OMPT_STORE_RETURN_ADDRESS(gtid);
401 #endif
402 
403  // check if __kmpc_push_num_teams was called; set the default number of
404  // teams otherwise
405  if (this_thr->th.th_teams_size.nteams == 0) {
406  __kmp_push_num_teams(loc, gtid, 0, 0);
407  }
408  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
409  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
410  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
411 
412  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
413  VOLATILE_CAST(microtask_t)
414  __kmp_teams_master, // "wrapped" task
415  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
416 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
417  &ap
418 #else
419  ap
420 #endif
421  );
422  __kmp_join_call(loc, gtid
423 #if OMPT_SUPPORT
424  ,
425  fork_context_intel
426 #endif
427  );
428 
429  this_thr->th.th_teams_microtask = NULL;
430  this_thr->th.th_teams_level = 0;
431  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
432  va_end(ap);
433 }
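// A rough sketch of how "#pragma omp teams num_teams(4) thread_limit(8)"
// might be lowered onto the two entry points above; the outlined function and
// all names below are illustrative only:
#if 0 // illustration only, not compiled
static ident_t loc_teams = {0, KMP_IDENT_KMPC, 0, 0, ";demo.c;demo_teams;20;1;;"};

static void outlined_teams(kmp_int32 *gtid, kmp_int32 *btid, int *shared_x) {
  if (*gtid == 0)
    *shared_x = 1; // only the initial thread writes, so no race between teams
}

void demo_teams(void) {
  int x = 0;
  kmp_int32 gtid = __kmpc_global_thread_num(&loc_teams);
  __kmpc_push_num_teams(&loc_teams, gtid, /*num_teams=*/4, /*num_threads=*/8);
  __kmpc_fork_teams(&loc_teams, 1, (kmpc_micro)outlined_teams, &x);
}
#endif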
434 #endif /* OMP_40_ENABLED */
435 
436 // I don't think this function should ever have been exported.
437 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
438 // openmp code ever called it, but it's been exported from the RTL for so
439 // long that I'm afraid to remove the definition.
440 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
441 
454 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
455 // The implementation is now in kmp_runtime.cpp so that it can share static
456 // functions with kmp_fork_call since the tasks to be done are similar in
457 // each case.
458 #if OMPT_SUPPORT
459  OMPT_STORE_RETURN_ADDRESS(global_tid);
460 #endif
461  __kmp_serialized_parallel(loc, global_tid);
462 }
463 
471 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
472  kmp_internal_control_t *top;
473  kmp_info_t *this_thr;
474  kmp_team_t *serial_team;
475 
476  KC_TRACE(10,
477  ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
478 
479  /* skip all this code for autopar serialized loops since it results in
480  unacceptable overhead */
481  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
482  return;
483 
484  // Not autopar code
485  if (!TCR_4(__kmp_init_parallel))
486  __kmp_parallel_initialize();
487 
488 #if OMP_50_ENABLED
489  __kmp_resume_if_soft_paused();
490 #endif
491 
492  this_thr = __kmp_threads[global_tid];
493  serial_team = this_thr->th.th_serial_team;
494 
495 #if OMP_45_ENABLED
496  kmp_task_team_t *task_team = this_thr->th.th_task_team;
497 
498  // we need to wait for the proxy tasks before finishing the thread
499  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
500  __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
501 #endif
502 
503  KMP_MB();
504  KMP_DEBUG_ASSERT(serial_team);
505  KMP_ASSERT(serial_team->t.t_serialized);
506  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
507  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
508  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
509  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
510 
511 #if OMPT_SUPPORT
512  if (ompt_enabled.enabled &&
513  this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
514  OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
515  if (ompt_enabled.ompt_callback_implicit_task) {
516  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
517  ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
518  OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
519  }
520 
521  // clear the task id only after unlinking the task
522  ompt_data_t *parent_task_data;
523  __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
524 
525  if (ompt_enabled.ompt_callback_parallel_end) {
526  ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
527  &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
528  ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
529  }
530  __ompt_lw_taskteam_unlink(this_thr);
531  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
532  }
533 #endif
534 
535  /* If necessary, pop the internal control stack values and replace the team
536  * values */
537  top = serial_team->t.t_control_stack_top;
538  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
539  copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
540  serial_team->t.t_control_stack_top = top->next;
541  __kmp_free(top);
542  }
543 
544  // if( serial_team -> t.t_serialized > 1 )
545  serial_team->t.t_level--;
546 
547  /* pop dispatch buffers stack */
548  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
549  {
550  dispatch_private_info_t *disp_buffer =
551  serial_team->t.t_dispatch->th_disp_buffer;
552  serial_team->t.t_dispatch->th_disp_buffer =
553  serial_team->t.t_dispatch->th_disp_buffer->next;
554  __kmp_free(disp_buffer);
555  }
556 #if OMP_50_ENABLED
557  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
558 #endif
559 
560  --serial_team->t.t_serialized;
561  if (serial_team->t.t_serialized == 0) {
562 
563 /* return to the parallel section */
564 
565 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
566  if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
567  __kmp_clear_x87_fpu_status_word();
568  __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
569  __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
570  }
571 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
572 
573  this_thr->th.th_team = serial_team->t.t_parent;
574  this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
575 
576  /* restore values cached in the thread */
577  this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
578  this_thr->th.th_team_master =
579  serial_team->t.t_parent->t.t_threads[0]; /* JPH */
580  this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
581 
582  /* TODO the below shouldn't need to be adjusted for serialized teams */
583  this_thr->th.th_dispatch =
584  &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
585 
586  __kmp_pop_current_task_from_thread(this_thr);
587 
588  KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
589  this_thr->th.th_current_task->td_flags.executing = 1;
590 
591  if (__kmp_tasking_mode != tskm_immediate_exec) {
592  // Copy the task team from the new child / old parent team to the thread.
593  this_thr->th.th_task_team =
594  this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
595  KA_TRACE(20,
596  ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
597  "team %p\n",
598  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
599  }
600  } else {
601  if (__kmp_tasking_mode != tskm_immediate_exec) {
602  KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
603  "depth of serial team %p to %d\n",
604  global_tid, serial_team, serial_team->t.t_serialized));
605  }
606  }
607 
608  if (__kmp_env_consistency_check)
609  __kmp_pop_parallel(global_tid, NULL);
610 #if OMPT_SUPPORT
611  if (ompt_enabled.enabled)
612  this_thr->th.ompt_thread_info.state =
613  ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
614  : ompt_state_work_parallel);
615 #endif
616 }
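// A rough sketch of the calling sequence a compiler emits for a parallel
// region it knows is serialized (e.g. "#pragma omp parallel if(0)"): the pair
// of calls above brackets a direct invocation of the outlined body by the
// encountering thread. Names are illustrative only:
#if 0 // illustration only, not compiled
void demo_serialized_parallel(ident_t *loc, int *n) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc);
  kmp_int32 zero = 0;
  __kmpc_serialized_parallel(loc, gtid);
  outlined_parallel(&gtid, &zero, n); // same outlined body as the sketch above
  __kmpc_end_serialized_parallel(loc, gtid);
}
#endif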
617 
626 void __kmpc_flush(ident_t *loc) {
627  KC_TRACE(10, ("__kmpc_flush: called\n"));
628 
629  /* need explicit __mf() here since the library uses volatile instead */
630  KMP_MB(); /* Flush all pending memory write invalidates. */
631 
632 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
633 #if KMP_MIC
634 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
635 // We shouldn't need it, though, since the ABI rules require that
636 // * If the compiler generates NGO stores it also generates the fence
637 // * If users hand-code NGO stores they should insert the fence
638 // therefore no incomplete unordered stores should be visible.
639 #else
640  // C74404
641  // This is to address non-temporal store instructions (sfence needed).
642  // The clflush instruction is also addressed (mfence needed).
643  // Probably the non-temporal load movntdqa instruction should also be
644  // addressed.
645  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
646  if (!__kmp_cpuinfo.initialized) {
647  __kmp_query_cpuid(&__kmp_cpuinfo);
648  }
649  if (!__kmp_cpuinfo.sse2) {
650  // CPU cannot execute SSE2 instructions.
651  } else {
652 #if KMP_COMPILER_ICC
653  _mm_mfence();
654 #elif KMP_COMPILER_MSVC
655  MemoryBarrier();
656 #else
657  __sync_synchronize();
658 #endif // KMP_COMPILER_ICC
659  }
660 #endif // KMP_MIC
661 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
662 // Nothing to see here move along
663 #elif KMP_ARCH_PPC64
664 // Nothing needed here (we have a real MB above).
665 #if KMP_OS_CNK
666  // The flushing thread needs to yield here; this prevents a
667  // busy-waiting thread from saturating the pipeline. flush is
668  // often used in loops like this:
669  // while (!flag) {
670  // #pragma omp flush(flag)
671  // }
672  // and adding the yield here is good for at least a 10x speedup
673  // when running >2 threads per core (on the NAS LU benchmark).
674  __kmp_yield(TRUE);
675 #endif
676 #else
677 #error Unknown or unsupported architecture
678 #endif
679 
680 #if OMPT_SUPPORT && OMPT_OPTIONAL
681  if (ompt_enabled.ompt_callback_flush) {
682  ompt_callbacks.ompt_callback(ompt_callback_flush)(
683  __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
684  }
685 #endif
686 }
687 
688 /* -------------------------------------------------------------------------- */
696 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
697  KMP_COUNT_BLOCK(OMP_BARRIER);
698  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
699 
700  if (!TCR_4(__kmp_init_parallel))
701  __kmp_parallel_initialize();
702 
703 #if OMP_50_ENABLED
704  __kmp_resume_if_soft_paused();
705 #endif
706 
707  if (__kmp_env_consistency_check) {
708  if (loc == 0) {
709  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
710  }
711 
712  __kmp_check_barrier(global_tid, ct_barrier, loc);
713  }
714 
715 #if OMPT_SUPPORT
716  ompt_frame_t *ompt_frame;
717  if (ompt_enabled.enabled) {
718  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
719  if (ompt_frame->enter_frame.ptr == NULL)
720  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
721  OMPT_STORE_RETURN_ADDRESS(global_tid);
722  }
723 #endif
724  __kmp_threads[global_tid]->th.th_ident = loc;
725  // TODO: explicit barrier_wait_id:
726  // this function is called when 'barrier' directive is present or
727  // implicit barrier at the end of a worksharing construct.
728  // 1) better to add a per-thread barrier counter to a thread data structure
729  // 2) set to 0 when a new team is created
730  // 3) no sync is required
731 
732  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
733 #if OMPT_SUPPORT && OMPT_OPTIONAL
734  if (ompt_enabled.enabled) {
735  ompt_frame->enter_frame = ompt_data_none;
736  }
737 #endif
738 }
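// For reference, an explicit "#pragma omp barrier" lowers to a single call to
// this entry point with the construct's location and the caller's gtid; the
// same call is also emitted for the implicit barrier at the end of
// worksharing constructs without nowait. A minimal, illustrative sketch:
#if 0 // illustration only, not compiled
void demo_barrier(ident_t *loc, kmp_int32 gtid) {
  __kmpc_barrier(loc, gtid); // all threads of the current team wait here
}
#endif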
739 
740 /* The BARRIER for a MASTER section is always explicit */
747 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
748  int status = 0;
749 
750  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
751 
752  if (!TCR_4(__kmp_init_parallel))
753  __kmp_parallel_initialize();
754 
755 #if OMP_50_ENABLED
756  __kmp_resume_if_soft_paused();
757 #endif
758 
759  if (KMP_MASTER_GTID(global_tid)) {
760  KMP_COUNT_BLOCK(OMP_MASTER);
761  KMP_PUSH_PARTITIONED_TIMER(OMP_master);
762  status = 1;
763  }
764 
765 #if OMPT_SUPPORT && OMPT_OPTIONAL
766  if (status) {
767  if (ompt_enabled.ompt_callback_master) {
768  kmp_info_t *this_thr = __kmp_threads[global_tid];
769  kmp_team_t *team = this_thr->th.th_team;
770 
771  int tid = __kmp_tid_from_gtid(global_tid);
772  ompt_callbacks.ompt_callback(ompt_callback_master)(
773  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
774  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
775  OMPT_GET_RETURN_ADDRESS(0));
776  }
777  }
778 #endif
779 
780  if (__kmp_env_consistency_check) {
781 #if KMP_USE_DYNAMIC_LOCK
782  if (status)
783  __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
784  else
785  __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
786 #else
787  if (status)
788  __kmp_push_sync(global_tid, ct_master, loc, NULL);
789  else
790  __kmp_check_sync(global_tid, ct_master, loc, NULL);
791 #endif
792  }
793 
794  return status;
795 }
796 
805 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
806  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
807 
808  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
809  KMP_POP_PARTITIONED_TIMER();
810 
811 #if OMPT_SUPPORT && OMPT_OPTIONAL
812  kmp_info_t *this_thr = __kmp_threads[global_tid];
813  kmp_team_t *team = this_thr->th.th_team;
814  if (ompt_enabled.ompt_callback_master) {
815  int tid = __kmp_tid_from_gtid(global_tid);
816  ompt_callbacks.ompt_callback(ompt_callback_master)(
817  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
818  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
819  OMPT_GET_RETURN_ADDRESS(0));
820  }
821 #endif
822 
823  if (__kmp_env_consistency_check) {
824  if (global_tid < 0)
825  KMP_WARNING(ThreadIdentInvalid);
826 
827  if (KMP_MASTER_GTID(global_tid))
828  __kmp_pop_sync(global_tid, ct_master, loc);
829  }
830 }
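// A rough sketch of how "#pragma omp master" is typically lowered: only the
// thread that gets a nonzero status runs the body and the matching
// __kmpc_end_master, and there is no implied barrier. body() below is a
// hypothetical placeholder:
#if 0 // illustration only, not compiled
void demo_master(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_master(loc, gtid)) {
    body(); // executed by the team master only
    __kmpc_end_master(loc, gtid);
  }
}
#endif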
831 
839 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
840  int cid = 0;
841  kmp_info_t *th;
842  KMP_DEBUG_ASSERT(__kmp_init_serial);
843 
844  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
845 
846  if (!TCR_4(__kmp_init_parallel))
847  __kmp_parallel_initialize();
848 
849 #if OMP_50_ENABLED
850  __kmp_resume_if_soft_paused();
851 #endif
852 
853 #if USE_ITT_BUILD
854  __kmp_itt_ordered_prep(gtid);
855 // TODO: ordered_wait_id
856 #endif /* USE_ITT_BUILD */
857 
858  th = __kmp_threads[gtid];
859 
860 #if OMPT_SUPPORT && OMPT_OPTIONAL
861  kmp_team_t *team;
862  ompt_wait_id_t lck;
863  void *codeptr_ra;
864  if (ompt_enabled.enabled) {
865  OMPT_STORE_RETURN_ADDRESS(gtid);
866  team = __kmp_team_from_gtid(gtid);
867  lck = (ompt_wait_id_t)&team->t.t_ordered.dt.t_value;
868  /* OMPT state update */
869  th->th.ompt_thread_info.wait_id = lck;
870  th->th.ompt_thread_info.state = ompt_state_wait_ordered;
871 
872  /* OMPT event callback */
873  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
874  if (ompt_enabled.ompt_callback_mutex_acquire) {
875  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
876  ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
877  (ompt_wait_id_t)lck, codeptr_ra);
878  }
879  }
880 #endif
881 
882  if (th->th.th_dispatch->th_deo_fcn != 0)
883  (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
884  else
885  __kmp_parallel_deo(&gtid, &cid, loc);
886 
887 #if OMPT_SUPPORT && OMPT_OPTIONAL
888  if (ompt_enabled.enabled) {
889  /* OMPT state update */
890  th->th.ompt_thread_info.state = ompt_state_work_parallel;
891  th->th.ompt_thread_info.wait_id = 0;
892 
893  /* OMPT event callback */
894  if (ompt_enabled.ompt_callback_mutex_acquired) {
895  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
896  ompt_mutex_ordered, (ompt_wait_id_t)lck, codeptr_ra);
897  }
898  }
899 #endif
900 
901 #if USE_ITT_BUILD
902  __kmp_itt_ordered_start(gtid);
903 #endif /* USE_ITT_BUILD */
904 }
905 
913 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
914  int cid = 0;
915  kmp_info_t *th;
916 
917  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
918 
919 #if USE_ITT_BUILD
920  __kmp_itt_ordered_end(gtid);
921 // TODO: ordered_wait_id
922 #endif /* USE_ITT_BUILD */
923 
924  th = __kmp_threads[gtid];
925 
926  if (th->th.th_dispatch->th_dxo_fcn != 0)
927  (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
928  else
929  __kmp_parallel_dxo(&gtid, &cid, loc);
930 
931 #if OMPT_SUPPORT && OMPT_OPTIONAL
932  OMPT_STORE_RETURN_ADDRESS(gtid);
933  if (ompt_enabled.ompt_callback_mutex_released) {
934  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
935  ompt_mutex_ordered,
936  (ompt_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
937  OMPT_LOAD_RETURN_ADDRESS(gtid));
938  }
939 #endif
940 }
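// A rough sketch of how an "#pragma omp ordered" block inside a loop with the
// ordered clause is typically lowered: the two calls above bracket the body
// so that it executes in iteration order. ordered_body() is a hypothetical
// placeholder:
#if 0 // illustration only, not compiled
void demo_ordered_iteration(ident_t *loc, kmp_int32 gtid, int i) {
  __kmpc_ordered(loc, gtid);
  ordered_body(i); // runs for iteration i only after iteration i-1 finished
  __kmpc_end_ordered(loc, gtid);
}
#endif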
941 
942 #if KMP_USE_DYNAMIC_LOCK
943 
944 static __forceinline void
945 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
946  kmp_int32 gtid, kmp_indirect_locktag_t tag) {
947  // Pointer to the allocated indirect lock is written to crit, while indexing
948  // is ignored.
949  void *idx;
950  kmp_indirect_lock_t **lck;
951  lck = (kmp_indirect_lock_t **)crit;
952  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
953  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
954  KMP_SET_I_LOCK_LOCATION(ilk, loc);
955  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
956  KA_TRACE(20,
957  ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
958 #if USE_ITT_BUILD
959  __kmp_itt_critical_creating(ilk->lock, loc);
960 #endif
961  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
962  if (status == 0) {
963 #if USE_ITT_BUILD
964  __kmp_itt_critical_destroyed(ilk->lock);
965 #endif
966  // We don't really need to destroy the unclaimed lock here since it will be
967  // cleaned up at program exit.
968  // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
969  }
970  KMP_DEBUG_ASSERT(*lck != NULL);
971 }
972 
973 // Fast-path acquire tas lock
974 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
975  { \
976  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
977  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
978  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
979  if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
980  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
981  kmp_uint32 spins; \
982  KMP_FSYNC_PREPARE(l); \
983  KMP_INIT_YIELD(spins); \
984  if (TCR_4(__kmp_nth) > \
985  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
986  KMP_YIELD(TRUE); \
987  } else { \
988  KMP_YIELD_SPIN(spins); \
989  } \
990  kmp_backoff_t backoff = __kmp_spin_backoff_params; \
991  while ( \
992  KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
993  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
994  __kmp_spin_backoff(&backoff); \
995  if (TCR_4(__kmp_nth) > \
996  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
997  KMP_YIELD(TRUE); \
998  } else { \
999  KMP_YIELD_SPIN(spins); \
1000  } \
1001  } \
1002  } \
1003  KMP_FSYNC_ACQUIRED(l); \
1004  }
1005 
1006 // Fast-path test tas lock
1007 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1008  { \
1009  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1010  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1011  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1012  rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1013  __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1014  }
1015 
1016 // Fast-path release tas lock
1017 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1018  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1019 
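// The three macros above implement the classic test-and-test-and-set fast
// path: spin on a relaxed load until the lock looks free, try a
// compare-exchange with acquire semantics, and release with a plain release
// store (the yield/backoff throttling is specific to this runtime). A
// self-contained sketch of the same idea in C11 atomics, not the runtime's
// lock type:
#if 0 // illustration only, not compiled
#include <stdatomic.h>
#include <stdbool.h>

typedef struct {
  atomic_int poll; // 0 == free, otherwise owner gtid + 1
} demo_tas_lock_t;

static void demo_tas_acquire(demo_tas_lock_t *l, int gtid) {
  int expected = 0;
  while (atomic_load_explicit(&l->poll, memory_order_relaxed) != 0 ||
         !atomic_compare_exchange_weak_explicit(&l->poll, &expected, gtid + 1,
                                                memory_order_acquire,
                                                memory_order_relaxed)) {
    expected = 0; // a failed compare-exchange overwrites the expected value
  }
}

static bool demo_tas_try(demo_tas_lock_t *l, int gtid) {
  int expected = 0;
  return atomic_load_explicit(&l->poll, memory_order_relaxed) == 0 &&
         atomic_compare_exchange_strong_explicit(&l->poll, &expected, gtid + 1,
                                                 memory_order_acquire,
                                                 memory_order_relaxed);
}

static void demo_tas_release(demo_tas_lock_t *l) {
  atomic_store_explicit(&l->poll, 0, memory_order_release);
}
#endif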
1020 #if KMP_USE_FUTEX
1021 
1022 #include <sys/syscall.h>
1023 #include <unistd.h>
1024 #ifndef FUTEX_WAIT
1025 #define FUTEX_WAIT 0
1026 #endif
1027 #ifndef FUTEX_WAKE
1028 #define FUTEX_WAKE 1
1029 #endif
1030 
1031 // Fast-path acquire futex lock
1032 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1033  { \
1034  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1035  kmp_int32 gtid_code = (gtid + 1) << 1; \
1036  KMP_MB(); \
1037  KMP_FSYNC_PREPARE(ftx); \
1038  kmp_int32 poll_val; \
1039  while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1040  &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1041  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1042  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1043  if (!cond) { \
1044  if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1045  poll_val | \
1046  KMP_LOCK_BUSY(1, futex))) { \
1047  continue; \
1048  } \
1049  poll_val |= KMP_LOCK_BUSY(1, futex); \
1050  } \
1051  kmp_int32 rc; \
1052  if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1053  NULL, NULL, 0)) != 0) { \
1054  continue; \
1055  } \
1056  gtid_code |= 1; \
1057  } \
1058  KMP_FSYNC_ACQUIRED(ftx); \
1059  }
1060 
1061 // Fast-path test futex lock
1062 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1063  { \
1064  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1065  if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1066  KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1067  KMP_FSYNC_ACQUIRED(ftx); \
1068  rc = TRUE; \
1069  } else { \
1070  rc = FALSE; \
1071  } \
1072  }
1073 
1074 // Fast-path release futex lock
1075 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1076  { \
1077  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1078  KMP_MB(); \
1079  KMP_FSYNC_RELEASING(ftx); \
1080  kmp_int32 poll_val = \
1081  KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1082  if (KMP_LOCK_STRIP(poll_val) & 1) { \
1083  syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1084  KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1085  } \
1086  KMP_MB(); \
1087  KMP_YIELD(TCR_4(__kmp_nth) > \
1088  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
1089  }
1090 
1091 #endif // KMP_USE_FUTEX
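// The futex fast path above keeps the lock word in user space and only enters
// the kernel under contention: FUTEX_WAIT blocks while the word still holds
// the value the caller saw, and FUTEX_WAKE wakes one waiter on release. A
// self-contained sketch of that wait/wake pattern using a simplified
// three-state word (0 free, 1 locked, 2 locked with waiters), not the
// runtime's gtid-based encoding:
#if 0 // illustration only, not compiled
#include <linux/futex.h>
#include <stdatomic.h>
#include <sys/syscall.h>
#include <unistd.h>

static atomic_int demo_futex_word; // 0 free, 1 locked, 2 locked w/ waiters

static void demo_futex_lock(void) {
  int c = 0;
  if (atomic_compare_exchange_strong(&demo_futex_word, &c, 1))
    return; // uncontended fast path, no syscall
  if (c != 2)
    c = atomic_exchange(&demo_futex_word, 2); // announce that we will wait
  while (c != 0) {
    // Sleep only while the word still reads 2; retry the grab on wakeup.
    syscall(SYS_futex, &demo_futex_word, FUTEX_WAIT, 2, NULL, NULL, 0);
    c = atomic_exchange(&demo_futex_word, 2);
  }
}

static void demo_futex_unlock(void) {
  // Release the word; wake one waiter only if somebody announced waiting.
  if (atomic_exchange(&demo_futex_word, 0) == 2)
    syscall(SYS_futex, &demo_futex_word, FUTEX_WAKE, 1, NULL, NULL, 0);
}
#endif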
1092 
1093 #else // KMP_USE_DYNAMIC_LOCK
1094 
1095 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1096  ident_t const *loc,
1097  kmp_int32 gtid) {
1098  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1099 
1100  // Because of the double-check, the following load doesn't need to be volatile
1101  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1102 
1103  if (lck == NULL) {
1104  void *idx;
1105 
1106  // Allocate & initialize the lock.
1107  // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1108  lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1109  __kmp_init_user_lock_with_checks(lck);
1110  __kmp_set_user_lock_location(lck, loc);
1111 #if USE_ITT_BUILD
1112  __kmp_itt_critical_creating(lck);
1113 // __kmp_itt_critical_creating() should be called *before* the first usage
1114 // of the underlying lock. It is the only place where we can guarantee it.
1115 // There is a chance the lock will be destroyed without ever being used, but
1116 // that is not a problem, because this is not a real event seen by the user
1117 // but rather the setting of a name for the object (lock). See kmp_itt.h.
1118 #endif /* USE_ITT_BUILD */
1119 
1120  // Use a cmpxchg instruction to slam the start of the critical section with
1121  // the lock pointer. If another thread beat us to it, deallocate the lock,
1122  // and use the lock that the other thread allocated.
1123  int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1124 
1125  if (status == 0) {
1126 // Deallocate the lock and reload the value.
1127 #if USE_ITT_BUILD
1128  __kmp_itt_critical_destroyed(lck);
1129 // Let ITT know the lock is destroyed and the same memory location may be reused
1130 // for another purpose.
1131 #endif /* USE_ITT_BUILD */
1132  __kmp_destroy_user_lock_with_checks(lck);
1133  __kmp_user_lock_free(&idx, gtid, lck);
1134  lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1135  KMP_DEBUG_ASSERT(lck != NULL);
1136  }
1137  }
1138  return lck;
1139 }
1140 
1141 #endif // KMP_USE_DYNAMIC_LOCK
1142 
1153 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1154  kmp_critical_name *crit) {
1155 #if KMP_USE_DYNAMIC_LOCK
1156 #if OMPT_SUPPORT && OMPT_OPTIONAL
1157  OMPT_STORE_RETURN_ADDRESS(global_tid);
1158 #endif // OMPT_SUPPORT
1159  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1160 #else
1161  KMP_COUNT_BLOCK(OMP_CRITICAL);
1162 #if OMPT_SUPPORT && OMPT_OPTIONAL
1163  ompt_state_t prev_state = ompt_state_undefined;
1164  ompt_thread_info_t ti;
1165 #endif
1166  kmp_user_lock_p lck;
1167 
1168  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1169 
1170  // TODO: add THR_OVHD_STATE
1171 
1172  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1173  KMP_CHECK_USER_LOCK_INIT();
1174 
1175  if ((__kmp_user_lock_kind == lk_tas) &&
1176  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1177  lck = (kmp_user_lock_p)crit;
1178  }
1179 #if KMP_USE_FUTEX
1180  else if ((__kmp_user_lock_kind == lk_futex) &&
1181  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1182  lck = (kmp_user_lock_p)crit;
1183  }
1184 #endif
1185  else { // ticket, queuing or drdpa
1186  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1187  }
1188 
1189  if (__kmp_env_consistency_check)
1190  __kmp_push_sync(global_tid, ct_critical, loc, lck);
1191 
1192 // Since the critical directive binds to all threads, not just the current
1193 // team, we have to check this even if we are in a serialized team.
1194 // Also, even if we are the uber thread, we still have to acquire the lock,
1195 // as we have to contend with sibling threads.
1196 
1197 #if USE_ITT_BUILD
1198  __kmp_itt_critical_acquiring(lck);
1199 #endif /* USE_ITT_BUILD */
1200 #if OMPT_SUPPORT && OMPT_OPTIONAL
1201  OMPT_STORE_RETURN_ADDRESS(global_tid);
1202  void *codeptr_ra = NULL;
1203  if (ompt_enabled.enabled) {
1204  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1205  /* OMPT state update */
1206  prev_state = ti.state;
1207  ti.wait_id = (ompt_wait_id_t)lck;
1208  ti.state = ompt_state_wait_critical;
1209 
1210  /* OMPT event callback */
1211  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1212  if (ompt_enabled.ompt_callback_mutex_acquire) {
1213  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1214  ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1215  (ompt_wait_id_t)crit, codeptr_ra);
1216  }
1217  }
1218 #endif
1219  // The value of 'crit' should be usable as the critical_id of the critical
1220  // section directive.
1221  __kmp_acquire_user_lock_with_checks(lck, global_tid);
1222 
1223 #if USE_ITT_BUILD
1224  __kmp_itt_critical_acquired(lck);
1225 #endif /* USE_ITT_BUILD */
1226 #if OMPT_SUPPORT && OMPT_OPTIONAL
1227  if (ompt_enabled.enabled) {
1228  /* OMPT state update */
1229  ti.state = prev_state;
1230  ti.wait_id = 0;
1231 
1232  /* OMPT event callback */
1233  if (ompt_enabled.ompt_callback_mutex_acquired) {
1234  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1235  ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr_ra);
1236  }
1237  }
1238 #endif
1239  KMP_POP_PARTITIONED_TIMER();
1240 
1241  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1242  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1243 #endif // KMP_USE_DYNAMIC_LOCK
1244 }
1245 
1246 #if KMP_USE_DYNAMIC_LOCK
1247 
1248 // Converts the given hint to an internal lock implementation
1249 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1250 #if KMP_USE_TSX
1251 #define KMP_TSX_LOCK(seq) lockseq_##seq
1252 #else
1253 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1254 #endif
1255 
1256 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1257 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
1258 #else
1259 #define KMP_CPUINFO_RTM 0
1260 #endif
1261 
1262  // Hints that do not require further logic
1263  if (hint & kmp_lock_hint_hle)
1264  return KMP_TSX_LOCK(hle);
1265  if (hint & kmp_lock_hint_rtm)
1266  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
1267  if (hint & kmp_lock_hint_adaptive)
1268  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1269 
1270  // Rule out conflicting hints first by returning the default lock
1271  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1272  return __kmp_user_lock_seq;
1273  if ((hint & omp_lock_hint_speculative) &&
1274  (hint & omp_lock_hint_nonspeculative))
1275  return __kmp_user_lock_seq;
1276 
1277  // Do not even consider speculation when it appears to be contended
1278  if (hint & omp_lock_hint_contended)
1279  return lockseq_queuing;
1280 
1281  // Uncontended lock without speculation
1282  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1283  return lockseq_tas;
1284 
1285  // HLE lock for speculation
1286  if (hint & omp_lock_hint_speculative)
1287  return KMP_TSX_LOCK(hle);
1288 
1289  return __kmp_user_lock_seq;
1290 }
1291 
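// User code reaches this hint-to-lock mapping through the OpenMP 4.5
// lock-with-hint API (and through the hint clause on "#pragma omp critical",
// handled by __kmpc_critical_with_hint below). A small usage sketch of the
// standard API; the speculative hint is only a request and the runtime falls
// back to the default lock when TSX is unavailable:
#if 0 // illustration only, not compiled
#include <omp.h>

void demo_lock_hint(void) {
  omp_lock_t l;
  omp_init_lock_with_hint(&l, omp_lock_hint_speculative);
  omp_set_lock(&l);
  // ... short critical work ...
  omp_unset_lock(&l);
  omp_destroy_lock(&l);
}
#endif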
1292 #if OMPT_SUPPORT && OMPT_OPTIONAL
1293 #if KMP_USE_DYNAMIC_LOCK
1294 static kmp_mutex_impl_t
1295 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1296  if (user_lock) {
1297  switch (KMP_EXTRACT_D_TAG(user_lock)) {
1298  case 0:
1299  break;
1300 #if KMP_USE_FUTEX
1301  case locktag_futex:
1302  return kmp_mutex_impl_queuing;
1303 #endif
1304  case locktag_tas:
1305  return kmp_mutex_impl_spin;
1306 #if KMP_USE_TSX
1307  case locktag_hle:
1308  return kmp_mutex_impl_speculative;
1309 #endif
1310  default:
1311  return kmp_mutex_impl_none;
1312  }
1313  ilock = KMP_LOOKUP_I_LOCK(user_lock);
1314  }
1315  KMP_ASSERT(ilock);
1316  switch (ilock->type) {
1317 #if KMP_USE_TSX
1318  case locktag_adaptive:
1319  case locktag_rtm:
1320  return kmp_mutex_impl_speculative;
1321 #endif
1322  case locktag_nested_tas:
1323  return kmp_mutex_impl_spin;
1324 #if KMP_USE_FUTEX
1325  case locktag_nested_futex:
1326 #endif
1327  case locktag_ticket:
1328  case locktag_queuing:
1329  case locktag_drdpa:
1330  case locktag_nested_ticket:
1331  case locktag_nested_queuing:
1332  case locktag_nested_drdpa:
1333  return kmp_mutex_impl_queuing;
1334  default:
1335  return kmp_mutex_impl_none;
1336  }
1337 }
1338 #else
1339 // For locks without dynamic binding
1340 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1341  switch (__kmp_user_lock_kind) {
1342  case lk_tas:
1343  return kmp_mutex_impl_spin;
1344 #if KMP_USE_FUTEX
1345  case lk_futex:
1346 #endif
1347  case lk_ticket:
1348  case lk_queuing:
1349  case lk_drdpa:
1350  return kmp_mutex_impl_queuing;
1351 #if KMP_USE_TSX
1352  case lk_hle:
1353  case lk_rtm:
1354  case lk_adaptive:
1355  return kmp_mutex_impl_speculative;
1356 #endif
1357  default:
1358  return kmp_mutex_impl_none;
1359  }
1360 }
1361 #endif // KMP_USE_DYNAMIC_LOCK
1362 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
1363 
1377 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1378  kmp_critical_name *crit, uint32_t hint) {
1379  KMP_COUNT_BLOCK(OMP_CRITICAL);
1380  kmp_user_lock_p lck;
1381 #if OMPT_SUPPORT && OMPT_OPTIONAL
1382  ompt_state_t prev_state = ompt_state_undefined;
1383  ompt_thread_info_t ti;
1384  // This is the case if called from __kmpc_critical:
1385  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1386  if (!codeptr)
1387  codeptr = OMPT_GET_RETURN_ADDRESS(0);
1388 #endif
1389 
1390  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1391 
1392  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1393  // Check if it is initialized.
1394  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1395  if (*lk == 0) {
1396  kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1397  if (KMP_IS_D_LOCK(lckseq)) {
1398  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1399  KMP_GET_D_TAG(lckseq));
1400  } else {
1401  __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
1402  }
1403  }
1404  // Branch for accessing the actual lock object and set operation. This
1405  // branching is inevitable since this lock initialization does not follow the
1406  // normal dispatch path (lock table is not used).
1407  if (KMP_EXTRACT_D_TAG(lk) != 0) {
1408  lck = (kmp_user_lock_p)lk;
1409  if (__kmp_env_consistency_check) {
1410  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1411  __kmp_map_hint_to_lock(hint));
1412  }
1413 #if USE_ITT_BUILD
1414  __kmp_itt_critical_acquiring(lck);
1415 #endif
1416 #if OMPT_SUPPORT && OMPT_OPTIONAL
1417  if (ompt_enabled.enabled) {
1418  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1419  /* OMPT state update */
1420  prev_state = ti.state;
1421  ti.wait_id = (ompt_wait_id_t)lck;
1422  ti.state = ompt_state_wait_critical;
1423 
1424  /* OMPT event callback */
1425  if (ompt_enabled.ompt_callback_mutex_acquire) {
1426  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1427  ompt_mutex_critical, (unsigned int)hint,
1428  __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)crit, codeptr);
1429  }
1430  }
1431 #endif
1432 #if KMP_USE_INLINED_TAS
1433  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1434  KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1435  } else
1436 #elif KMP_USE_INLINED_FUTEX
1437  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1438  KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1439  } else
1440 #endif
1441  {
1442  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1443  }
1444  } else {
1445  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1446  lck = ilk->lock;
1447  if (__kmp_env_consistency_check) {
1448  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1449  __kmp_map_hint_to_lock(hint));
1450  }
1451 #if USE_ITT_BUILD
1452  __kmp_itt_critical_acquiring(lck);
1453 #endif
1454 #if OMPT_SUPPORT && OMPT_OPTIONAL
1455  if (ompt_enabled.enabled) {
1456  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1457  /* OMPT state update */
1458  prev_state = ti.state;
1459  ti.wait_id = (ompt_wait_id_t)lck;
1460  ti.state = ompt_state_wait_critical;
1461 
1462  /* OMPT event callback */
1463  if (ompt_enabled.ompt_callback_mutex_acquire) {
1464  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1465  ompt_mutex_critical, (unsigned int)hint,
1466  __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)crit, codeptr);
1467  }
1468  }
1469 #endif
1470  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1471  }
1472  KMP_POP_PARTITIONED_TIMER();
1473 
1474 #if USE_ITT_BUILD
1475  __kmp_itt_critical_acquired(lck);
1476 #endif /* USE_ITT_BUILD */
1477 #if OMPT_SUPPORT && OMPT_OPTIONAL
1478  if (ompt_enabled.enabled) {
1479  /* OMPT state update */
1480  ti.state = prev_state;
1481  ti.wait_id = 0;
1482 
1483  /* OMPT event callback */
1484  if (ompt_enabled.ompt_callback_mutex_acquired) {
1485  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1486  ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr);
1487  }
1488  }
1489 #endif
1490 
1491  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1492  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1493 } // __kmpc_critical_with_hint
1494 
1495 #endif // KMP_USE_DYNAMIC_LOCK
1496 
1506 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1507  kmp_critical_name *crit) {
1508  kmp_user_lock_p lck;
1509 
1510  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1511 
1512 #if KMP_USE_DYNAMIC_LOCK
1513  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1514  lck = (kmp_user_lock_p)crit;
1515  KMP_ASSERT(lck != NULL);
1516  if (__kmp_env_consistency_check) {
1517  __kmp_pop_sync(global_tid, ct_critical, loc);
1518  }
1519 #if USE_ITT_BUILD
1520  __kmp_itt_critical_releasing(lck);
1521 #endif
1522 #if KMP_USE_INLINED_TAS
1523  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1524  KMP_RELEASE_TAS_LOCK(lck, global_tid);
1525  } else
1526 #elif KMP_USE_INLINED_FUTEX
1527  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1528  KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1529  } else
1530 #endif
1531  {
1532  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1533  }
1534  } else {
1535  kmp_indirect_lock_t *ilk =
1536  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1537  KMP_ASSERT(ilk != NULL);
1538  lck = ilk->lock;
1539  if (__kmp_env_consistency_check) {
1540  __kmp_pop_sync(global_tid, ct_critical, loc);
1541  }
1542 #if USE_ITT_BUILD
1543  __kmp_itt_critical_releasing(lck);
1544 #endif
1545  KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1546  }
1547 
1548 #else // KMP_USE_DYNAMIC_LOCK
1549 
1550  if ((__kmp_user_lock_kind == lk_tas) &&
1551  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1552  lck = (kmp_user_lock_p)crit;
1553  }
1554 #if KMP_USE_FUTEX
1555  else if ((__kmp_user_lock_kind == lk_futex) &&
1556  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1557  lck = (kmp_user_lock_p)crit;
1558  }
1559 #endif
1560  else { // ticket, queuing or drdpa
1561  lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1562  }
1563 
1564  KMP_ASSERT(lck != NULL);
1565 
1566  if (__kmp_env_consistency_check)
1567  __kmp_pop_sync(global_tid, ct_critical, loc);
1568 
1569 #if USE_ITT_BUILD
1570  __kmp_itt_critical_releasing(lck);
1571 #endif /* USE_ITT_BUILD */
1572  // The value of 'crit' should be usable as the critical_id of the critical
1573  // section directive.
1574  __kmp_release_user_lock_with_checks(lck, global_tid);
1575 
1576 #endif // KMP_USE_DYNAMIC_LOCK
1577 
1578 #if OMPT_SUPPORT && OMPT_OPTIONAL
1579  /* OMPT release event triggers after lock is released; place here to trigger
1580  * for all #if branches */
1581  OMPT_STORE_RETURN_ADDRESS(global_tid);
1582  if (ompt_enabled.ompt_callback_mutex_released) {
1583  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1584  ompt_mutex_critical, (ompt_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0));
1585  }
1586 #endif
1587 
1588  KMP_POP_PARTITIONED_TIMER();
1589  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1590 }
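// A rough sketch of how "#pragma omp critical (name)" is typically lowered:
// the compiler emits one zero-initialized static kmp_critical_name per named
// critical (the runtime lazily installs a lock into it) and brackets the body
// with the enter/exit calls above. All names are illustrative only:
#if 0 // illustration only, not compiled
static kmp_critical_name demo_crit; // shared, zero-initialized lock slot

void demo_critical(ident_t *loc, kmp_int32 gtid) {
  __kmpc_critical(loc, gtid, &demo_crit);
  critical_body(); // hypothetical body; at most one thread runs it at a time
  __kmpc_end_critical(loc, gtid, &demo_crit);
}
#endif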
1591 
1601 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1602  int status;
1603 
1604  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1605 
1606  if (!TCR_4(__kmp_init_parallel))
1607  __kmp_parallel_initialize();
1608 
1609 #if OMP_50_ENABLED
1610  __kmp_resume_if_soft_paused();
1611 #endif
1612 
1613  if (__kmp_env_consistency_check)
1614  __kmp_check_barrier(global_tid, ct_barrier, loc);
1615 
1616 #if OMPT_SUPPORT
1617  ompt_frame_t *ompt_frame;
1618  if (ompt_enabled.enabled) {
1619  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1620  if (ompt_frame->enter_frame.ptr == NULL)
1621  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1622  OMPT_STORE_RETURN_ADDRESS(global_tid);
1623  }
1624 #endif
1625 #if USE_ITT_NOTIFY
1626  __kmp_threads[global_tid]->th.th_ident = loc;
1627 #endif
1628  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1629 #if OMPT_SUPPORT && OMPT_OPTIONAL
1630  if (ompt_enabled.enabled) {
1631  ompt_frame->enter_frame = ompt_data_none;
1632  }
1633 #endif
1634 
1635  return (status != 0) ? 0 : 1;
1636 }
1637 
1647 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1648  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1649 
1650  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1651 }
1652 
1663 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1664  kmp_int32 ret;
1665 
1666  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1667 
1668  if (!TCR_4(__kmp_init_parallel))
1669  __kmp_parallel_initialize();
1670 
1671 #if OMP_50_ENABLED
1672  __kmp_resume_if_soft_paused();
1673 #endif
1674 
1675  if (__kmp_env_consistency_check) {
1676  if (loc == 0) {
1677  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1678  }
1679  __kmp_check_barrier(global_tid, ct_barrier, loc);
1680  }
1681 
1682 #if OMPT_SUPPORT
1683  ompt_frame_t *ompt_frame;
1684  if (ompt_enabled.enabled) {
1685  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1686  if (ompt_frame->enter_frame.ptr == NULL)
1687  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1688  OMPT_STORE_RETURN_ADDRESS(global_tid);
1689  }
1690 #endif
1691 #if USE_ITT_NOTIFY
1692  __kmp_threads[global_tid]->th.th_ident = loc;
1693 #endif
1694  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1695 #if OMPT_SUPPORT && OMPT_OPTIONAL
1696  if (ompt_enabled.enabled) {
1697  ompt_frame->enter_frame = ompt_data_none;
1698  }
1699 #endif
1700 
1701  ret = __kmpc_master(loc, global_tid);
1702 
1703  if (__kmp_env_consistency_check) {
1704  /* there's no __kmpc_end_master called; so the (stats) */
1705  /* actions of __kmpc_end_master are done here */
1706 
1707  if (global_tid < 0) {
1708  KMP_WARNING(ThreadIdentInvalid);
1709  }
1710  if (ret) {
1711  /* only one thread should do the pop since only */
1712  /* one did the push (see __kmpc_master()) */
1713 
1714  __kmp_pop_sync(global_tid, ct_master, loc);
1715  }
1716  }
1717 
1718  return (ret);
1719 }
1720 
1721 /* The BARRIER for a SINGLE process section is always explicit */
1733 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1734  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1735 
1736  if (rc) {
1737  // We are going to execute the single statement, so we should count it.
1738  KMP_COUNT_BLOCK(OMP_SINGLE);
1739  KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1740  }
1741 
1742 #if OMPT_SUPPORT && OMPT_OPTIONAL
1743  kmp_info_t *this_thr = __kmp_threads[global_tid];
1744  kmp_team_t *team = this_thr->th.th_team;
1745  int tid = __kmp_tid_from_gtid(global_tid);
1746 
1747  if (ompt_enabled.enabled) {
1748  if (rc) {
1749  if (ompt_enabled.ompt_callback_work) {
1750  ompt_callbacks.ompt_callback(ompt_callback_work)(
1751  ompt_work_single_executor, ompt_scope_begin,
1752  &(team->t.ompt_team_info.parallel_data),
1753  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1754  1, OMPT_GET_RETURN_ADDRESS(0));
1755  }
1756  } else {
1757  if (ompt_enabled.ompt_callback_work) {
1758  ompt_callbacks.ompt_callback(ompt_callback_work)(
1759  ompt_work_single_other, ompt_scope_begin,
1760  &(team->t.ompt_team_info.parallel_data),
1761  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1762  1, OMPT_GET_RETURN_ADDRESS(0));
1763  ompt_callbacks.ompt_callback(ompt_callback_work)(
1764  ompt_work_single_other, ompt_scope_end,
1765  &(team->t.ompt_team_info.parallel_data),
1766  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1767  1, OMPT_GET_RETURN_ADDRESS(0));
1768  }
1769  }
1770  }
1771 #endif
1772 
1773  return rc;
1774 }
1775 
1785 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1786  __kmp_exit_single(global_tid);
1787  KMP_POP_PARTITIONED_TIMER();
1788 
1789 #if OMPT_SUPPORT && OMPT_OPTIONAL
1790  kmp_info_t *this_thr = __kmp_threads[global_tid];
1791  kmp_team_t *team = this_thr->th.th_team;
1792  int tid = __kmp_tid_from_gtid(global_tid);
1793 
1794  if (ompt_enabled.ompt_callback_work) {
1795  ompt_callbacks.ompt_callback(ompt_callback_work)(
1796  ompt_work_single_executor, ompt_scope_end,
1797  &(team->t.ompt_team_info.parallel_data),
1798  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1799  OMPT_GET_RETURN_ADDRESS(0));
1800  }
1801 #endif
1802 }
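// A rough sketch of how a plain "#pragma omp single" is typically lowered:
// the one thread that gets a nonzero return value runs the body and calls
// __kmpc_end_single, and the implicit barrier at the end of the construct is
// emitted separately unless nowait is given. single_body() is a hypothetical
// placeholder:
#if 0 // illustration only, not compiled
void demo_single(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_single(loc, gtid)) {
    single_body(); // executed by exactly one thread of the team
    __kmpc_end_single(loc, gtid);
  }
  __kmpc_barrier(loc, gtid); // implicit barrier, omitted for nowait
}
#endif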
1803 
1811 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1812  KMP_POP_PARTITIONED_TIMER();
1813  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1814 
1815 #if OMPT_SUPPORT && OMPT_OPTIONAL
1816  if (ompt_enabled.ompt_callback_work) {
1817  ompt_work_t ompt_work_type = ompt_work_loop;
1818  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1819  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1820  // Determine workshare type
1821  if (loc != NULL) {
1822  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1823  ompt_work_type = ompt_work_loop;
1824  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1825  ompt_work_type = ompt_work_sections;
1826  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1827  ompt_work_type = ompt_work_distribute;
1828  } else {
1829  // use default set above.
1830  // a warning about this case is provided in __kmpc_for_static_init
1831  }
1832  KMP_DEBUG_ASSERT(ompt_work_type);
1833  }
1834  ompt_callbacks.ompt_callback(ompt_callback_work)(
1835  ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1836  &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1837  }
1838 #endif
1839  if (__kmp_env_consistency_check)
1840  __kmp_pop_workshare(global_tid, ct_pdo, loc);
1841 }
1842 
1843 // User routines which take C-style arguments (call by value)
1844 // different from the Fortran equivalent routines
1845 
1846 void ompc_set_num_threads(int arg) {
1847  // !!!!! TODO: check the per-task binding
1848  __kmp_set_num_threads(arg, __kmp_entry_gtid());
1849 }
1850 
1851 void ompc_set_dynamic(int flag) {
1852  kmp_info_t *thread;
1853 
1854  /* For the thread-private implementation of the internal controls */
1855  thread = __kmp_entry_thread();
1856 
1857  __kmp_save_internal_controls(thread);
1858 
1859  set__dynamic(thread, flag ? TRUE : FALSE);
1860 }
1861 
1862 void ompc_set_nested(int flag) {
1863  kmp_info_t *thread;
1864 
1865  /* For the thread-private internal controls implementation */
1866  thread = __kmp_entry_thread();
1867 
1868  __kmp_save_internal_controls(thread);
1869 
1870  set__nested(thread, flag ? TRUE : FALSE);
1871 }
1872 
1873 void ompc_set_max_active_levels(int max_active_levels) {
1874  /* TO DO */
1875  /* we want per-task implementation of this internal control */
1876 
1877  /* For the per-thread internal controls implementation */
1878  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1879 }
1880 
1881 void ompc_set_schedule(omp_sched_t kind, int modifier) {
1882  // !!!!! TODO: check the per-task binding
1883  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1884 }
1885 
1886 int ompc_get_ancestor_thread_num(int level) {
1887  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1888 }
1889 
1890 int ompc_get_team_size(int level) {
1891  return __kmp_get_team_size(__kmp_entry_gtid(), level);
1892 }
1893 
1894 #if OMP_50_ENABLED
1895 /* OpenMP 5.0 Affinity Format API */
1896 
1897 void ompc_set_affinity_format(char const *format) {
1898  if (!__kmp_init_serial) {
1899  __kmp_serial_initialize();
1900  }
1901  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
1902  format, KMP_STRLEN(format) + 1);
1903 }
1904 
1905 size_t ompc_get_affinity_format(char *buffer, size_t size) {
1906  size_t format_size;
1907  if (!__kmp_init_serial) {
1908  __kmp_serial_initialize();
1909  }
1910  format_size = KMP_STRLEN(__kmp_affinity_format);
1911  if (buffer && size) {
1912  __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
1913  format_size + 1);
1914  }
1915  return format_size;
1916 }
1917 
1918 void ompc_display_affinity(char const *format) {
1919  int gtid;
1920  if (!TCR_4(__kmp_init_middle)) {
1921  __kmp_middle_initialize();
1922  }
1923  gtid = __kmp_get_gtid();
1924  __kmp_aux_display_affinity(gtid, format);
1925 }
1926 
1927 size_t ompc_capture_affinity(char *buffer, size_t buf_size,
1928  char const *format) {
1929  int gtid;
1930  size_t num_required;
1931  kmp_str_buf_t capture_buf;
1932  if (!TCR_4(__kmp_init_middle)) {
1933  __kmp_middle_initialize();
1934  }
1935  gtid = __kmp_get_gtid();
1936  __kmp_str_buf_init(&capture_buf);
1937  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
1938  if (buffer && buf_size) {
1939  __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
1940  capture_buf.used + 1);
1941  }
1942  __kmp_str_buf_free(&capture_buf);
1943  return num_required;
1944 }
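// Both ompc_get_affinity_format and ompc_capture_affinity return the number
// of characters required (not counting the terminator) regardless of the
// buffer passed in, so callers can query the size first and then fill a
// buffer of the right length. A small usage sketch; the format string below
// is illustrative:
#if 0 // illustration only, not compiled
#include <stdlib.h>

void demo_capture_affinity(void) {
  const char *fmt = "tid=%n affinity={%A}";
  size_t needed = ompc_capture_affinity(NULL, 0, fmt); // size query only
  char *buf = (char *)malloc(needed + 1);
  if (buf) {
    ompc_capture_affinity(buf, needed + 1, fmt);
    // ... use buf ...
    free(buf);
  }
}
#endif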
1945 #endif /* OMP_50_ENABLED */
1946 
1947 void kmpc_set_stacksize(int arg) {
1948  // __kmp_aux_set_stacksize initializes the library if needed
1949  __kmp_aux_set_stacksize(arg);
1950 }
1951 
1952 void kmpc_set_stacksize_s(size_t arg) {
1953  // __kmp_aux_set_stacksize initializes the library if needed
1954  __kmp_aux_set_stacksize(arg);
1955 }
1956 
1957 void kmpc_set_blocktime(int arg) {
1958  int gtid, tid;
1959  kmp_info_t *thread;
1960 
1961  gtid = __kmp_entry_gtid();
1962  tid = __kmp_tid_from_gtid(gtid);
1963  thread = __kmp_thread_from_gtid(gtid);
1964 
1965  __kmp_aux_set_blocktime(arg, thread, tid);
1966 }
1967 
1968 void kmpc_set_library(int arg) {
1969  // __kmp_user_set_library initializes the library if needed
1970  __kmp_user_set_library((enum library_type)arg);
1971 }
1972 
1973 void kmpc_set_defaults(char const *str) {
1974  // __kmp_aux_set_defaults initializes the library if needed
1975  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
1976 }
1977 
1978 void kmpc_set_disp_num_buffers(int arg) {
1979  // ignore after initialization because some teams have already
1980  // allocated dispatch buffers
1981  if (__kmp_init_serial == 0 && arg > 0)
1982  __kmp_dispatch_num_buffers = arg;
1983 }
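// A minimal sketch of the kmp_* extension entry points (declared in this
// runtime's omp.h) corresponding to the kmpc_set_* routines above; the values
// are illustrative, and the exact internal routing (kmpc_* vs. the
// __kmp_aux_*/__kmp_user_* helpers) is an assumption of the sketch.

#include <omp.h>

static void example_kmp_settings(void) {
  kmp_set_stacksize_s((size_t)4 * 1024 * 1024); // worker stack size, bytes
  kmp_set_blocktime(200);                       // spin time before sleeping, ms
  kmp_set_defaults("KMP_AFFINITY=compact");     // env-var style string (assumed syntax)
  kmp_set_disp_num_buffers(8);                  // only honored before initialization
}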
1984 
1985 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
1986 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1987  return -1;
1988 #else
1989  if (!TCR_4(__kmp_init_middle)) {
1990  __kmp_middle_initialize();
1991  }
1992  return __kmp_aux_set_affinity_mask_proc(proc, mask);
1993 #endif
1994 }
1995 
1996 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
1997 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1998  return -1;
1999 #else
2000  if (!TCR_4(__kmp_init_middle)) {
2001  __kmp_middle_initialize();
2002  }
2003  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2004 #endif
2005 }
2006 
2007 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2008 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2009  return -1;
2010 #else
2011  if (!TCR_4(__kmp_init_middle)) {
2012  __kmp_middle_initialize();
2013  }
2014  return __kmp_aux_get_affinity_mask_proc(proc, mask);
2015 #endif
2016 }
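// A minimal sketch of the kmp_* affinity-mask extension API (declared in this
// runtime's omp.h) that backs the kmpc_*_affinity_mask_proc entry points
// above; the 0-on-success return convention is assumed.

#include <omp.h>

static void example_pin_to_proc0(void) {
  kmp_affinity_mask_t mask;
  kmp_create_affinity_mask(&mask);
  if (kmp_set_affinity_mask_proc(0, &mask) == 0) // cf. kmpc_set_affinity_mask_proc
    kmp_set_affinity(&mask);                     // apply the mask to the calling thread
  kmp_destroy_affinity_mask(&mask);
}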
2017 
2018 /* -------------------------------------------------------------------------- */
2063 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2064  void *cpy_data, void (*cpy_func)(void *, void *),
2065  kmp_int32 didit) {
2066  void **data_ptr;
2067 
2068  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2069 
2070  KMP_MB();
2071 
2072  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2073 
2074  if (__kmp_env_consistency_check) {
2075  if (loc == 0) {
2076  KMP_WARNING(ConstructIdentInvalid);
2077  }
2078  }
2079 
2080  // ToDo: Optimize the following two barriers into some kind of split barrier
2081 
2082  if (didit)
2083  *data_ptr = cpy_data;
2084 
2085 #if OMPT_SUPPORT
2086  ompt_frame_t *ompt_frame;
2087  if (ompt_enabled.enabled) {
2088  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2089  if (ompt_frame->enter_frame.ptr == NULL)
2090  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2091  OMPT_STORE_RETURN_ADDRESS(gtid);
2092  }
2093 #endif
2094 /* This barrier is not a barrier region boundary */
2095 #if USE_ITT_NOTIFY
2096  __kmp_threads[gtid]->th.th_ident = loc;
2097 #endif
2098  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2099 
2100  if (!didit)
2101  (*cpy_func)(cpy_data, *data_ptr);
2102 
2103 // Consider next barrier a user-visible barrier for barrier region boundaries
2104 // Nesting checks are already handled by the single construct checks
2105 
2106 #if OMPT_SUPPORT
2107  if (ompt_enabled.enabled) {
2108  OMPT_STORE_RETURN_ADDRESS(gtid);
2109  }
2110 #endif
2111 #if USE_ITT_NOTIFY
2112  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2113 // tasks can overwrite the location)
2114 #endif
2115  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2116 #if OMPT_SUPPORT && OMPT_OPTIONAL
2117  if (ompt_enabled.enabled) {
2118  ompt_frame->enter_frame = ompt_data_none;
2119  }
2120 #endif
2121 }
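// A minimal user-level sketch of the construct that compilers typically lower
// to __kmpc_copyprivate: one thread executes the single region, and the
// copyprivate clause broadcasts its private value to every thread in the team
// via the cpy_func callback and the two barriers above.

#include <omp.h>
#include <stdio.h>

static void example_copyprivate(void) {
  int token;
#pragma omp parallel private(token)
  {
#pragma omp single copyprivate(token)
    token = omp_get_thread_num() + 100; // executed by exactly one thread
    // after the single construct, every thread's private copy holds the value
    printf("T#%d token=%d\n", omp_get_thread_num(), token);
  }
}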
2122 
2123 /* -------------------------------------------------------------------------- */
2124 
2125 #define INIT_LOCK __kmp_init_user_lock_with_checks
2126 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2127 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2128 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2129 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2130 #define ACQUIRE_NESTED_LOCK_TIMED \
2131  __kmp_acquire_nested_user_lock_with_checks_timed
2132 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2133 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2134 #define TEST_LOCK __kmp_test_user_lock_with_checks
2135 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2136 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2137 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2138 
2139 // TODO: Make check abort messages use location info & pass it into
2140 // with_checks routines
2141 
2142 #if KMP_USE_DYNAMIC_LOCK
2143 
2144 // internal lock initializer
2145 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2146  kmp_dyna_lockseq_t seq) {
2147  if (KMP_IS_D_LOCK(seq)) {
2148  KMP_INIT_D_LOCK(lock, seq);
2149 #if USE_ITT_BUILD
2150  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2151 #endif
2152  } else {
2153  KMP_INIT_I_LOCK(lock, seq);
2154 #if USE_ITT_BUILD
2155  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2156  __kmp_itt_lock_creating(ilk->lock, loc);
2157 #endif
2158  }
2159 }
2160 
2161 // internal nest lock initializer
2162 static __forceinline void
2163 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2164  kmp_dyna_lockseq_t seq) {
2165 #if KMP_USE_TSX
2166  // Don't have nested lock implementation for speculative locks
2167  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
2168  seq = __kmp_user_lock_seq;
2169 #endif
2170  switch (seq) {
2171  case lockseq_tas:
2172  seq = lockseq_nested_tas;
2173  break;
2174 #if KMP_USE_FUTEX
2175  case lockseq_futex:
2176  seq = lockseq_nested_futex;
2177  break;
2178 #endif
2179  case lockseq_ticket:
2180  seq = lockseq_nested_ticket;
2181  break;
2182  case lockseq_queuing:
2183  seq = lockseq_nested_queuing;
2184  break;
2185  case lockseq_drdpa:
2186  seq = lockseq_nested_drdpa;
2187  break;
2188  default:
2189  seq = lockseq_nested_queuing;
2190  }
2191  KMP_INIT_I_LOCK(lock, seq);
2192 #if USE_ITT_BUILD
2193  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2194  __kmp_itt_lock_creating(ilk->lock, loc);
2195 #endif
2196 }
2197 
2198 /* initialize the lock with a hint */
2199 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2200  uintptr_t hint) {
2201  KMP_DEBUG_ASSERT(__kmp_init_serial);
2202  if (__kmp_env_consistency_check && user_lock == NULL) {
2203  KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2204  }
2205 
2206  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2207 
2208 #if OMPT_SUPPORT && OMPT_OPTIONAL
2209  // This is the case, if called from omp_init_lock_with_hint:
2210  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2211  if (!codeptr)
2212  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2213  if (ompt_enabled.ompt_callback_lock_init) {
2214  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2215  ompt_mutex_lock, (omp_lock_hint_t)hint,
2216  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2217  codeptr);
2218  }
2219 #endif
2220 }
2221 
2222 /* initialize the lock with a hint */
2223 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2224  void **user_lock, uintptr_t hint) {
2225  KMP_DEBUG_ASSERT(__kmp_init_serial);
2226  if (__kmp_env_consistency_check && user_lock == NULL) {
2227  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2228  }
2229 
2230  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2231 
2232 #if OMPT_SUPPORT && OMPT_OPTIONAL
2233  // This is the case, if called from omp_init_lock_with_hint:
2234  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2235  if (!codeptr)
2236  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2237  if (ompt_enabled.ompt_callback_lock_init) {
2238  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2239  ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2240  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2241  codeptr);
2242  }
2243 #endif
2244 }
2245 
2246 #endif // KMP_USE_DYNAMIC_LOCK
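// A minimal sketch of the OpenMP 4.5 user entry points that reach the two
// __kmpc_*_with_hint routines above when dynamic locks are enabled; the hint
// is mapped to a lock implementation by __kmp_map_hint_to_lock.

#include <omp.h>

static omp_lock_t hinted_lock;
static omp_nest_lock_t hinted_nest_lock;

static void example_init_hinted_locks(void) {
  omp_init_lock_with_hint(&hinted_lock, omp_lock_hint_speculative);
  omp_init_nest_lock_with_hint(&hinted_nest_lock, omp_lock_hint_contended);
}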
2247 
2248 /* initialize the lock */
2249 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2250 #if KMP_USE_DYNAMIC_LOCK
2251 
2252  KMP_DEBUG_ASSERT(__kmp_init_serial);
2253  if (__kmp_env_consistency_check && user_lock == NULL) {
2254  KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2255  }
2256  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2257 
2258 #if OMPT_SUPPORT && OMPT_OPTIONAL
2259  // This is the case, if called from omp_init_lock_with_hint:
2260  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2261  if (!codeptr)
2262  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2263  if (ompt_enabled.ompt_callback_lock_init) {
2264  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2265  ompt_mutex_lock, omp_lock_hint_none,
2266  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2267  codeptr);
2268  }
2269 #endif
2270 
2271 #else // KMP_USE_DYNAMIC_LOCK
2272 
2273  static char const *const func = "omp_init_lock";
2274  kmp_user_lock_p lck;
2275  KMP_DEBUG_ASSERT(__kmp_init_serial);
2276 
2277  if (__kmp_env_consistency_check) {
2278  if (user_lock == NULL) {
2279  KMP_FATAL(LockIsUninitialized, func);
2280  }
2281  }
2282 
2283  KMP_CHECK_USER_LOCK_INIT();
2284 
2285  if ((__kmp_user_lock_kind == lk_tas) &&
2286  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2287  lck = (kmp_user_lock_p)user_lock;
2288  }
2289 #if KMP_USE_FUTEX
2290  else if ((__kmp_user_lock_kind == lk_futex) &&
2291  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2292  lck = (kmp_user_lock_p)user_lock;
2293  }
2294 #endif
2295  else {
2296  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2297  }
2298  INIT_LOCK(lck);
2299  __kmp_set_user_lock_location(lck, loc);
2300 
2301 #if OMPT_SUPPORT && OMPT_OPTIONAL
2302  // This is the case, if called from omp_init_lock_with_hint:
2303  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2304  if (!codeptr)
2305  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2306  if (ompt_enabled.ompt_callback_lock_init) {
2307  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2308  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2309  (ompt_wait_id_t)user_lock, codeptr);
2310  }
2311 #endif
2312 
2313 #if USE_ITT_BUILD
2314  __kmp_itt_lock_creating(lck);
2315 #endif /* USE_ITT_BUILD */
2316 
2317 #endif // KMP_USE_DYNAMIC_LOCK
2318 } // __kmpc_init_lock
2319 
2320 /* initialize the lock */
2321 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2322 #if KMP_USE_DYNAMIC_LOCK
2323 
2324  KMP_DEBUG_ASSERT(__kmp_init_serial);
2325  if (__kmp_env_consistency_check && user_lock == NULL) {
2326  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2327  }
2328  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2329 
2330 #if OMPT_SUPPORT && OMPT_OPTIONAL
2331  // This is the case, if called from omp_init_lock_with_hint:
2332  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2333  if (!codeptr)
2334  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2335  if (ompt_enabled.ompt_callback_lock_init) {
2336  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2337  ompt_mutex_nest_lock, omp_lock_hint_none,
2338  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2339  codeptr);
2340  }
2341 #endif
2342 
2343 #else // KMP_USE_DYNAMIC_LOCK
2344 
2345  static char const *const func = "omp_init_nest_lock";
2346  kmp_user_lock_p lck;
2347  KMP_DEBUG_ASSERT(__kmp_init_serial);
2348 
2349  if (__kmp_env_consistency_check) {
2350  if (user_lock == NULL) {
2351  KMP_FATAL(LockIsUninitialized, func);
2352  }
2353  }
2354 
2355  KMP_CHECK_USER_LOCK_INIT();
2356 
2357  if ((__kmp_user_lock_kind == lk_tas) &&
2358  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2359  OMP_NEST_LOCK_T_SIZE)) {
2360  lck = (kmp_user_lock_p)user_lock;
2361  }
2362 #if KMP_USE_FUTEX
2363  else if ((__kmp_user_lock_kind == lk_futex) &&
2364  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2365  OMP_NEST_LOCK_T_SIZE)) {
2366  lck = (kmp_user_lock_p)user_lock;
2367  }
2368 #endif
2369  else {
2370  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2371  }
2372 
2373  INIT_NESTED_LOCK(lck);
2374  __kmp_set_user_lock_location(lck, loc);
2375 
2376 #if OMPT_SUPPORT && OMPT_OPTIONAL
2377  // This is the case, if called from omp_init_lock_with_hint:
2378  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2379  if (!codeptr)
2380  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2381  if (ompt_enabled.ompt_callback_lock_init) {
2382  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2383  ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2384  (ompt_wait_id_t)user_lock, codeptr);
2385  }
2386 #endif
2387 
2388 #if USE_ITT_BUILD
2389  __kmp_itt_lock_creating(lck);
2390 #endif /* USE_ITT_BUILD */
2391 
2392 #endif // KMP_USE_DYNAMIC_LOCK
2393 } // __kmpc_init_nest_lock
2394 
2395 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2396 #if KMP_USE_DYNAMIC_LOCK
2397 
2398 #if USE_ITT_BUILD
2399  kmp_user_lock_p lck;
2400  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2401  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2402  } else {
2403  lck = (kmp_user_lock_p)user_lock;
2404  }
2405  __kmp_itt_lock_destroyed(lck);
2406 #endif
2407 #if OMPT_SUPPORT && OMPT_OPTIONAL
2408  // This is the case, if called from omp_init_lock_with_hint:
2409  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2410  if (!codeptr)
2411  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2412  if (ompt_enabled.ompt_callback_lock_destroy) {
2413  kmp_user_lock_p lck;
2414  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2415  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2416  } else {
2417  lck = (kmp_user_lock_p)user_lock;
2418  }
2419  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2420  ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2421  }
2422 #endif
2423  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2424 #else
2425  kmp_user_lock_p lck;
2426 
2427  if ((__kmp_user_lock_kind == lk_tas) &&
2428  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2429  lck = (kmp_user_lock_p)user_lock;
2430  }
2431 #if KMP_USE_FUTEX
2432  else if ((__kmp_user_lock_kind == lk_futex) &&
2433  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2434  lck = (kmp_user_lock_p)user_lock;
2435  }
2436 #endif
2437  else {
2438  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2439  }
2440 
2441 #if OMPT_SUPPORT && OMPT_OPTIONAL
2442  // This is the case, if called from omp_init_lock_with_hint:
2443  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2444  if (!codeptr)
2445  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2446  if (ompt_enabled.ompt_callback_lock_destroy) {
2447  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2448  ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2449  }
2450 #endif
2451 
2452 #if USE_ITT_BUILD
2453  __kmp_itt_lock_destroyed(lck);
2454 #endif /* USE_ITT_BUILD */
2455  DESTROY_LOCK(lck);
2456 
2457  if ((__kmp_user_lock_kind == lk_tas) &&
2458  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2459  ;
2460  }
2461 #if KMP_USE_FUTEX
2462  else if ((__kmp_user_lock_kind == lk_futex) &&
2463  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2464  ;
2465  }
2466 #endif
2467  else {
2468  __kmp_user_lock_free(user_lock, gtid, lck);
2469  }
2470 #endif // KMP_USE_DYNAMIC_LOCK
2471 } // __kmpc_destroy_lock
2472 
2473 /* destroy the lock */
2474 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2475 #if KMP_USE_DYNAMIC_LOCK
2476 
2477 #if USE_ITT_BUILD
2478  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2479  __kmp_itt_lock_destroyed(ilk->lock);
2480 #endif
2481 #if OMPT_SUPPORT && OMPT_OPTIONAL
2482  // This is the case, if called from omp_init_lock_with_hint:
2483  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2484  if (!codeptr)
2485  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2486  if (ompt_enabled.ompt_callback_lock_destroy) {
2487  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2488  ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
2489  }
2490 #endif
2491  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2492 
2493 #else // KMP_USE_DYNAMIC_LOCK
2494 
2495  kmp_user_lock_p lck;
2496 
2497  if ((__kmp_user_lock_kind == lk_tas) &&
2498  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2499  OMP_NEST_LOCK_T_SIZE)) {
2500  lck = (kmp_user_lock_p)user_lock;
2501  }
2502 #if KMP_USE_FUTEX
2503  else if ((__kmp_user_lock_kind == lk_futex) &&
2504  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2505  OMP_NEST_LOCK_T_SIZE)) {
2506  lck = (kmp_user_lock_p)user_lock;
2507  }
2508 #endif
2509  else {
2510  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2511  }
2512 
2513 #if OMPT_SUPPORT && OMPT_OPTIONAL
2514  // This is the case, if called from omp_init_lock_with_hint:
2515  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2516  if (!codeptr)
2517  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2518  if (ompt_enabled.ompt_callback_lock_destroy) {
2519  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2520  ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
2521  }
2522 #endif
2523 
2524 #if USE_ITT_BUILD
2525  __kmp_itt_lock_destroyed(lck);
2526 #endif /* USE_ITT_BUILD */
2527 
2528  DESTROY_NESTED_LOCK(lck);
2529 
2530  if ((__kmp_user_lock_kind == lk_tas) &&
2531  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2532  OMP_NEST_LOCK_T_SIZE)) {
2533  ;
2534  }
2535 #if KMP_USE_FUTEX
2536  else if ((__kmp_user_lock_kind == lk_futex) &&
2537  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2538  OMP_NEST_LOCK_T_SIZE)) {
2539  ;
2540  }
2541 #endif
2542  else {
2543  __kmp_user_lock_free(user_lock, gtid, lck);
2544  }
2545 #endif // KMP_USE_DYNAMIC_LOCK
2546 } // __kmpc_destroy_nest_lock
2547 
2548 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2549  KMP_COUNT_BLOCK(OMP_set_lock);
2550 #if KMP_USE_DYNAMIC_LOCK
2551  int tag = KMP_EXTRACT_D_TAG(user_lock);
2552 #if USE_ITT_BUILD
2553  __kmp_itt_lock_acquiring(
2554  (kmp_user_lock_p)
2555  user_lock); // itt function will get to the right lock object.
2556 #endif
2557 #if OMPT_SUPPORT && OMPT_OPTIONAL
2558  // This is the case, if called from omp_init_lock_with_hint:
2559  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2560  if (!codeptr)
2561  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2562  if (ompt_enabled.ompt_callback_mutex_acquire) {
2563  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2564  ompt_mutex_lock, omp_lock_hint_none,
2565  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2566  codeptr);
2567  }
2568 #endif
2569 #if KMP_USE_INLINED_TAS
2570  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2571  KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2572  } else
2573 #elif KMP_USE_INLINED_FUTEX
2574  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2575  KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2576  } else
2577 #endif
2578  {
2579  __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2580  }
2581 #if USE_ITT_BUILD
2582  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2583 #endif
2584 #if OMPT_SUPPORT && OMPT_OPTIONAL
2585  if (ompt_enabled.ompt_callback_mutex_acquired) {
2586  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2587  ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2588  }
2589 #endif
2590 
2591 #else // KMP_USE_DYNAMIC_LOCK
2592 
2593  kmp_user_lock_p lck;
2594 
2595  if ((__kmp_user_lock_kind == lk_tas) &&
2596  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2597  lck = (kmp_user_lock_p)user_lock;
2598  }
2599 #if KMP_USE_FUTEX
2600  else if ((__kmp_user_lock_kind == lk_futex) &&
2601  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2602  lck = (kmp_user_lock_p)user_lock;
2603  }
2604 #endif
2605  else {
2606  lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2607  }
2608 
2609 #if USE_ITT_BUILD
2610  __kmp_itt_lock_acquiring(lck);
2611 #endif /* USE_ITT_BUILD */
2612 #if OMPT_SUPPORT && OMPT_OPTIONAL
2613  // This is the case, if called from omp_init_lock_with_hint:
2614  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2615  if (!codeptr)
2616  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2617  if (ompt_enabled.ompt_callback_mutex_acquire) {
2618  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2619  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2620  (ompt_wait_id_t)lck, codeptr);
2621  }
2622 #endif
2623 
2624  ACQUIRE_LOCK(lck, gtid);
2625 
2626 #if USE_ITT_BUILD
2627  __kmp_itt_lock_acquired(lck);
2628 #endif /* USE_ITT_BUILD */
2629 
2630 #if OMPT_SUPPORT && OMPT_OPTIONAL
2631  if (ompt_enabled.ompt_callback_mutex_acquired) {
2632  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2633  ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
2634  }
2635 #endif
2636 
2637 #endif // KMP_USE_DYNAMIC_LOCK
2638 }
2639 
2640 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2641 #if KMP_USE_DYNAMIC_LOCK
2642 
2643 #if USE_ITT_BUILD
2644  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2645 #endif
2646 #if OMPT_SUPPORT && OMPT_OPTIONAL
2647  // This is the case, if called from omp_init_lock_with_hint:
2648  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2649  if (!codeptr)
2650  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2651  if (ompt_enabled.enabled) {
2652  if (ompt_enabled.ompt_callback_mutex_acquire) {
2653  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2654  ompt_mutex_nest_lock, omp_lock_hint_none,
2655  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2656  codeptr);
2657  }
2658  }
2659 #endif
2660  int acquire_status =
2661  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2662  (void) acquire_status;
2663 #if USE_ITT_BUILD
2664  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2665 #endif
2666 
2667 #if OMPT_SUPPORT && OMPT_OPTIONAL
2668  if (ompt_enabled.enabled) {
2669  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2670  if (ompt_enabled.ompt_callback_mutex_acquired) {
2671  // lock_first
2672  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2673  ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
2674  }
2675  } else {
2676  if (ompt_enabled.ompt_callback_nest_lock) {
2677  // lock_next
2678  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2679  ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
2680  }
2681  }
2682  }
2683 #endif
2684 
2685 #else // KMP_USE_DYNAMIC_LOCK
2686  int acquire_status;
2687  kmp_user_lock_p lck;
2688 
2689  if ((__kmp_user_lock_kind == lk_tas) &&
2690  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2691  OMP_NEST_LOCK_T_SIZE)) {
2692  lck = (kmp_user_lock_p)user_lock;
2693  }
2694 #if KMP_USE_FUTEX
2695  else if ((__kmp_user_lock_kind == lk_futex) &&
2696  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2697  OMP_NEST_LOCK_T_SIZE)) {
2698  lck = (kmp_user_lock_p)user_lock;
2699  }
2700 #endif
2701  else {
2702  lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2703  }
2704 
2705 #if USE_ITT_BUILD
2706  __kmp_itt_lock_acquiring(lck);
2707 #endif /* USE_ITT_BUILD */
2708 #if OMPT_SUPPORT && OMPT_OPTIONAL
2709  // This is the case, if called from omp_init_lock_with_hint:
2710  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2711  if (!codeptr)
2712  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2713  if (ompt_enabled.enabled) {
2714  if (ompt_enabled.ompt_callback_mutex_acquire) {
2715  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2716  ompt_mutex_nest_lock, omp_lock_hint_none,
2717  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
2718  }
2719  }
2720 #endif
2721 
2722  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2723 
2724 #if USE_ITT_BUILD
2725  __kmp_itt_lock_acquired(lck);
2726 #endif /* USE_ITT_BUILD */
2727 
2728 #if OMPT_SUPPORT && OMPT_OPTIONAL
2729  if (ompt_enabled.enabled) {
2730  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2731  if (ompt_enabled.ompt_callback_mutex_acquired) {
2732  // lock_first
2733  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2734  ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
2735  }
2736  } else {
2737  if (ompt_enabled.ompt_callback_nest_lock) {
2738  // lock_next
2739  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2740  ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
2741  }
2742  }
2743  }
2744 #endif
2745 
2746 #endif // KMP_USE_DYNAMIC_LOCK
2747 }
2748 
2749 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2750 #if KMP_USE_DYNAMIC_LOCK
2751 
2752  int tag = KMP_EXTRACT_D_TAG(user_lock);
2753 #if USE_ITT_BUILD
2754  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2755 #endif
2756 #if KMP_USE_INLINED_TAS
2757  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2758  KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2759  } else
2760 #elif KMP_USE_INLINED_FUTEX
2761  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2762  KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2763  } else
2764 #endif
2765  {
2766  __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2767  }
2768 
2769 #if OMPT_SUPPORT && OMPT_OPTIONAL
2770  // This is the case, if called from omp_init_lock_with_hint:
2771  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2772  if (!codeptr)
2773  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2774  if (ompt_enabled.ompt_callback_mutex_released) {
2775  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2776  ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
2777  }
2778 #endif
2779 
2780 #else // KMP_USE_DYNAMIC_LOCK
2781 
2782  kmp_user_lock_p lck;
2783 
2784  /* Can't use serial interval since not block structured */
2785  /* release the lock */
2786 
2787  if ((__kmp_user_lock_kind == lk_tas) &&
2788  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2789 #if KMP_OS_LINUX && \
2790  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2791 // "fast" path implemented to fix customer performance issue
2792 #if USE_ITT_BUILD
2793  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2794 #endif /* USE_ITT_BUILD */
2795  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2796  KMP_MB();
2797 
2798 #if OMPT_SUPPORT && OMPT_OPTIONAL
2799  // This is the case, if called from omp_init_lock_with_hint:
2800  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2801  if (!codeptr)
2802  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2803  if (ompt_enabled.ompt_callback_mutex_released) {
2804  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2805  ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
2806  }
2807 #endif
2808 
2809  return;
2810 #else
2811  lck = (kmp_user_lock_p)user_lock;
2812 #endif
2813  }
2814 #if KMP_USE_FUTEX
2815  else if ((__kmp_user_lock_kind == lk_futex) &&
2816  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2817  lck = (kmp_user_lock_p)user_lock;
2818  }
2819 #endif
2820  else {
2821  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2822  }
2823 
2824 #if USE_ITT_BUILD
2825  __kmp_itt_lock_releasing(lck);
2826 #endif /* USE_ITT_BUILD */
2827 
2828  RELEASE_LOCK(lck, gtid);
2829 
2830 #if OMPT_SUPPORT && OMPT_OPTIONAL
2831  // This is the case, if called from omp_init_lock_with_hint:
2832  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2833  if (!codeptr)
2834  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2835  if (ompt_enabled.ompt_callback_mutex_released) {
2836  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2837  ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
2838  }
2839 #endif
2840 
2841 #endif // KMP_USE_DYNAMIC_LOCK
2842 }
2843 
2844 /* release the lock */
2845 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2846 #if KMP_USE_DYNAMIC_LOCK
2847 
2848 #if USE_ITT_BUILD
2849  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2850 #endif
2851  int release_status =
2852  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2853  (void) release_status;
2854 
2855 #if OMPT_SUPPORT && OMPT_OPTIONAL
2856  // This is the case, if called from omp_init_lock_with_hint:
2857  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2858  if (!codeptr)
2859  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2860  if (ompt_enabled.enabled) {
2861  if (release_status == KMP_LOCK_RELEASED) {
2862  if (ompt_enabled.ompt_callback_mutex_released) {
2863  // release_lock_last
2864  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2865  ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
2866  }
2867  } else if (ompt_enabled.ompt_callback_nest_lock) {
2868  // release_lock_prev
2869  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2870  ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr);
2871  }
2872  }
2873 #endif
2874 
2875 #else // KMP_USE_DYNAMIC_LOCK
2876 
2877  kmp_user_lock_p lck;
2878 
2879  /* Can't use serial interval since not block structured */
2880 
2881  if ((__kmp_user_lock_kind == lk_tas) &&
2882  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2883  OMP_NEST_LOCK_T_SIZE)) {
2884 #if KMP_OS_LINUX && \
2885  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2886  // "fast" path implemented to fix customer performance issue
2887  kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2888 #if USE_ITT_BUILD
2889  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2890 #endif /* USE_ITT_BUILD */
2891 
2892 #if OMPT_SUPPORT && OMPT_OPTIONAL
2893  int release_status = KMP_LOCK_STILL_HELD;
2894 #endif
2895 
2896  if (--(tl->lk.depth_locked) == 0) {
2897  TCW_4(tl->lk.poll, 0);
2898 #if OMPT_SUPPORT && OMPT_OPTIONAL
2899  release_status = KMP_LOCK_RELEASED;
2900 #endif
2901  }
2902  KMP_MB();
2903 
2904 #if OMPT_SUPPORT && OMPT_OPTIONAL
2905  // This is the case, if called from omp_init_lock_with_hint:
2906  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2907  if (!codeptr)
2908  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2909  if (ompt_enabled.enabled) {
2910  if (release_status == KMP_LOCK_RELEASED) {
2911  if (ompt_enabled.ompt_callback_mutex_released) {
2912  // release_lock_last
2913  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2914  ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
2915  }
2916  } else if (ompt_enabled.ompt_callback_nest_lock) {
2917  // release_lock_previous
2918  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2919  ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr);
2920  }
2921  }
2922 #endif
2923 
2924  return;
2925 #else
2926  lck = (kmp_user_lock_p)user_lock;
2927 #endif
2928  }
2929 #if KMP_USE_FUTEX
2930  else if ((__kmp_user_lock_kind == lk_futex) &&
2931  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2932  OMP_NEST_LOCK_T_SIZE)) {
2933  lck = (kmp_user_lock_p)user_lock;
2934  }
2935 #endif
2936  else {
2937  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
2938  }
2939 
2940 #if USE_ITT_BUILD
2941  __kmp_itt_lock_releasing(lck);
2942 #endif /* USE_ITT_BUILD */
2943 
2944  int release_status;
2945  release_status = RELEASE_NESTED_LOCK(lck, gtid);
2946 #if OMPT_SUPPORT && OMPT_OPTIONAL
2947  // This is the case, if called from omp_init_lock_with_hint:
2948  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2949  if (!codeptr)
2950  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2951  if (ompt_enabled.enabled) {
2952  if (release_status == KMP_LOCK_RELEASED) {
2953  if (ompt_enabled.ompt_callback_mutex_released) {
2954  // release_lock_last
2955  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2956  ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
2957  }
2958  } else if (ompt_enabled.ompt_callback_nest_lock) {
2959  // release_lock_previous
2960  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2961  ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr);
2962  }
2963  }
2964 #endif
2965 
2966 #endif // KMP_USE_DYNAMIC_LOCK
2967 }
2968 
2969 /* try to acquire the lock */
2970 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2971  KMP_COUNT_BLOCK(OMP_test_lock);
2972 
2973 #if KMP_USE_DYNAMIC_LOCK
2974  int rc;
2975  int tag = KMP_EXTRACT_D_TAG(user_lock);
2976 #if USE_ITT_BUILD
2977  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2978 #endif
2979 #if OMPT_SUPPORT && OMPT_OPTIONAL
2980  // This is the case, if called from omp_init_lock_with_hint:
2981  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2982  if (!codeptr)
2983  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2984  if (ompt_enabled.ompt_callback_mutex_acquire) {
2985  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2986  ompt_mutex_lock, omp_lock_hint_none,
2987  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
2988  codeptr);
2989  }
2990 #endif
2991 #if KMP_USE_INLINED_TAS
2992  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2993  KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
2994  } else
2995 #elif KMP_USE_INLINED_FUTEX
2996  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2997  KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
2998  } else
2999 #endif
3000  {
3001  rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3002  }
3003  if (rc) {
3004 #if USE_ITT_BUILD
3005  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3006 #endif
3007 #if OMPT_SUPPORT && OMPT_OPTIONAL
3008  if (ompt_enabled.ompt_callback_mutex_acquired) {
3009  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3010  ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
3011  }
3012 #endif
3013  return FTN_TRUE;
3014  } else {
3015 #if USE_ITT_BUILD
3016  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3017 #endif
3018  return FTN_FALSE;
3019  }
3020 
3021 #else // KMP_USE_DYNAMIC_LOCK
3022 
3023  kmp_user_lock_p lck;
3024  int rc;
3025 
3026  if ((__kmp_user_lock_kind == lk_tas) &&
3027  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3028  lck = (kmp_user_lock_p)user_lock;
3029  }
3030 #if KMP_USE_FUTEX
3031  else if ((__kmp_user_lock_kind == lk_futex) &&
3032  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3033  lck = (kmp_user_lock_p)user_lock;
3034  }
3035 #endif
3036  else {
3037  lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3038  }
3039 
3040 #if USE_ITT_BUILD
3041  __kmp_itt_lock_acquiring(lck);
3042 #endif /* USE_ITT_BUILD */
3043 #if OMPT_SUPPORT && OMPT_OPTIONAL
3044  // This is the case, if called from omp_init_lock_with_hint:
3045  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3046  if (!codeptr)
3047  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3048  if (ompt_enabled.ompt_callback_mutex_acquire) {
3049  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3050  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3051  (ompt_wait_id_t)lck, codeptr);
3052  }
3053 #endif
3054 
3055  rc = TEST_LOCK(lck, gtid);
3056 #if USE_ITT_BUILD
3057  if (rc) {
3058  __kmp_itt_lock_acquired(lck);
3059  } else {
3060  __kmp_itt_lock_cancelled(lck);
3061  }
3062 #endif /* USE_ITT_BUILD */
3063 #if OMPT_SUPPORT && OMPT_OPTIONAL
3064  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3065  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3066  ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
3067  }
3068 #endif
3069 
3070  return (rc ? FTN_TRUE : FTN_FALSE);
3071 
3072 /* Can't use serial interval since not block structured */
3073 
3074 #endif // KMP_USE_DYNAMIC_LOCK
3075 }
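// A minimal user-level sketch of omp_set_lock/omp_test_lock/omp_unset_lock,
// which reach __kmpc_set_lock, __kmpc_test_lock and __kmpc_unset_lock above
// (assumes the lock has already been initialized with omp_init_lock).

#include <omp.h>

static omp_lock_t example_lock;
static int example_counter = 0;

static void example_locked_increment(void) {
  if (!omp_test_lock(&example_lock)) // non-blocking attempt, cf. __kmpc_test_lock
    omp_set_lock(&example_lock);     // blocking acquire,     cf. __kmpc_set_lock
  example_counter++;                 // protected update
  omp_unset_lock(&example_lock);     // release,              cf. __kmpc_unset_lock
}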
3076 
3077 /* try to acquire the lock */
3078 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3079 #if KMP_USE_DYNAMIC_LOCK
3080  int rc;
3081 #if USE_ITT_BUILD
3082  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3083 #endif
3084 #if OMPT_SUPPORT && OMPT_OPTIONAL
3085  // This is the case, if called from omp_init_lock_with_hint:
3086  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3087  if (!codeptr)
3088  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3089  if (ompt_enabled.ompt_callback_mutex_acquire) {
3090  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3091  ompt_mutex_nest_lock, omp_lock_hint_none,
3092  __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
3093  codeptr);
3094  }
3095 #endif
3096  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3097 #if USE_ITT_BUILD
3098  if (rc) {
3099  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3100  } else {
3101  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3102  }
3103 #endif
3104 #if OMPT_SUPPORT && OMPT_OPTIONAL
3105  if (ompt_enabled.enabled && rc) {
3106  if (rc == 1) {
3107  if (ompt_enabled.ompt_callback_mutex_acquired) {
3108  // lock_first
3109  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3110  ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
3111  }
3112  } else {
3113  if (ompt_enabled.ompt_callback_nest_lock) {
3114  // lock_next
3115  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3116  ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
3117  }
3118  }
3119  }
3120 #endif
3121  return rc;
3122 
3123 #else // KMP_USE_DYNAMIC_LOCK
3124 
3125  kmp_user_lock_p lck;
3126  int rc;
3127 
3128  if ((__kmp_user_lock_kind == lk_tas) &&
3129  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3130  OMP_NEST_LOCK_T_SIZE)) {
3131  lck = (kmp_user_lock_p)user_lock;
3132  }
3133 #if KMP_USE_FUTEX
3134  else if ((__kmp_user_lock_kind == lk_futex) &&
3135  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3136  OMP_NEST_LOCK_T_SIZE)) {
3137  lck = (kmp_user_lock_p)user_lock;
3138  }
3139 #endif
3140  else {
3141  lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3142  }
3143 
3144 #if USE_ITT_BUILD
3145  __kmp_itt_lock_acquiring(lck);
3146 #endif /* USE_ITT_BUILD */
3147 
3148 #if OMPT_SUPPORT && OMPT_OPTIONAL
3149  // This is the case, if called from omp_init_lock_with_hint:
3150  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3151  if (!codeptr)
3152  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3153  if (ompt_enabled.enabled &&
3154      ompt_enabled.ompt_callback_mutex_acquire) {
3155  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3156  ompt_mutex_nest_lock, omp_lock_hint_none,
3157  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
3158  }
3159 #endif
3160 
3161  rc = TEST_NESTED_LOCK(lck, gtid);
3162 #if USE_ITT_BUILD
3163  if (rc) {
3164  __kmp_itt_lock_acquired(lck);
3165  } else {
3166  __kmp_itt_lock_cancelled(lck);
3167  }
3168 #endif /* USE_ITT_BUILD */
3169 #if OMPT_SUPPORT && OMPT_OPTIONAL
3170  if (ompt_enabled.enabled && rc) {
3171  if (rc == 1) {
3172  if (ompt_enabled.ompt_callback_mutex_acquired) {
3173  // lock_first
3174  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3175  ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
3176  }
3177  } else {
3178  if (ompt_enabled.ompt_callback_nest_lock) {
3179  // lock_next
3180  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3181  ompt_mutex_scope_begin, (ompt_wait_id_t)lck, codeptr);
3182  }
3183  }
3184  }
3185 #endif
3186  return rc;
3187 
3188 /* Can't use serial interval since not block structured */
3189 
3190 #endif // KMP_USE_DYNAMIC_LOCK
3191 }
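// A minimal user-level sketch of nestable locks reaching __kmpc_set_nest_lock,
// __kmpc_unset_nest_lock and __kmpc_test_nest_lock above: the owning thread
// may re-acquire the lock, and the OMPT callbacks distinguish the first
// acquisition (mutex_acquired) from a re-acquisition (nest_lock). Assumes the
// lock was initialized with omp_init_nest_lock.

#include <omp.h>

static omp_nest_lock_t example_nest_lock;

static void example_inner(void) {
  omp_set_nest_lock(&example_nest_lock);   // re-acquired by the owner: depth 2
  omp_unset_nest_lock(&example_nest_lock); // back to depth 1, still held
}

static void example_outer(void) {
  omp_set_nest_lock(&example_nest_lock);   // first acquisition: depth 1
  example_inner();
  omp_unset_nest_lock(&example_nest_lock); // depth 0, lock released
}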
3192 
3193 // Interface to fast scalable reduce methods routines
3194 
3195 // keep the selected method in a thread-local structure for cross-function
3196 // usage: it will be used in the __kmpc_end_reduce* functions;
3197 // another solution: re-determine the method one more time in the
3198 // __kmpc_end_reduce* functions (a new prototype would be required then)
3199 // AT: which solution is better?
3200 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3201  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3202 
3203 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3204  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3205 
3206 // description of the packed_reduction_method variable: look at the macros in
3207 // kmp.h
3208 
3209 // used in a critical section reduce block
3210 static __forceinline void
3211 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3212  kmp_critical_name *crit) {
3213 
3214  // this lock was visible to a customer and to the threading profile tool as a
3215  // serial overhead span (although it's used for an internal purpose only)
3216  // why was it visible in the previous implementation?
3217  // should we keep it visible in the new reduce block?
3218  kmp_user_lock_p lck;
3219 
3220 #if KMP_USE_DYNAMIC_LOCK
3221 
3222  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3223  // Check if it is initialized.
3224  if (*lk == 0) {
3225  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3226  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3227  KMP_GET_D_TAG(__kmp_user_lock_seq));
3228  } else {
3229  __kmp_init_indirect_csptr(crit, loc, global_tid,
3230  KMP_GET_I_TAG(__kmp_user_lock_seq));
3231  }
3232  }
3233  // Branch for accessing the actual lock object and set operation. This
3234  // branching is inevitable since this lock initialization does not follow the
3235  // normal dispatch path (lock table is not used).
3236  if (KMP_EXTRACT_D_TAG(lk) != 0) {
3237  lck = (kmp_user_lock_p)lk;
3238  KMP_DEBUG_ASSERT(lck != NULL);
3239  if (__kmp_env_consistency_check) {
3240  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3241  }
3242  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3243  } else {
3244  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3245  lck = ilk->lock;
3246  KMP_DEBUG_ASSERT(lck != NULL);
3247  if (__kmp_env_consistency_check) {
3248  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3249  }
3250  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3251  }
3252 
3253 #else // KMP_USE_DYNAMIC_LOCK
3254 
3255  // We know that the fast reduction code is only emitted by Intel compilers
3256  // with 32 byte critical sections. If there isn't enough space, then we
3257  // have to use a pointer.
3258  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3259  lck = (kmp_user_lock_p)crit;
3260  } else {
3261  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3262  }
3263  KMP_DEBUG_ASSERT(lck != NULL);
3264 
3265  if (__kmp_env_consistency_check)
3266  __kmp_push_sync(global_tid, ct_critical, loc, lck);
3267 
3268  __kmp_acquire_user_lock_with_checks(lck, global_tid);
3269 
3270 #endif // KMP_USE_DYNAMIC_LOCK
3271 }
3272 
3273 // used in a critical section reduce block
3274 static __forceinline void
3275 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3276  kmp_critical_name *crit) {
3277 
3278  kmp_user_lock_p lck;
3279 
3280 #if KMP_USE_DYNAMIC_LOCK
3281 
3282  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3283  lck = (kmp_user_lock_p)crit;
3284  if (__kmp_env_consistency_check)
3285  __kmp_pop_sync(global_tid, ct_critical, loc);
3286  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3287  } else {
3288  kmp_indirect_lock_t *ilk =
3289  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3290  if (__kmp_env_consistency_check)
3291  __kmp_pop_sync(global_tid, ct_critical, loc);
3292  KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3293  }
3294 
3295 #else // KMP_USE_DYNAMIC_LOCK
3296 
3297  // We know that the fast reduction code is only emitted by Intel compilers
3298  // with 32 byte critical sections. If there isn't enough space, then we have
3299  // to use a pointer.
3300  if (__kmp_base_user_lock_size > 32) {
3301  lck = *((kmp_user_lock_p *)crit);
3302  KMP_ASSERT(lck != NULL);
3303  } else {
3304  lck = (kmp_user_lock_p)crit;
3305  }
3306 
3307  if (__kmp_env_consistency_check)
3308  __kmp_pop_sync(global_tid, ct_critical, loc);
3309 
3310  __kmp_release_user_lock_with_checks(lck, global_tid);
3311 
3312 #endif // KMP_USE_DYNAMIC_LOCK
3313 } // __kmp_end_critical_section_reduce_block
3314 
3315 #if OMP_40_ENABLED
3316 static __forceinline int
3317 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3318  int *task_state) {
3319  kmp_team_t *team;
3320 
3321  // Check whether we are inside a teams construct.
3322  if (th->th.th_teams_microtask) {
3323  *team_p = team = th->th.th_team;
3324  if (team->t.t_level == th->th.th_teams_level) {
3325  // This is reduction at teams construct.
3326  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3327  // Let's swap teams temporarily for the reduction.
3328  th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3329  th->th.th_team = team->t.t_parent;
3330  th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3331  th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3332  *task_state = th->th.th_task_state;
3333  th->th.th_task_state = 0;
3334 
3335  return 1;
3336  }
3337  }
3338  return 0;
3339 }
3340 
3341 static __forceinline void
3342 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3343  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3344  th->th.th_info.ds.ds_tid = 0;
3345  th->th.th_team = team;
3346  th->th.th_team_nproc = team->t.t_nproc;
3347  th->th.th_task_team = team->t.t_task_team[task_state];
3348  th->th.th_task_state = task_state;
3349 }
3350 #endif
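// A minimal OpenMP 5.0 host-teams sketch of the case the two helpers above
// handle: a reduction on the teams construct itself, where the reducing
// thread's team is temporarily swapped to the parent team for the combine
// step (host teams and the requested team count are assumptions of the
// example).

#include <omp.h>

static long example_teams_reduction(void) {
  long teams = 0;
#pragma omp teams num_teams(4) reduction(+ : teams)
  teams += 1;   // each team's initial thread contributes once
  return teams; // number of teams actually created (at most 4)
}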
3351 
3352 /* 2.a.i. Reduce Block without a terminating barrier */
3368 kmp_int32
3369 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3370  size_t reduce_size, void *reduce_data,
3371  void (*reduce_func)(void *lhs_data, void *rhs_data),
3372  kmp_critical_name *lck) {
3373 
3374  KMP_COUNT_BLOCK(REDUCE_nowait);
3375  int retval = 0;
3376  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3377 #if OMP_40_ENABLED
3378  kmp_info_t *th;
3379  kmp_team_t *team;
3380  int teams_swapped = 0, task_state;
3381 #endif
3382  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3383 
3384  // why do we need this initialization here at all?
3385  // Reduction clause cannot be used as a stand-alone directive.
3386 
3387  // do not call __kmp_serial_initialize(), it will be called by
3388  // __kmp_parallel_initialize() if needed
3389  // possible detection of false-positive race by the threadchecker ???
3390  if (!TCR_4(__kmp_init_parallel))
3391  __kmp_parallel_initialize();
3392 
3393 #if OMP_50_ENABLED
3394  __kmp_resume_if_soft_paused();
3395 #endif
3396 
3397 // check correctness of reduce block nesting
3398 #if KMP_USE_DYNAMIC_LOCK
3399  if (__kmp_env_consistency_check)
3400  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3401 #else
3402  if (__kmp_env_consistency_check)
3403  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3404 #endif
3405 
3406 #if OMP_40_ENABLED
3407  th = __kmp_thread_from_gtid(global_tid);
3408  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3409 #endif // OMP_40_ENABLED
3410 
3411  // the packed_reduction_method value will be reused by the __kmp_end_reduce*
3412  // function, so the value should be kept in a variable
3413  // the variable should be either a construct-specific or thread-specific
3414  // property, not a team-specific property
3415  // (a thread can reach the next reduce block on the next construct, and the
3416  // reduce method may differ on the next construct)
3417  // an ident_t "loc" parameter could be used as a construct-specific property
3418  // (but what if loc == 0?)
3419  // (if both construct-specific and team-specific variables were shared,
3420  // then unnecessary extra syncs would be needed)
3421  // a thread-specific variable is better with regard to the two issues above
3422  // (next construct and extra syncs)
3423  // a thread-specific "th_local.reduction_method" variable is used currently
3424  // each thread executes the 'determine' and 'set' lines (no need to execute
3425  // them by one thread only, which would just add unnecessary extra syncs)
3426 
3427  packed_reduction_method = __kmp_determine_reduction_method(
3428  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3429  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3430 
3431  if (packed_reduction_method == critical_reduce_block) {
3432 
3433  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3434  retval = 1;
3435 
3436  } else if (packed_reduction_method == empty_reduce_block) {
3437 
3438  // usage: if team size == 1, no synchronization is required ( Intel
3439  // platforms only )
3440  retval = 1;
3441 
3442  } else if (packed_reduction_method == atomic_reduce_block) {
3443 
3444  retval = 2;
3445 
3446  // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3447  // won't be called by the code gen)
3448  // (this is not quite right, because the checking block has been closed
3449  // by this 'pop' even though the atomic operation has not been executed
3450  // yet; it will be executed slightly later, literally on the next
3451  // instruction)
3452  if (__kmp_env_consistency_check)
3453  __kmp_pop_sync(global_tid, ct_reduce, loc);
3454 
3455  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3456  tree_reduce_block)) {
3457 
3458 // AT: performance issue: a real barrier here
3459 // AT: (if master goes slow, other threads are blocked here waiting for the
3460 // master to come and release them)
3461 // AT: (it's not what a customer might expect when specifying the NOWAIT clause)
3462 // AT: (specifying NOWAIT won't result in improved performance; it'll only
3463 // be confusing to a customer)
3464 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3465 // might go faster and be more in line with the spirit of NOWAIT
3466 // AT: TO DO: do epcc test and compare times
3467 
3468 // this barrier should be invisible to a customer and to the threading profile
3469 // tool (it's neither a terminating barrier nor customer's code, it's
3470 // used for an internal purpose)
3471 #if OMPT_SUPPORT
3472  // JP: can this barrier potentially lead to task scheduling?
3473  // JP: as long as there is a barrier in the implementation, OMPT should and
3474  // will provide the barrier events
3475  // so we set-up the necessary frame/return addresses.
3476  ompt_frame_t *ompt_frame;
3477  if (ompt_enabled.enabled) {
3478  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3479  if (ompt_frame->enter_frame.ptr == NULL)
3480  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3481  OMPT_STORE_RETURN_ADDRESS(global_tid);
3482  }
3483 #endif
3484 #if USE_ITT_NOTIFY
3485  __kmp_threads[global_tid]->th.th_ident = loc;
3486 #endif
3487  retval =
3488  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3489  global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3490  retval = (retval != 0) ? (0) : (1);
3491 #if OMPT_SUPPORT && OMPT_OPTIONAL
3492  if (ompt_enabled.enabled) {
3493  ompt_frame->enter_frame = ompt_data_none;
3494  }
3495 #endif
3496 
3497  // all other workers except master should do this pop here
3498  // (none of the other workers will get to __kmpc_end_reduce_nowait())
3499  if (__kmp_env_consistency_check) {
3500  if (retval == 0) {
3501  __kmp_pop_sync(global_tid, ct_reduce, loc);
3502  }
3503  }
3504 
3505  } else {
3506 
3507  // should never reach this block
3508  KMP_ASSERT(0); // "unexpected method"
3509  }
3510 #if OMP_40_ENABLED
3511  if (teams_swapped) {
3512  __kmp_restore_swapped_teams(th, team, task_state);
3513  }
3514 #endif
3515  KA_TRACE(
3516  10,
3517  ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3518  global_tid, packed_reduction_method, retval));
3519 
3520  return retval;
3521 }
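// A minimal user-level sketch of a reduction that compilers typically lower
// to the __kmpc_reduce_nowait/__kmpc_end_reduce_nowait pair above (the exact
// lowering is compiler dependent). The return value above selects the path:
// 1 = combine here and call __kmpc_end_reduce_nowait (critical method, empty
// method, or tree master), 2 = update the original variable atomically,
// 0 = nothing more to do (tree worker).

#include <omp.h>

static int example_sum_squares(int n) {
  int sum = 0;
#pragma omp parallel reduction(+ : sum)
  {
#pragma omp for nowait
    for (int i = 0; i < n; i++)
      sum += i * i;
  } // the partial per-thread sums are combined via the reduce interface here
  return sum;
}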
3522 
3531 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3532  kmp_critical_name *lck) {
3533 
3534  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3535 
3536  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3537 
3538  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3539 
3540  if (packed_reduction_method == critical_reduce_block) {
3541 
3542  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3543 
3544  } else if (packed_reduction_method == empty_reduce_block) {
3545 
3546  // usage: if team size == 1, no synchronization is required ( on Intel
3547  // platforms only )
3548 
3549  } else if (packed_reduction_method == atomic_reduce_block) {
3550 
3551  // neither master nor other workers should get here
3552  // (code gen does not generate this call in case 2: atomic reduce block)
3553  // actually it would be better to remove this else-if entirely;
3554  // after removal this value will be checked by the 'else' and will assert
3555 
3556  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3557  tree_reduce_block)) {
3558 
3559  // only master gets here
3560 
3561  } else {
3562 
3563  // should never reach this block
3564  KMP_ASSERT(0); // "unexpected method"
3565  }
3566 
3567  if (__kmp_env_consistency_check)
3568  __kmp_pop_sync(global_tid, ct_reduce, loc);
3569 
3570  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3571  global_tid, packed_reduction_method));
3572 
3573  return;
3574 }
3575 
3576 /* 2.a.ii. Reduce Block with a terminating barrier */
3577 
3593 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3594  size_t reduce_size, void *reduce_data,
3595  void (*reduce_func)(void *lhs_data, void *rhs_data),
3596  kmp_critical_name *lck) {
3597  KMP_COUNT_BLOCK(REDUCE_wait);
3598  int retval = 0;
3599  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3600 #if OMP_40_ENABLED
3601  kmp_info_t *th;
3602  kmp_team_t *team;
3603  int teams_swapped = 0, task_state;
3604 #endif
3605 
3606  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3607 
3608  // why do we need this initialization here at all?
3609  // Reduction clause cannot be a stand-alone directive.
3610 
3611  // do not call __kmp_serial_initialize(), it will be called by
3612  // __kmp_parallel_initialize() if needed
3613  // possible detection of false-positive race by the threadchecker ???
3614  if (!TCR_4(__kmp_init_parallel))
3615  __kmp_parallel_initialize();
3616 
3617 #if OMP_50_ENABLED
3618  __kmp_resume_if_soft_paused();
3619 #endif
3620 
3621 // check correctness of reduce block nesting
3622 #if KMP_USE_DYNAMIC_LOCK
3623  if (__kmp_env_consistency_check)
3624  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3625 #else
3626  if (__kmp_env_consistency_check)
3627  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3628 #endif
3629 
3630 #if OMP_40_ENABLED
3631  th = __kmp_thread_from_gtid(global_tid);
3632  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3633 #endif // OMP_40_ENABLED
3634 
3635  packed_reduction_method = __kmp_determine_reduction_method(
3636  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3637  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3638 
3639  if (packed_reduction_method == critical_reduce_block) {
3640 
3641  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3642  retval = 1;
3643 
3644  } else if (packed_reduction_method == empty_reduce_block) {
3645 
3646  // usage: if team size == 1, no synchronization is required ( Intel
3647  // platforms only )
3648  retval = 1;
3649 
3650  } else if (packed_reduction_method == atomic_reduce_block) {
3651 
3652  retval = 2;
3653 
3654  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3655  tree_reduce_block)) {
3656 
3657 // case tree_reduce_block:
3658 // this barrier should be visible to a customer and to the threading profile
3659 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3660 #if OMPT_SUPPORT
3661  ompt_frame_t *ompt_frame;
3662  if (ompt_enabled.enabled) {
3663  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3664  if (ompt_frame->enter_frame.ptr == NULL)
3665  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3666  OMPT_STORE_RETURN_ADDRESS(global_tid);
3667  }
3668 #endif
3669 #if USE_ITT_NOTIFY
3670  __kmp_threads[global_tid]->th.th_ident =
3671  loc; // needed for correct notification of frames
3672 #endif
3673  retval =
3674  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3675  global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3676  retval = (retval != 0) ? (0) : (1);
3677 #if OMPT_SUPPORT && OMPT_OPTIONAL
3678  if (ompt_enabled.enabled) {
3679  ompt_frame->enter_frame = ompt_data_none;
3680  }
3681 #endif
3682 
3683  // all other workers except master should do this pop here
3684  // (none of the workers except master will enter __kmpc_end_reduce())
3685  if (__kmp_env_consistency_check) {
3686  if (retval == 0) { // 0: all other workers; 1: master
3687  __kmp_pop_sync(global_tid, ct_reduce, loc);
3688  }
3689  }
3690 
3691  } else {
3692 
3693  // should never reach this block
3694  KMP_ASSERT(0); // "unexpected method"
3695  }
3696 #if OMP_40_ENABLED
3697  if (teams_swapped) {
3698  __kmp_restore_swapped_teams(th, team, task_state);
3699  }
3700 #endif
3701 
3702  KA_TRACE(10,
3703  ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3704  global_tid, packed_reduction_method, retval));
3705 
3706  return retval;
3707 }
3708 
3719 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3720  kmp_critical_name *lck) {
3721 
3722  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3723 #if OMP_40_ENABLED
3724  kmp_info_t *th;
3725  kmp_team_t *team;
3726  int teams_swapped = 0, task_state;
3727 #endif
3728 
3729  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3730 
3731 #if OMP_40_ENABLED
3732  th = __kmp_thread_from_gtid(global_tid);
3733  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3734 #endif // OMP_40_ENABLED
3735 
3736  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3737 
3738  // this barrier should be visible to the user and to the threading profile
3739  // tool (it is a terminating barrier on constructs when NOWAIT is not specified)
3740 
3741  if (packed_reduction_method == critical_reduce_block) {
3742 
3743  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3744 
3745 // TODO: implicit barrier: should be exposed
3746 #if OMPT_SUPPORT
3747  ompt_frame_t *ompt_frame;
3748  if (ompt_enabled.enabled) {
3749  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3750  if (ompt_frame->enter_frame.ptr == NULL)
3751  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3752  OMPT_STORE_RETURN_ADDRESS(global_tid);
3753  }
3754 #endif
3755 #if USE_ITT_NOTIFY
3756  __kmp_threads[global_tid]->th.th_ident = loc;
3757 #endif
3758  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3759 #if OMPT_SUPPORT && OMPT_OPTIONAL
3760  if (ompt_enabled.enabled) {
3761  ompt_frame->enter_frame = ompt_data_none;
3762  }
3763 #endif
3764 
3765  } else if (packed_reduction_method == empty_reduce_block) {
3766 
3767 // if the team size is 1, no synchronization is required (Intel platforms only)
3768 
3769 // TODO: implicit barrier: should be exposed
3770 #if OMPT_SUPPORT
3771  ompt_frame_t *ompt_frame;
3772  if (ompt_enabled.enabled) {
3773  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3774  if (ompt_frame->enter_frame.ptr == NULL)
3775  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3776  OMPT_STORE_RETURN_ADDRESS(global_tid);
3777  }
3778 #endif
3779 #if USE_ITT_NOTIFY
3780  __kmp_threads[global_tid]->th.th_ident = loc;
3781 #endif
3782  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3783 #if OMPT_SUPPORT && OMPT_OPTIONAL
3784  if (ompt_enabled.enabled) {
3785  ompt_frame->enter_frame = ompt_data_none;
3786  }
3787 #endif
3788 
3789  } else if (packed_reduction_method == atomic_reduce_block) {
3790 
3791 #if OMPT_SUPPORT
3792  ompt_frame_t *ompt_frame;
3793  if (ompt_enabled.enabled) {
3794  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3795  if (ompt_frame->enter_frame.ptr == NULL)
3796  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3797  OMPT_STORE_RETURN_ADDRESS(global_tid);
3798  }
3799 #endif
3800 // TODO: implicit barrier: should be exposed
3801 #if USE_ITT_NOTIFY
3802  __kmp_threads[global_tid]->th.th_ident = loc;
3803 #endif
3804  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3805 #if OMPT_SUPPORT && OMPT_OPTIONAL
3806  if (ompt_enabled.enabled) {
3807  ompt_frame->enter_frame = ompt_data_none;
3808  }
3809 #endif
3810 
3811  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3812  tree_reduce_block)) {
3813 
3814  // only master executes here (master releases all other workers)
3815  __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3816  global_tid);
3817 
3818  } else {
3819 
3820  // should never reach this block
3821  KMP_ASSERT(0); // "unexpected method"
3822  }
3823 #if OMP_40_ENABLED
3824  if (teams_swapped) {
3825  __kmp_restore_swapped_teams(th, team, task_state);
3826  }
3827 #endif
3828 
3829  if (__kmp_env_consistency_check)
3830  __kmp_pop_sync(global_tid, ct_reduce, loc);
3831 
3832  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3833  global_tid, packed_reduction_method));
3834 
3835  return;
3836 }
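
/*
  Editor's note -- illustrative sketch only, not part of this translation unit.
  The snippet shows the calling pattern a compiler typically generates around
  __kmpc_reduce() / __kmpc_end_reduce() for a blocking reduction such as
  "#pragma omp parallel for reduction(+ : sum)".  The names loc, gtid, sum,
  priv_sum, reduce_func and lck are placeholders, not symbols defined in this
  library.

    kmp_int32 res = __kmpc_reduce(loc, gtid, 1, sizeof(double), &priv_sum,
                                  reduce_func, &lck);
    switch (res) {
    case 1: // critical/tree/empty path: combine directly, then finish
      sum += priv_sum;
      __kmpc_end_reduce(loc, gtid, &lck);
      break;
    case 2: // atomic path: combine with an atomic update, then finish
      // atomically: sum += priv_sum
      __kmpc_end_reduce(loc, gtid, &lck); // supplies the closing barrier
      break;
    default: // 0: tree path, non-master threads were already synchronized
      break;
    }
*/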
3837 
3838 #undef __KMP_GET_REDUCTION_METHOD
3839 #undef __KMP_SET_REDUCTION_METHOD
3840 
3841 /* end of interface to fast scalable reduce routines */
3842 
3843 kmp_uint64 __kmpc_get_taskid() {
3844 
3845  kmp_int32 gtid;
3846  kmp_info_t *thread;
3847 
3848  gtid = __kmp_get_gtid();
3849  if (gtid < 0) {
3850  return 0;
3851  }
3852  thread = __kmp_thread_from_gtid(gtid);
3853  return thread->th.th_current_task->td_task_id;
3854 
3855 } // __kmpc_get_taskid
3856 
3857 kmp_uint64 __kmpc_get_parent_taskid() {
3858 
3859  kmp_int32 gtid;
3860  kmp_info_t *thread;
3861  kmp_taskdata_t *parent_task;
3862 
3863  gtid = __kmp_get_gtid();
3864  if (gtid < 0) {
3865  return 0;
3866  }
3867  thread = __kmp_thread_from_gtid(gtid);
3868  parent_task = thread->th.th_current_task->td_parent;
3869  return (parent_task == NULL ? 0 : parent_task->td_task_id);
3870 
3871 } // __kmpc_get_parent_taskid
3872 
3873 #if OMP_45_ENABLED
3874 
3885 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3886  const struct kmp_dim *dims) {
3887  int j, idx;
3888  kmp_int64 last, trace_count;
3889  kmp_info_t *th = __kmp_threads[gtid];
3890  kmp_team_t *team = th->th.th_team;
3891  kmp_uint32 *flags;
3892  kmp_disp_t *pr_buf = th->th.th_dispatch;
3893  dispatch_shared_info_t *sh_buf;
3894 
3895  KA_TRACE(
3896  20,
3897  ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3898  gtid, num_dims, !team->t.t_serialized));
3899  KMP_DEBUG_ASSERT(dims != NULL);
3900  KMP_DEBUG_ASSERT(num_dims > 0);
3901 
3902  if (team->t.t_serialized) {
3903  KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
3904  return; // no dependencies if team is serialized
3905  }
3906  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3907  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
3908  // the next loop
3909  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
3910 
3911  // Save bounds info into allocated private buffer
3912  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3913  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
3914  th, sizeof(kmp_int64) * (4 * num_dims + 1));
3915  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3916  pr_buf->th_doacross_info[0] =
3917  (kmp_int64)num_dims; // first element is number of dimensions
3918  // Also save the address of num_done so it can be accessed later without
3919  // knowing the buffer index
3920  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3921  pr_buf->th_doacross_info[2] = dims[0].lo;
3922  pr_buf->th_doacross_info[3] = dims[0].up;
3923  pr_buf->th_doacross_info[4] = dims[0].st;
3924  last = 5;
3925  for (j = 1; j < num_dims; ++j) {
3926  kmp_int64
3927  range_length; // range of each dimension except the first, dims[0]
3928  if (dims[j].st == 1) { // most common case
3929  // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
3930  range_length = dims[j].up - dims[j].lo + 1;
3931  } else {
3932  if (dims[j].st > 0) {
3933  KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3934  range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3935  } else { // negative increment
3936  KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3937  range_length =
3938  (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3939  }
3940  }
3941  pr_buf->th_doacross_info[last++] = range_length;
3942  pr_buf->th_doacross_info[last++] = dims[j].lo;
3943  pr_buf->th_doacross_info[last++] = dims[j].up;
3944  pr_buf->th_doacross_info[last++] = dims[j].st;
3945  }
3946 
3947  // Compute total trip count.
3948  // Start with range of dims[0] which we don't need to keep in the buffer.
3949  if (dims[0].st == 1) { // most common case
3950  trace_count = dims[0].up - dims[0].lo + 1;
3951  } else if (dims[0].st > 0) {
3952  KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3953  trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3954  } else { // negative increment
3955  KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3956  trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3957  }
3958  for (j = 1; j < num_dims; ++j) {
3959  trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
3960  }
3961  KMP_DEBUG_ASSERT(trace_count > 0);
3962 
3963  // Check whether the shared buffer is still occupied by a previous loop
3964  // (the one that used buffer index idx - __kmp_dispatch_num_buffers)
3965  if (idx != sh_buf->doacross_buf_idx) {
3966  // Shared buffer is occupied, wait for it to be free
3967  __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
3968  __kmp_eq_4, NULL);
3969  }
3970 #if KMP_32_BIT_ARCH
3971  // Check if we are the first thread. After the CAS the first thread gets 0,
3972  // others get 1 if initialization is in progress, allocated pointer otherwise.
3973  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
3974  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
3975  (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
3976 #else
3977  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
3978  (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
3979 #endif
3980  if (flags == NULL) {
3981  // we are the first thread, allocate the array of flags
3982  size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
3983  flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
3984  KMP_MB();
3985  sh_buf->doacross_flags = flags;
3986  } else if (flags == (kmp_uint32 *)1) {
3987 #if KMP_32_BIT_ARCH
3988  // initialization is still in progress, need to wait
3989  while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
3990 #else
3991  while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
3992 #endif
3993  KMP_YIELD(TRUE);
3994  KMP_MB();
3995  } else {
3996  KMP_MB();
3997  }
3998  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
3999  pr_buf->th_doacross_flags =
4000  sh_buf->doacross_flags; // save private copy in order to not
4001  // touch shared buffer on each iteration
4002  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4003 }
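
/*
  Editor's note -- worked example, not part of this translation unit.  For a
  doacross loop nest with ordered(2), dims[0] = {lo=0, up=99, st=1} and
  dims[1] = {lo=0, up=9, st=1}, the code above fills th_doacross_info as

    [0]    = 2                       // number of dimensions
    [1]    = &sh_buf->doacross_num_done
    [2..4] = 0, 99, 1                // lo, up, st of dims[0]
    [5..8] = 10, 0, 9, 1             // range, lo, up, st of dims[1]

  and computes trace_count = 100 * 10 = 1000 iterations, so the shared flags
  array is allocated as 1000 / 8 + 8 = 133 zeroed bytes, i.e. one bit per
  iteration plus padding.
*/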
4004 
4005 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4006  kmp_int32 shft, num_dims, i;
4007  kmp_uint32 flag;
4008  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4009  kmp_info_t *th = __kmp_threads[gtid];
4010  kmp_team_t *team = th->th.th_team;
4011  kmp_disp_t *pr_buf;
4012  kmp_int64 lo, up, st;
4013 
4014  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4015  if (team->t.t_serialized) {
4016  KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4017  return; // no dependencies if team is serialized
4018  }
4019 
4020  // calculate sequential iteration number and check out-of-bounds condition
4021  pr_buf = th->th.th_dispatch;
4022  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4023  num_dims = pr_buf->th_doacross_info[0];
4024  lo = pr_buf->th_doacross_info[2];
4025  up = pr_buf->th_doacross_info[3];
4026  st = pr_buf->th_doacross_info[4];
4027  if (st == 1) { // most common case
4028  if (vec[0] < lo || vec[0] > up) {
4029  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4030  "bounds [%lld,%lld]\n",
4031  gtid, vec[0], lo, up));
4032  return;
4033  }
4034  iter_number = vec[0] - lo;
4035  } else if (st > 0) {
4036  if (vec[0] < lo || vec[0] > up) {
4037  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4038  "bounds [%lld,%lld]\n",
4039  gtid, vec[0], lo, up));
4040  return;
4041  }
4042  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4043  } else { // negative increment
4044  if (vec[0] > lo || vec[0] < up) {
4045  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4046  "bounds [%lld,%lld]\n",
4047  gtid, vec[0], lo, up));
4048  return;
4049  }
4050  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4051  }
4052  for (i = 1; i < num_dims; ++i) {
4053  kmp_int64 iter, ln;
4054  kmp_int32 j = i * 4;
4055  ln = pr_buf->th_doacross_info[j + 1];
4056  lo = pr_buf->th_doacross_info[j + 2];
4057  up = pr_buf->th_doacross_info[j + 3];
4058  st = pr_buf->th_doacross_info[j + 4];
4059  if (st == 1) {
4060  if (vec[i] < lo || vec[i] > up) {
4061  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4062  "bounds [%lld,%lld]\n",
4063  gtid, vec[i], lo, up));
4064  return;
4065  }
4066  iter = vec[i] - lo;
4067  } else if (st > 0) {
4068  if (vec[i] < lo || vec[i] > up) {
4069  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4070  "bounds [%lld,%lld]\n",
4071  gtid, vec[i], lo, up));
4072  return;
4073  }
4074  iter = (kmp_uint64)(vec[i] - lo) / st;
4075  } else { // st < 0
4076  if (vec[i] > lo || vec[i] < up) {
4077  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4078  "bounds [%lld,%lld]\n",
4079  gtid, vec[i], lo, up));
4080  return;
4081  }
4082  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4083  }
4084  iter_number = iter + ln * iter_number;
4085  }
4086  shft = iter_number % 32; // use 32-bit granularity
4087  iter_number >>= 5; // divided by 32
4088  flag = 1 << shft;
4089  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4090  KMP_YIELD(TRUE);
4091  }
4092  KMP_MB();
4093  KA_TRACE(20,
4094  ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4095  gtid, (iter_number << 5) + shft));
4096 }
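
/*
  Editor's note -- worked example, not part of this translation unit.
  Continuing the ordered(2) example above (dims[0] = 0..99 step 1,
  dims[1] = 0..9 step 1), a call with vec = {12, 3} linearizes to
  iter_number = 3 + 10 * 12 = 123.  The loop above then spins, yielding, until
  bit 123 % 32 = 27 of word 123 >> 5 = 3 in th_doacross_flags has been set by
  the matching __kmpc_doacross_post() call.
*/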
4097 
4098 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4099  kmp_int32 shft, num_dims, i;
4100  kmp_uint32 flag;
4101  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4102  kmp_info_t *th = __kmp_threads[gtid];
4103  kmp_team_t *team = th->th.th_team;
4104  kmp_disp_t *pr_buf;
4105  kmp_int64 lo, st;
4106 
4107  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4108  if (team->t.t_serialized) {
4109  KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4110  return; // no dependencies if team is serialized
4111  }
4112 
4113  // calculate sequential iteration number (same as in "wait" but no
4114  // out-of-bounds checks)
4115  pr_buf = th->th.th_dispatch;
4116  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4117  num_dims = pr_buf->th_doacross_info[0];
4118  lo = pr_buf->th_doacross_info[2];
4119  st = pr_buf->th_doacross_info[4];
4120  if (st == 1) { // most common case
4121  iter_number = vec[0] - lo;
4122  } else if (st > 0) {
4123  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4124  } else { // negative increment
4125  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4126  }
4127  for (i = 1; i < num_dims; ++i) {
4128  kmp_int64 iter, ln;
4129  kmp_int32 j = i * 4;
4130  ln = pr_buf->th_doacross_info[j + 1];
4131  lo = pr_buf->th_doacross_info[j + 2];
4132  st = pr_buf->th_doacross_info[j + 4];
4133  if (st == 1) {
4134  iter = vec[i] - lo;
4135  } else if (st > 0) {
4136  iter = (kmp_uint64)(vec[i] - lo) / st;
4137  } else { // st < 0
4138  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4139  }
4140  iter_number = iter + ln * iter_number;
4141  }
4142  shft = iter_number % 32; // use 32-bit granularity
4143  iter_number >>= 5; // divided by 32
4144  flag = 1 << shft;
4145  KMP_MB();
4146  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4147  KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4148  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4149  (iter_number << 5) + shft));
4150 }
4151 
4152 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4153  kmp_int32 num_done;
4154  kmp_info_t *th = __kmp_threads[gtid];
4155  kmp_team_t *team = th->th.th_team;
4156  kmp_disp_t *pr_buf = th->th.th_dispatch;
4157 
4158  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4159  if (team->t.t_serialized) {
4160  KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4161  return; // nothing to do
4162  }
4163  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
4164  if (num_done == th->th.th_team_nproc) {
4165  // we are the last thread, need to free shared resources
4166  int idx = pr_buf->th_doacross_buf_idx - 1;
4167  dispatch_shared_info_t *sh_buf =
4168  &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4169  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4170  (kmp_int64)&sh_buf->doacross_num_done);
4171  KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4172  KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4173  __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4174  sh_buf->doacross_flags = NULL;
4175  sh_buf->doacross_num_done = 0;
4176  sh_buf->doacross_buf_idx +=
4177  __kmp_dispatch_num_buffers; // free buffer for future re-use
4178  }
4179  // free private resources (need to keep buffer index forever)
4180  pr_buf->th_doacross_flags = NULL;
4181  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4182  pr_buf->th_doacross_info = NULL;
4183  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4184 }
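
/*
  Editor's note -- illustrative sketch only, not part of this translation unit.
  A compiler typically lowers a doacross loop such as

    #pragma omp for ordered(1)
    for (i = 1; i < N; ++i) {
      #pragma omp ordered depend(sink : i - 1)
      // ... use the result of iteration i - 1 ...
      #pragma omp ordered depend(source)
    }

  into per-thread calls of roughly this shape (loc, gtid, dims and vec are
  placeholders):

    __kmpc_doacross_init(loc, gtid, 1, dims);   // once, before the loop
    for (each iteration i assigned to this thread) {
      vec[0] = i - 1;
      __kmpc_doacross_wait(loc, gtid, vec);     // depend(sink : i - 1)
      // ... loop body ...
      vec[0] = i;
      __kmpc_doacross_post(loc, gtid, vec);     // depend(source)
    }
    __kmpc_doacross_fini(loc, gtid);            // once, after the loop
*/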
4185 #endif
4186 
4187 #if OMP_50_ENABLED
4188 int __kmpc_get_target_offload(void) {
4189  if (!__kmp_init_serial) {
4190  __kmp_serial_initialize();
4191  }
4192  return __kmp_target_offload;
4193 }
4194 
4195 int __kmpc_pause_resource(kmp_pause_status_t level) {
4196  if (!__kmp_init_serial) {
4197  return 1; // Can't pause if runtime is not initialized
4198  }
4199  return __kmp_pause_resource(level);
4200 }
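
/*
  Editor's note -- minimal usage sketch, not part of this translation unit.
  Assuming the kmp_pause_status_t enumerators declared in kmp.h (e.g.
  kmp_soft_paused), a caller could request that idle resources be released:

    if (__kmpc_pause_resource(kmp_soft_paused) == 0) {
      // worker threads are paused; the next parallel region wakes them up
    } else {
      // request rejected, e.g. the runtime has not been initialized yet
    }
*/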
4201 #endif // OMP_50_ENABLED
4202 
4203 // end of file //