LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1 /*
2  * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // The LLVM Compiler Infrastructure
8 //
9 // This file is dual licensed under the MIT and the University of Illinois Open
10 // Source Licenses. See LICENSE.txt for details.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #define __KMP_IMP
15 #include "omp.h" /* extern "C" declarations of user-visible routines */
16 #include "kmp.h"
17 #include "kmp_error.h"
18 #include "kmp_i18n.h"
19 #include "kmp_itt.h"
20 #include "kmp_lock.h"
21 #include "kmp_stats.h"
22 
23 #if OMPT_SUPPORT
24 #include "ompt-specific.h"
25 #endif
26 
27 #define MAX_MESSAGE 512
28 
29 // flags will be used in future, e.g. to implement openmp_strict library
30 // restrictions
31 
40 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
41  // By default __kmpc_begin() is no-op.
42  char *env;
43  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
44  __kmp_str_match_true(env)) {
45  __kmp_middle_initialize();
46  KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
47  } else if (__kmp_ignore_mppbeg() == FALSE) {
48  // By default __kmp_ignore_mppbeg() returns TRUE.
49  __kmp_internal_begin();
50  KC_TRACE(10, ("__kmpc_begin: called\n"));
51  }
52 }
53 
62 void __kmpc_end(ident_t *loc) {
63  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
64  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
65  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
66  // returns FALSE and __kmpc_end() will unregister this root (it can cause
67  // library shut down).
68  if (__kmp_ignore_mppend() == FALSE) {
69  KC_TRACE(10, ("__kmpc_end: called\n"));
70  KA_TRACE(30, ("__kmpc_end\n"));
71 
72  __kmp_internal_end_thread(-1);
73  }
74 #if KMP_OS_WINDOWS && OMPT_SUPPORT
75  // Normal exit process on Windows does not allow worker threads of the final
76  // parallel region to finish reporting their events, so shutting down the
77  // library here fixes the issue at least for the cases where __kmpc_end() is
78  // placed properly.
79  if (ompt_enabled.enabled)
80  __kmp_internal_end_library(__kmp_gtid_get_specific());
81 #endif
82 }
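/* Usage sketch (illustrative only, not tied to any particular compiler):
   compilers targeting this interface may bracket the program with these two
   calls, passing a source-location ident_t and reserved flags. The ident_t
   initializer and psource string below are hypothetical.

     static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";file;main;0;0"};

     int main() {
       __kmpc_begin(&loc, 0); // usually a no-op (see the checks above)
       // ... program, including parallel regions ...
       __kmpc_end(&loc); // usually a no-op unless KMP_IGNORE_MPPEND=0
       return 0;
     }
*/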
83 
102 kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
103  kmp_int32 gtid = __kmp_entry_gtid();
104 
105  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
106 
107  return gtid;
108 }
109 
124 kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
125  KC_TRACE(10,
126  ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
127 
128  return TCR_4(__kmp_all_nth);
129 }
130 
137 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
138  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
139  return __kmp_tid_from_gtid(__kmp_entry_gtid());
140 }
141 
147 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
148  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
149 
150  return __kmp_entry_thread()->th.th_team->t.t_nproc;
151 }
152 
159 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
160 #ifndef KMP_DEBUG
161 
162  return TRUE;
163 
164 #else
165 
166  const char *semi2;
167  const char *semi3;
168  int line_no;
169 
170  if (__kmp_par_range == 0) {
171  return TRUE;
172  }
173  semi2 = loc->psource;
174  if (semi2 == NULL) {
175  return TRUE;
176  }
177  semi2 = strchr(semi2, ';');
178  if (semi2 == NULL) {
179  return TRUE;
180  }
181  semi2 = strchr(semi2 + 1, ';');
182  if (semi2 == NULL) {
183  return TRUE;
184  }
185  if (__kmp_par_range_filename[0]) {
186  const char *name = semi2 - 1;
187  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
188  name--;
189  }
190  if ((*name == '/') || (*name == ';')) {
191  name++;
192  }
193  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
194  return __kmp_par_range < 0;
195  }
196  }
197  semi3 = strchr(semi2 + 1, ';');
198  if (__kmp_par_range_routine[0]) {
199  if ((semi3 != NULL) && (semi3 > semi2) &&
200  (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
201  return __kmp_par_range < 0;
202  }
203  }
204  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
205  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
206  return __kmp_par_range > 0;
207  }
208  return __kmp_par_range < 0;
209  }
210  return TRUE;
211 
212 #endif /* KMP_DEBUG */
213 }
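/* Worked example of the KMP_PAR_RANGE filtering above. loc->psource is a
   semicolon-separated string, typically of the form ";file;routine;line;col".
   With a hypothetical

     psource = ";foo.c;bar;42;1"

   semi2 lands on the ';' before "bar" and semi3 on the ';' before "42"; the
   file-name filter compares "foo.c", the routine filter compares "bar", and
   line_no parses as 42 and is tested against the [lb, ub] range. */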
214 
221 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
222  return __kmp_entry_thread()->th.th_root->r.r_active;
223 }
224 
234 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
235  kmp_int32 num_threads) {
236  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
237  global_tid, num_threads));
238 
239  __kmp_push_num_threads(loc, global_tid, num_threads);
240 }
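/* Lowering sketch (the outlined-function name is hypothetical): a num_threads
   clause applies only to the next parallel region, so it is pushed right
   before the fork, e.g. for "#pragma omp parallel num_threads(4)":

     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
     __kmpc_push_num_threads(&loc, gtid, 4);
     __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_fn);
*/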
241 
242 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
243  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
244 
245  /* the num_threads are automatically popped */
246 }
247 
248 #if OMP_40_ENABLED
249 
250 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
251  kmp_int32 proc_bind) {
252  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
253  proc_bind));
254 
255  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
256 }
257 
258 #endif /* OMP_40_ENABLED */
259 
270 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
271  int gtid = __kmp_entry_gtid();
272 
273 #if (KMP_STATS_ENABLED)
274  // If we were in a serial region, then stop the serial timer, record
275  // the event, and start parallel region timer
276  stats_state_e previous_state = KMP_GET_THREAD_STATE();
277  if (previous_state == stats_state_e::SERIAL_REGION) {
278  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
279  } else {
280  KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
281  }
282  int inParallel = __kmpc_in_parallel(loc);
283  if (inParallel) {
284  KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
285  } else {
286  KMP_COUNT_BLOCK(OMP_PARALLEL);
287  }
288 #endif
289 
290  // maybe saving thr_state is enough here
291  {
292  va_list ap;
293  va_start(ap, microtask);
294 
295 #if OMPT_SUPPORT
296  omp_frame_t *ompt_frame;
297  if (ompt_enabled.enabled) {
298  kmp_info_t *master_th = __kmp_threads[gtid];
299  kmp_team_t *parent_team = master_th->th.th_team;
300  ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
301  if (lwt)
302  ompt_frame = &(lwt->ompt_task_info.frame);
303  else {
304  int tid = __kmp_tid_from_gtid(gtid);
305  ompt_frame = &(
306  parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
307  }
308  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
309  OMPT_STORE_RETURN_ADDRESS(gtid);
310  }
311 #endif
312 
313 #if INCLUDE_SSC_MARKS
314  SSC_MARK_FORKING();
315 #endif
316  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
317  VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
318  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
319 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
320 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
321  &ap
322 #else
323  ap
324 #endif
325  );
326 #if INCLUDE_SSC_MARKS
327  SSC_MARK_JOINING();
328 #endif
329  __kmp_join_call(loc, gtid
330 #if OMPT_SUPPORT
331  ,
332  fork_context_intel
333 #endif
334  );
335 
336  va_end(ap);
337  }
338 
339 #if KMP_STATS_ENABLED
340  if (previous_state == stats_state_e::SERIAL_REGION) {
341  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
342  } else {
343  KMP_POP_PARTITIONED_TIMER();
344  }
345 #endif // KMP_STATS_ENABLED
346 }
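/* Lowering sketch for a plain parallel region. The outlined-function name and
   the captured variable are hypothetical; the two leading parameters are the
   implicit global and bound thread ids supplied by the runtime.

     void outlined(kmp_int32 *gtid, kmp_int32 *btid, int *n) {
       // ... body of "#pragma omp parallel shared(n)" ...
     }

     __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined, &n);

   Each vararg after the microtask is forwarded to the outlined function. */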
347 
348 #if OMP_40_ENABLED
349 
360 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
361  kmp_int32 num_teams, kmp_int32 num_threads) {
362  KA_TRACE(20,
363  ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
364  global_tid, num_teams, num_threads));
365 
366  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
367 }
368 
379 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
380  ...) {
381  int gtid = __kmp_entry_gtid();
382  kmp_info_t *this_thr = __kmp_threads[gtid];
383  va_list ap;
384  va_start(ap, microtask);
385 
386  KMP_COUNT_BLOCK(OMP_TEAMS);
387 
388  // remember teams entry point and nesting level
389  this_thr->th.th_teams_microtask = microtask;
390  this_thr->th.th_teams_level =
391  this_thr->th.th_team->t.t_level; // AC: can be >0 on host
392 
393 #if OMPT_SUPPORT
394  kmp_team_t *parent_team = this_thr->th.th_team;
395  int tid = __kmp_tid_from_gtid(gtid);
396  if (ompt_enabled.enabled) {
397  parent_team->t.t_implicit_task_taskdata[tid]
398  .ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
399  }
400  OMPT_STORE_RETURN_ADDRESS(gtid);
401 #endif
402 
403  // check if __kmpc_push_num_teams was called; set the default number of
404  // teams otherwise
405  if (this_thr->th.th_teams_size.nteams == 0) {
406  __kmp_push_num_teams(loc, gtid, 0, 0);
407  }
408  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
409  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
410  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
411 
412  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
413  VOLATILE_CAST(microtask_t)
414  __kmp_teams_master, // "wrapped" task
415  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
416 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
417  &ap
418 #else
419  ap
420 #endif
421  );
422  __kmp_join_call(loc, gtid
423 #if OMPT_SUPPORT
424  ,
425  fork_context_intel
426 #endif
427  );
428 
429  this_thr->th.th_teams_microtask = NULL;
430  this_thr->th.th_teams_level = 0;
431  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
432  va_end(ap);
433 }
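/* Lowering sketch for a teams construct (names are hypothetical):

     // #pragma omp teams num_teams(2) thread_limit(8)
     __kmpc_push_num_teams(&loc, gtid, 2, 8);
     __kmpc_fork_teams(&loc, 1, (kmpc_micro)teams_outlined, &data);
*/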
434 #endif /* OMP_40_ENABLED */
435 
436 // I don't think this function should ever have been exported.
437 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
438 // openmp code ever called it, but it's been exported from the RTL for so
439 // long that I'm afraid to remove the definition.
440 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
441 
454 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
455 // The implementation is now in kmp_runtime.cpp so that it can share static
456 // functions with kmp_fork_call since the tasks to be done are similar in
457 // each case.
458 #if OMPT_SUPPORT
459  OMPT_STORE_RETURN_ADDRESS(global_tid);
460 #endif
461  __kmp_serialized_parallel(loc, global_tid);
462 }
463 
471 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
472  kmp_internal_control_t *top;
473  kmp_info_t *this_thr;
474  kmp_team_t *serial_team;
475 
476  KC_TRACE(10,
477  ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
478 
479  /* skip all this code for autopar serialized loops since it results in
480  unacceptable overhead */
481  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
482  return;
483 
484  // Not autopar code
485  if (!TCR_4(__kmp_init_parallel))
486  __kmp_parallel_initialize();
487 
488  this_thr = __kmp_threads[global_tid];
489  serial_team = this_thr->th.th_serial_team;
490 
491 #if OMP_45_ENABLED
492  kmp_task_team_t *task_team = this_thr->th.th_task_team;
493 
494  // we need to wait for the proxy tasks before finishing the thread
495  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
496  __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
497 #endif
498 
499  KMP_MB();
500  KMP_DEBUG_ASSERT(serial_team);
501  KMP_ASSERT(serial_team->t.t_serialized);
502  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
503  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
504  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
505  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
506 
507 #if OMPT_SUPPORT
508  if (ompt_enabled.enabled &&
509  this_thr->th.ompt_thread_info.state != omp_state_overhead) {
510  OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = NULL;
511  if (ompt_enabled.ompt_callback_implicit_task) {
512  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
513  ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
514  OMPT_CUR_TASK_INFO(this_thr)->thread_num);
515  }
516 
517  // reset/clear the task id only after unlinking the task
518  ompt_data_t *parent_task_data;
519  __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
520 
521  if (ompt_enabled.ompt_callback_parallel_end) {
522  ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
523  &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
524  ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
525  }
526  __ompt_lw_taskteam_unlink(this_thr);
527  this_thr->th.ompt_thread_info.state = omp_state_overhead;
528  }
529 #endif
530 
531  /* If necessary, pop the internal control stack values and replace the team
532  * values */
533  top = serial_team->t.t_control_stack_top;
534  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
535  copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
536  serial_team->t.t_control_stack_top = top->next;
537  __kmp_free(top);
538  }
539 
540  // if( serial_team -> t.t_serialized > 1 )
541  serial_team->t.t_level--;
542 
543  /* pop dispatch buffers stack */
544  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
545  {
546  dispatch_private_info_t *disp_buffer =
547  serial_team->t.t_dispatch->th_disp_buffer;
548  serial_team->t.t_dispatch->th_disp_buffer =
549  serial_team->t.t_dispatch->th_disp_buffer->next;
550  __kmp_free(disp_buffer);
551  }
552 #if OMP_50_ENABLED
553  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
554 #endif
555 
556  --serial_team->t.t_serialized;
557  if (serial_team->t.t_serialized == 0) {
558 
559 /* return to the parallel section */
560 
561 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
562  if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
563  __kmp_clear_x87_fpu_status_word();
564  __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
565  __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
566  }
567 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
568 
569  this_thr->th.th_team = serial_team->t.t_parent;
570  this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
571 
572  /* restore values cached in the thread */
573  this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
574  this_thr->th.th_team_master =
575  serial_team->t.t_parent->t.t_threads[0]; /* JPH */
576  this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
577 
578  /* TODO the below shouldn't need to be adjusted for serialized teams */
579  this_thr->th.th_dispatch =
580  &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
581 
582  __kmp_pop_current_task_from_thread(this_thr);
583 
584  KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
585  this_thr->th.th_current_task->td_flags.executing = 1;
586 
587  if (__kmp_tasking_mode != tskm_immediate_exec) {
588  // Copy the task team from the new child / old parent team to the thread.
589  this_thr->th.th_task_team =
590  this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
591  KA_TRACE(20,
592  ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
593  "team %p\n",
594  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
595  }
596  } else {
597  if (__kmp_tasking_mode != tskm_immediate_exec) {
598  KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
599  "depth of serial team %p to %d\n",
600  global_tid, serial_team, serial_team->t.t_serialized));
601  }
602  }
603 
604  if (__kmp_env_consistency_check)
605  __kmp_pop_parallel(global_tid, NULL);
606 #if OMPT_SUPPORT
607  if (ompt_enabled.enabled)
608  this_thr->th.ompt_thread_info.state =
609  ((this_thr->th.th_team_serialized) ? omp_state_work_serial
610  : omp_state_work_parallel);
611 #endif
612 }
613 
622 void __kmpc_flush(ident_t *loc) {
623  KC_TRACE(10, ("__kmpc_flush: called\n"));
624 
625  /* need an explicit __mf() here since the library uses volatile instead */
626  KMP_MB(); /* Flush all pending memory write invalidates. */
627 
628 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
629 #if KMP_MIC
630 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
631 // We shouldn't need it, though, since the ABI rules require that
632 // * If the compiler generates NGO stores it also generates the fence
633 // * If users hand-code NGO stores they should insert the fence
634 // therefore no incomplete unordered stores should be visible.
635 #else
636  // C74404
637  // This is to address non-temporal store instructions (sfence needed).
638  // The clflush instruction is also addressed (mfence needed).
639  // Probably the non-temporal load movntdqa instruction should also be
640  // addressed.
641  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
642  if (!__kmp_cpuinfo.initialized) {
643  __kmp_query_cpuid(&__kmp_cpuinfo);
644  }
645  if (!__kmp_cpuinfo.sse2) {
646  // CPU cannot execute SSE2 instructions.
647  } else {
648 #if KMP_COMPILER_ICC
649  _mm_mfence();
650 #elif KMP_COMPILER_MSVC
651  MemoryBarrier();
652 #else
653  __sync_synchronize();
654 #endif // KMP_COMPILER_ICC
655  }
656 #endif // KMP_MIC
657 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64)
658 // Nothing to see here; move along.
659 #elif KMP_ARCH_PPC64
660 // Nothing needed here (we have a real MB above).
661 #if KMP_OS_CNK
662  // The flushing thread needs to yield here; this prevents a
663  // busy-waiting thread from saturating the pipeline. flush is
664  // often used in loops like this:
665  // while (!flag) {
666  // #pragma omp flush(flag)
667  // }
668  // and adding the yield here is good for at least a 10x speedup
669  // when running >2 threads per core (on the NAS LU benchmark).
670  __kmp_yield(TRUE);
671 #endif
672 #else
673 #error Unknown or unsupported architecture
674 #endif
675 
676 #if OMPT_SUPPORT && OMPT_OPTIONAL
677  if (ompt_enabled.ompt_callback_flush) {
678  ompt_callbacks.ompt_callback(ompt_callback_flush)(
679  __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
680  }
681 #endif
682 }
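/* Usage sketch matching the spin-wait pattern mentioned in the PPC64 comment
   above; a standalone flush lowers to a single runtime call:

     while (!flag) {
       // #pragma omp flush(flag)
       __kmpc_flush(&loc);
     }
*/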
683 
684 /* -------------------------------------------------------------------------- */
692 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
693  KMP_COUNT_BLOCK(OMP_BARRIER);
694  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
695 
696  if (!TCR_4(__kmp_init_parallel))
697  __kmp_parallel_initialize();
698 
699  if (__kmp_env_consistency_check) {
700  if (loc == 0) {
701  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
702  }
703 
704  __kmp_check_barrier(global_tid, ct_barrier, loc);
705  }
706 
707 #if OMPT_SUPPORT
708  omp_frame_t *ompt_frame;
709  if (ompt_enabled.enabled) {
710  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
711  if (ompt_frame->enter_frame == NULL)
712  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
713  OMPT_STORE_RETURN_ADDRESS(global_tid);
714  }
715 #endif
716  __kmp_threads[global_tid]->th.th_ident = loc;
717  // TODO: explicit barrier_wait_id:
718  // this function is called when a 'barrier' directive is present or at the
719  // implicit barrier at the end of a worksharing construct.
720  // 1) better to add a per-thread barrier counter to a thread data structure
721  // 2) set to 0 when a new team is created
722  // 3) no sync is required
723 
724  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
725 #if OMPT_SUPPORT && OMPT_OPTIONAL
726  if (ompt_enabled.enabled) {
727  ompt_frame->enter_frame = NULL;
728  }
729 #endif
730 }
731 
732 /* The BARRIER for a MASTER section is always explicit */
739 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
740  int status = 0;
741 
742  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
743 
744  if (!TCR_4(__kmp_init_parallel))
745  __kmp_parallel_initialize();
746 
747  if (KMP_MASTER_GTID(global_tid)) {
748  KMP_COUNT_BLOCK(OMP_MASTER);
749  KMP_PUSH_PARTITIONED_TIMER(OMP_master);
750  status = 1;
751  }
752 
753 #if OMPT_SUPPORT && OMPT_OPTIONAL
754  if (status) {
755  if (ompt_enabled.ompt_callback_master) {
756  kmp_info_t *this_thr = __kmp_threads[global_tid];
757  kmp_team_t *team = this_thr->th.th_team;
758 
759  int tid = __kmp_tid_from_gtid(global_tid);
760  ompt_callbacks.ompt_callback(ompt_callback_master)(
761  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
762  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
763  OMPT_GET_RETURN_ADDRESS(0));
764  }
765  }
766 #endif
767 
768  if (__kmp_env_consistency_check) {
769 #if KMP_USE_DYNAMIC_LOCK
770  if (status)
771  __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
772  else
773  __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
774 #else
775  if (status)
776  __kmp_push_sync(global_tid, ct_master, loc, NULL);
777  else
778  __kmp_check_sync(global_tid, ct_master, loc, NULL);
779 #endif
780  }
781 
782  return status;
783 }
784 
793 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
794  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
795 
796  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
797  KMP_POP_PARTITIONED_TIMER();
798 
799 #if OMPT_SUPPORT && OMPT_OPTIONAL
800  kmp_info_t *this_thr = __kmp_threads[global_tid];
801  kmp_team_t *team = this_thr->th.th_team;
802  if (ompt_enabled.ompt_callback_master) {
803  int tid = __kmp_tid_from_gtid(global_tid);
804  ompt_callbacks.ompt_callback(ompt_callback_master)(
805  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
806  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
807  OMPT_GET_RETURN_ADDRESS(0));
808  }
809 #endif
810 
811  if (__kmp_env_consistency_check) {
812  if (global_tid < 0)
813  KMP_WARNING(ThreadIdentInvalid);
814 
815  if (KMP_MASTER_GTID(global_tid))
816  __kmp_pop_sync(global_tid, ct_master, loc);
817  }
818 }
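/* Lowering sketch for a master construct; only the thread for which
   __kmpc_master() returned 1 executes the body and the matching end call,
   and there is no implied barrier:

     if (__kmpc_master(&loc, gtid)) {
       // ... master-only body ...
       __kmpc_end_master(&loc, gtid);
     }
*/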
819 
827 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
828  int cid = 0;
829  kmp_info_t *th;
830  KMP_DEBUG_ASSERT(__kmp_init_serial);
831 
832  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
833 
834  if (!TCR_4(__kmp_init_parallel))
835  __kmp_parallel_initialize();
836 
837 #if USE_ITT_BUILD
838  __kmp_itt_ordered_prep(gtid);
839 // TODO: ordered_wait_id
840 #endif /* USE_ITT_BUILD */
841 
842  th = __kmp_threads[gtid];
843 
844 #if OMPT_SUPPORT && OMPT_OPTIONAL
845  kmp_team_t *team;
846  omp_wait_id_t lck;
847  void *codeptr_ra;
848  if (ompt_enabled.enabled) {
849  OMPT_STORE_RETURN_ADDRESS(gtid);
850  team = __kmp_team_from_gtid(gtid);
851  lck = (omp_wait_id_t)&team->t.t_ordered.dt.t_value;
852  /* OMPT state update */
853  th->th.ompt_thread_info.wait_id = lck;
854  th->th.ompt_thread_info.state = omp_state_wait_ordered;
855 
856  /* OMPT event callback */
857  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
858  if (ompt_enabled.ompt_callback_mutex_acquire) {
859  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
860  ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
861  (omp_wait_id_t)lck, codeptr_ra);
862  }
863  }
864 #endif
865 
866  if (th->th.th_dispatch->th_deo_fcn != 0)
867  (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
868  else
869  __kmp_parallel_deo(&gtid, &cid, loc);
870 
871 #if OMPT_SUPPORT && OMPT_OPTIONAL
872  if (ompt_enabled.enabled) {
873  /* OMPT state update */
874  th->th.ompt_thread_info.state = omp_state_work_parallel;
875  th->th.ompt_thread_info.wait_id = 0;
876 
877  /* OMPT event callback */
878  if (ompt_enabled.ompt_callback_mutex_acquired) {
879  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
880  ompt_mutex_ordered, (omp_wait_id_t)lck, codeptr_ra);
881  }
882  }
883 #endif
884 
885 #if USE_ITT_BUILD
886  __kmp_itt_ordered_start(gtid);
887 #endif /* USE_ITT_BUILD */
888 }
889 
897 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
898  int cid = 0;
899  kmp_info_t *th;
900 
901  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
902 
903 #if USE_ITT_BUILD
904  __kmp_itt_ordered_end(gtid);
905 // TODO: ordered_wait_id
906 #endif /* USE_ITT_BUILD */
907 
908  th = __kmp_threads[gtid];
909 
910  if (th->th.th_dispatch->th_dxo_fcn != 0)
911  (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
912  else
913  __kmp_parallel_dxo(&gtid, &cid, loc);
914 
915 #if OMPT_SUPPORT && OMPT_OPTIONAL
916  OMPT_STORE_RETURN_ADDRESS(gtid);
917  if (ompt_enabled.ompt_callback_mutex_released) {
918  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
919  ompt_mutex_ordered,
920  (omp_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
921  OMPT_LOAD_RETURN_ADDRESS(gtid));
922  }
923 #endif
924 }
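/* Lowering sketch for an ordered region inside the body of an ordered loop
   (the surrounding loop-dispatch calls are omitted):

     // #pragma omp ordered
     __kmpc_ordered(&loc, gtid);
     // ... code that must run in iteration order ...
     __kmpc_end_ordered(&loc, gtid);
*/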
925 
926 #if KMP_USE_DYNAMIC_LOCK
927 
928 static __forceinline void
929 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
930  kmp_int32 gtid, kmp_indirect_locktag_t tag) {
931  // Pointer to the allocated indirect lock is written to crit, while indexing
932  // is ignored.
933  void *idx;
934  kmp_indirect_lock_t **lck;
935  lck = (kmp_indirect_lock_t **)crit;
936  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
937  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
938  KMP_SET_I_LOCK_LOCATION(ilk, loc);
939  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
940  KA_TRACE(20,
941  ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
942 #if USE_ITT_BUILD
943  __kmp_itt_critical_creating(ilk->lock, loc);
944 #endif
945  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
946  if (status == 0) {
947 #if USE_ITT_BUILD
948  __kmp_itt_critical_destroyed(ilk->lock);
949 #endif
950  // We don't really need to destroy the unclaimed lock here since it will be
951  // cleaned up at program exit.
952  // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
953  }
954  KMP_DEBUG_ASSERT(*lck != NULL);
955 }
956 
957 // Fast-path acquire tas lock
958 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
959  { \
960  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
961  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
962  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
963  if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
964  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
965  kmp_uint32 spins; \
966  KMP_FSYNC_PREPARE(l); \
967  KMP_INIT_YIELD(spins); \
968  if (TCR_4(__kmp_nth) > \
969  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
970  KMP_YIELD(TRUE); \
971  } else { \
972  KMP_YIELD_SPIN(spins); \
973  } \
974  kmp_backoff_t backoff = __kmp_spin_backoff_params; \
975  while ( \
976  KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
977  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
978  __kmp_spin_backoff(&backoff); \
979  if (TCR_4(__kmp_nth) > \
980  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
981  KMP_YIELD(TRUE); \
982  } else { \
983  KMP_YIELD_SPIN(spins); \
984  } \
985  } \
986  } \
987  KMP_FSYNC_ACQUIRED(l); \
988  }
989 
990 // Fast-path test tas lock
991 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
992  { \
993  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
994  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
995  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
996  rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
997  __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
998  }
999 
1000 // Fast-path release tas lock
1001 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1002  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1003 
1004 #if KMP_USE_FUTEX
1005 
1006 #include <sys/syscall.h>
1007 #include <unistd.h>
1008 #ifndef FUTEX_WAIT
1009 #define FUTEX_WAIT 0
1010 #endif
1011 #ifndef FUTEX_WAKE
1012 #define FUTEX_WAKE 1
1013 #endif
1014 
1015 // Fast-path acquire futex lock
1016 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1017  { \
1018  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1019  kmp_int32 gtid_code = (gtid + 1) << 1; \
1020  KMP_MB(); \
1021  KMP_FSYNC_PREPARE(ftx); \
1022  kmp_int32 poll_val; \
1023  while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1024  &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1025  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1026  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1027  if (!cond) { \
1028  if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1029  poll_val | \
1030  KMP_LOCK_BUSY(1, futex))) { \
1031  continue; \
1032  } \
1033  poll_val |= KMP_LOCK_BUSY(1, futex); \
1034  } \
1035  kmp_int32 rc; \
1036  if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1037  NULL, NULL, 0)) != 0) { \
1038  continue; \
1039  } \
1040  gtid_code |= 1; \
1041  } \
1042  KMP_FSYNC_ACQUIRED(ftx); \
1043  }
1044 
1045 // Fast-path test futex lock
1046 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1047  { \
1048  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1049  if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1050  KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
1051  KMP_FSYNC_ACQUIRED(ftx); \
1052  rc = TRUE; \
1053  } else { \
1054  rc = FALSE; \
1055  } \
1056  }
1057 
1058 // Fast-path release futex lock
1059 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1060  { \
1061  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1062  KMP_MB(); \
1063  KMP_FSYNC_RELEASING(ftx); \
1064  kmp_int32 poll_val = \
1065  KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1066  if (KMP_LOCK_STRIP(poll_val) & 1) { \
1067  syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1068  KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1069  } \
1070  KMP_MB(); \
1071  KMP_YIELD(TCR_4(__kmp_nth) > \
1072  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
1073  }
1074 
1075 #endif // KMP_USE_FUTEX
1076 
1077 #else // KMP_USE_DYNAMIC_LOCK
1078 
1079 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1080  ident_t const *loc,
1081  kmp_int32 gtid) {
1082  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1083 
1084  // Because of the double-check, the following load doesn't need to be volatile
1085  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1086 
1087  if (lck == NULL) {
1088  void *idx;
1089 
1090  // Allocate & initialize the lock.
1091  // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1092  lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1093  __kmp_init_user_lock_with_checks(lck);
1094  __kmp_set_user_lock_location(lck, loc);
1095 #if USE_ITT_BUILD
1096  __kmp_itt_critical_creating(lck);
1097 // __kmp_itt_critical_creating() should be called *before* the first usage
1098 // of the underlying lock. It is the only place where we can guarantee it.
1099 // There is a chance the lock will be destroyed without ever being used, but
1100 // that is not a problem, because this is not a real event seen by the user;
1101 // it merely sets a name for the object (lock). See more details in kmp_itt.h.
1102 #endif /* USE_ITT_BUILD */
1103 
1104  // Use a cmpxchg instruction to slam the start of the critical section with
1105  // the lock pointer. If another thread beat us to it, deallocate the lock,
1106  // and use the lock that the other thread allocated.
1107  int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1108 
1109  if (status == 0) {
1110 // Deallocate the lock and reload the value.
1111 #if USE_ITT_BUILD
1112  __kmp_itt_critical_destroyed(lck);
1113 // Let ITT know the lock is destroyed and the same memory location may be reused
1114 // for another purpose.
1115 #endif /* USE_ITT_BUILD */
1116  __kmp_destroy_user_lock_with_checks(lck);
1117  __kmp_user_lock_free(&idx, gtid, lck);
1118  lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1119  KMP_DEBUG_ASSERT(lck != NULL);
1120  }
1121  }
1122  return lck;
1123 }
1124 
1125 #endif // KMP_USE_DYNAMIC_LOCK
1126 
1137 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1138  kmp_critical_name *crit) {
1139 #if KMP_USE_DYNAMIC_LOCK
1140 #if OMPT_SUPPORT && OMPT_OPTIONAL
1141  OMPT_STORE_RETURN_ADDRESS(global_tid);
1142 #endif // OMPT_SUPPORT
1143  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1144 #else
1145  KMP_COUNT_BLOCK(OMP_CRITICAL);
1146 #if OMPT_SUPPORT && OMPT_OPTIONAL
1147  omp_state_t prev_state = omp_state_undefined;
1148  ompt_thread_info_t ti;
1149 #endif
1150  kmp_user_lock_p lck;
1151 
1152  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1153 
1154  // TODO: add THR_OVHD_STATE
1155 
1156  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1157  KMP_CHECK_USER_LOCK_INIT();
1158 
1159  if ((__kmp_user_lock_kind == lk_tas) &&
1160  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1161  lck = (kmp_user_lock_p)crit;
1162  }
1163 #if KMP_USE_FUTEX
1164  else if ((__kmp_user_lock_kind == lk_futex) &&
1165  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1166  lck = (kmp_user_lock_p)crit;
1167  }
1168 #endif
1169  else { // ticket, queuing or drdpa
1170  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1171  }
1172 
1173  if (__kmp_env_consistency_check)
1174  __kmp_push_sync(global_tid, ct_critical, loc, lck);
1175 
1176 // Since the critical directive binds to all threads, not just the current
1177 // team, we have to check this even if we are in a serialized team.
1178 // Also, even if we are the uber thread, we still have to acquire the lock,
1179 // as we have to contend with sibling threads.
1180 
1181 #if USE_ITT_BUILD
1182  __kmp_itt_critical_acquiring(lck);
1183 #endif /* USE_ITT_BUILD */
1184 #if OMPT_SUPPORT && OMPT_OPTIONAL
1185  OMPT_STORE_RETURN_ADDRESS(global_tid);
1186  void *codeptr_ra = NULL;
1187  if (ompt_enabled.enabled) {
1188  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1189  /* OMPT state update */
1190  prev_state = ti.state;
1191  ti.wait_id = (omp_wait_id_t)lck;
1192  ti.state = omp_state_wait_critical;
1193 
1194  /* OMPT event callback */
1195  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1196  if (ompt_enabled.ompt_callback_mutex_acquire) {
1197  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1198  ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1199  (omp_wait_id_t)crit, codeptr_ra);
1200  }
1201  }
1202 #endif
1203  // Value of 'crit' should be good for using as a critical_id of the critical
1204  // section directive.
1205  __kmp_acquire_user_lock_with_checks(lck, global_tid);
1206 
1207 #if USE_ITT_BUILD
1208  __kmp_itt_critical_acquired(lck);
1209 #endif /* USE_ITT_BUILD */
1210 #if OMPT_SUPPORT && OMPT_OPTIONAL
1211  if (ompt_enabled.enabled) {
1212  /* OMPT state update */
1213  ti.state = prev_state;
1214  ti.wait_id = 0;
1215 
1216  /* OMPT event callback */
1217  if (ompt_enabled.ompt_callback_mutex_acquired) {
1218  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1219  ompt_mutex_critical, (omp_wait_id_t)crit, codeptr_ra);
1220  }
1221  }
1222 #endif
1223  KMP_POP_PARTITIONED_TIMER();
1224 
1225  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1226  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1227 #endif // KMP_USE_DYNAMIC_LOCK
1228 }
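/* Lowering sketch for a named critical section. The kmp_critical_name object
   is typically a compiler-emitted, zero-initialized array shared by every
   occurrence of the same name ("crit_name" is hypothetical):

     static kmp_critical_name crit_name = {0};

     __kmpc_critical(&loc, gtid, &crit_name);
     // ... critical-section body ...
     __kmpc_end_critical(&loc, gtid, &crit_name);
*/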
1229 
1230 #if KMP_USE_DYNAMIC_LOCK
1231 
1232 // Converts the given hint to an internal lock implementation
1233 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1234 #if KMP_USE_TSX
1235 #define KMP_TSX_LOCK(seq) lockseq_##seq
1236 #else
1237 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1238 #endif
1239 
1240 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1241 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
1242 #else
1243 #define KMP_CPUINFO_RTM 0
1244 #endif
1245 
1246  // Hints that do not require further logic
1247  if (hint & kmp_lock_hint_hle)
1248  return KMP_TSX_LOCK(hle);
1249  if (hint & kmp_lock_hint_rtm)
1250  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
1251  if (hint & kmp_lock_hint_adaptive)
1252  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1253 
1254  // Rule out conflicting hints first by returning the default lock
1255  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1256  return __kmp_user_lock_seq;
1257  if ((hint & omp_lock_hint_speculative) &&
1258  (hint & omp_lock_hint_nonspeculative))
1259  return __kmp_user_lock_seq;
1260 
1261  // Do not even consider speculation when it appears to be contended
1262  if (hint & omp_lock_hint_contended)
1263  return lockseq_queuing;
1264 
1265  // Uncontended lock without speculation
1266  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1267  return lockseq_tas;
1268 
1269  // HLE lock for speculation
1270  if (hint & omp_lock_hint_speculative)
1271  return KMP_TSX_LOCK(hle);
1272 
1273  return __kmp_user_lock_seq;
1274 }
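/* Example outcomes of the mapping above (actual results depend on build
   options and CPU support):
     omp_lock_hint_contended                -> queuing lock
     omp_lock_hint_uncontended              -> test-and-set lock
     omp_lock_hint_speculative              -> HLE lock if KMP_USE_TSX,
                                               else __kmp_user_lock_seq
     contended | uncontended (conflicting)  -> __kmp_user_lock_seq */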
1275 
1276 #if OMPT_SUPPORT && OMPT_OPTIONAL
1277 #if KMP_USE_DYNAMIC_LOCK
1278 static kmp_mutex_impl_t
1279 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1280  if (user_lock) {
1281  switch (KMP_EXTRACT_D_TAG(user_lock)) {
1282  case 0:
1283  break;
1284 #if KMP_USE_FUTEX
1285  case locktag_futex:
1286  return kmp_mutex_impl_queuing;
1287 #endif
1288  case locktag_tas:
1289  return kmp_mutex_impl_spin;
1290 #if KMP_USE_TSX
1291  case locktag_hle:
1292  return kmp_mutex_impl_speculative;
1293 #endif
1294  default:
1295  return ompt_mutex_impl_unknown;
1296  }
1297  ilock = KMP_LOOKUP_I_LOCK(user_lock);
1298  }
1299  KMP_ASSERT(ilock);
1300  switch (ilock->type) {
1301 #if KMP_USE_TSX
1302  case locktag_adaptive:
1303  case locktag_rtm:
1304  return kmp_mutex_impl_speculative;
1305 #endif
1306  case locktag_nested_tas:
1307  return kmp_mutex_impl_spin;
1308 #if KMP_USE_FUTEX
1309  case locktag_nested_futex:
1310 #endif
1311  case locktag_ticket:
1312  case locktag_queuing:
1313  case locktag_drdpa:
1314  case locktag_nested_ticket:
1315  case locktag_nested_queuing:
1316  case locktag_nested_drdpa:
1317  return kmp_mutex_impl_queuing;
1318  default:
1319  return ompt_mutex_impl_unknown;
1320  }
1321 }
1322 #else
1323 // For locks without dynamic binding
1324 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1325  switch (__kmp_user_lock_kind) {
1326  case lk_tas:
1327  return kmp_mutex_impl_spin;
1328 #if KMP_USE_FUTEX
1329  case lk_futex:
1330 #endif
1331  case lk_ticket:
1332  case lk_queuing:
1333  case lk_drdpa:
1334  return kmp_mutex_impl_queuing;
1335 #if KMP_USE_TSX
1336  case lk_hle:
1337  case lk_rtm:
1338  case lk_adaptive:
1339  return kmp_mutex_impl_speculative;
1340 #endif
1341  default:
1342  return ompt_mutex_impl_unknown;
1343  }
1344 }
1345 #endif // KMP_USE_DYNAMIC_LOCK
1346 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
1347 
1361 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1362  kmp_critical_name *crit, uint32_t hint) {
1363  KMP_COUNT_BLOCK(OMP_CRITICAL);
1364  kmp_user_lock_p lck;
1365 #if OMPT_SUPPORT && OMPT_OPTIONAL
1366  omp_state_t prev_state = omp_state_undefined;
1367  ompt_thread_info_t ti;
1368  // This is the case if called from __kmpc_critical:
1369  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1370  if (!codeptr)
1371  codeptr = OMPT_GET_RETURN_ADDRESS(0);
1372 #endif
1373 
1374  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1375 
1376  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1377  // Check if it is initialized.
1378  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1379  if (*lk == 0) {
1380  kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1381  if (KMP_IS_D_LOCK(lckseq)) {
1382  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1383  KMP_GET_D_TAG(lckseq));
1384  } else {
1385  __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
1386  }
1387  }
1388  // Branch for accessing the actual lock object and set operation. This
1389  // branching is inevitable since this lock initialization does not follow the
1390  // normal dispatch path (lock table is not used).
1391  if (KMP_EXTRACT_D_TAG(lk) != 0) {
1392  lck = (kmp_user_lock_p)lk;
1393  if (__kmp_env_consistency_check) {
1394  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1395  __kmp_map_hint_to_lock(hint));
1396  }
1397 #if USE_ITT_BUILD
1398  __kmp_itt_critical_acquiring(lck);
1399 #endif
1400 #if OMPT_SUPPORT && OMPT_OPTIONAL
1401  if (ompt_enabled.enabled) {
1402  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1403  /* OMPT state update */
1404  prev_state = ti.state;
1405  ti.wait_id = (omp_wait_id_t)lck;
1406  ti.state = omp_state_wait_critical;
1407 
1408  /* OMPT event callback */
1409  if (ompt_enabled.ompt_callback_mutex_acquire) {
1410  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1411  ompt_mutex_critical, (unsigned int)hint,
1412  __ompt_get_mutex_impl_type(crit), (omp_wait_id_t)crit, codeptr);
1413  }
1414  }
1415 #endif
1416 #if KMP_USE_INLINED_TAS
1417  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1418  KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1419  } else
1420 #elif KMP_USE_INLINED_FUTEX
1421  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1422  KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1423  } else
1424 #endif
1425  {
1426  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1427  }
1428  } else {
1429  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1430  lck = ilk->lock;
1431  if (__kmp_env_consistency_check) {
1432  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1433  __kmp_map_hint_to_lock(hint));
1434  }
1435 #if USE_ITT_BUILD
1436  __kmp_itt_critical_acquiring(lck);
1437 #endif
1438 #if OMPT_SUPPORT && OMPT_OPTIONAL
1439  if (ompt_enabled.enabled) {
1440  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1441  /* OMPT state update */
1442  prev_state = ti.state;
1443  ti.wait_id = (omp_wait_id_t)lck;
1444  ti.state = omp_state_wait_critical;
1445 
1446  /* OMPT event callback */
1447  if (ompt_enabled.ompt_callback_mutex_acquire) {
1448  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1449  ompt_mutex_critical, (unsigned int)hint,
1450  __ompt_get_mutex_impl_type(0, ilk), (omp_wait_id_t)crit, codeptr);
1451  }
1452  }
1453 #endif
1454  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1455  }
1456  KMP_POP_PARTITIONED_TIMER();
1457 
1458 #if USE_ITT_BUILD
1459  __kmp_itt_critical_acquired(lck);
1460 #endif /* USE_ITT_BUILD */
1461 #if OMPT_SUPPORT && OMPT_OPTIONAL
1462  if (ompt_enabled.enabled) {
1463  /* OMPT state update */
1464  ti.state = prev_state;
1465  ti.wait_id = 0;
1466 
1467  /* OMPT event callback */
1468  if (ompt_enabled.ompt_callback_mutex_acquired) {
1469  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1470  ompt_mutex_critical, (omp_wait_id_t)crit, codeptr);
1471  }
1472  }
1473 #endif
1474 
1475  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1476  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1477 } // __kmpc_critical_with_hint
1478 
1479 #endif // KMP_USE_DYNAMIC_LOCK
1480 
1490 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1491  kmp_critical_name *crit) {
1492  kmp_user_lock_p lck;
1493 
1494  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1495 
1496 #if KMP_USE_DYNAMIC_LOCK
1497  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1498  lck = (kmp_user_lock_p)crit;
1499  KMP_ASSERT(lck != NULL);
1500  if (__kmp_env_consistency_check) {
1501  __kmp_pop_sync(global_tid, ct_critical, loc);
1502  }
1503 #if USE_ITT_BUILD
1504  __kmp_itt_critical_releasing(lck);
1505 #endif
1506 #if KMP_USE_INLINED_TAS
1507  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1508  KMP_RELEASE_TAS_LOCK(lck, global_tid);
1509  } else
1510 #elif KMP_USE_INLINED_FUTEX
1511  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1512  KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1513  } else
1514 #endif
1515  {
1516  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1517  }
1518  } else {
1519  kmp_indirect_lock_t *ilk =
1520  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1521  KMP_ASSERT(ilk != NULL);
1522  lck = ilk->lock;
1523  if (__kmp_env_consistency_check) {
1524  __kmp_pop_sync(global_tid, ct_critical, loc);
1525  }
1526 #if USE_ITT_BUILD
1527  __kmp_itt_critical_releasing(lck);
1528 #endif
1529  KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1530  }
1531 
1532 #else // KMP_USE_DYNAMIC_LOCK
1533 
1534  if ((__kmp_user_lock_kind == lk_tas) &&
1535  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1536  lck = (kmp_user_lock_p)crit;
1537  }
1538 #if KMP_USE_FUTEX
1539  else if ((__kmp_user_lock_kind == lk_futex) &&
1540  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1541  lck = (kmp_user_lock_p)crit;
1542  }
1543 #endif
1544  else { // ticket, queuing or drdpa
1545  lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1546  }
1547 
1548  KMP_ASSERT(lck != NULL);
1549 
1550  if (__kmp_env_consistency_check)
1551  __kmp_pop_sync(global_tid, ct_critical, loc);
1552 
1553 #if USE_ITT_BUILD
1554  __kmp_itt_critical_releasing(lck);
1555 #endif /* USE_ITT_BUILD */
1556  // Value of 'crit' should be good for using as a critical_id of the critical
1557  // section directive.
1558  __kmp_release_user_lock_with_checks(lck, global_tid);
1559 
1560 #endif // KMP_USE_DYNAMIC_LOCK
1561 
1562 #if OMPT_SUPPORT && OMPT_OPTIONAL
1563  /* OMPT release event triggers after lock is released; place here to trigger
1564  * for all #if branches */
1565  OMPT_STORE_RETURN_ADDRESS(global_tid);
1566  if (ompt_enabled.ompt_callback_mutex_released) {
1567  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1568  ompt_mutex_critical, (omp_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0));
1569  }
1570 #endif
1571 
1572  KMP_POP_PARTITIONED_TIMER();
1573  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1574 }
1575 
1585 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1586  int status;
1587 
1588  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1589 
1590  if (!TCR_4(__kmp_init_parallel))
1591  __kmp_parallel_initialize();
1592 
1593  if (__kmp_env_consistency_check)
1594  __kmp_check_barrier(global_tid, ct_barrier, loc);
1595 
1596 #if OMPT_SUPPORT
1597  omp_frame_t *ompt_frame;
1598  if (ompt_enabled.enabled) {
1599  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1600  if (ompt_frame->enter_frame == NULL)
1601  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
1602  OMPT_STORE_RETURN_ADDRESS(global_tid);
1603  }
1604 #endif
1605 #if USE_ITT_NOTIFY
1606  __kmp_threads[global_tid]->th.th_ident = loc;
1607 #endif
1608  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1609 #if OMPT_SUPPORT && OMPT_OPTIONAL
1610  if (ompt_enabled.enabled) {
1611  ompt_frame->enter_frame = NULL;
1612  }
1613 #endif
1614 
1615  return (status != 0) ? 0 : 1;
1616 }
1617 
1627 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1628  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1629 
1630  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1631 }
1632 
1643 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1644  kmp_int32 ret;
1645 
1646  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1647 
1648  if (!TCR_4(__kmp_init_parallel))
1649  __kmp_parallel_initialize();
1650 
1651  if (__kmp_env_consistency_check) {
1652  if (loc == 0) {
1653  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1654  }
1655  __kmp_check_barrier(global_tid, ct_barrier, loc);
1656  }
1657 
1658 #if OMPT_SUPPORT
1659  omp_frame_t *ompt_frame;
1660  if (ompt_enabled.enabled) {
1661  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1662  if (ompt_frame->enter_frame == NULL)
1663  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
1664  OMPT_STORE_RETURN_ADDRESS(global_tid);
1665  }
1666 #endif
1667 #if USE_ITT_NOTIFY
1668  __kmp_threads[global_tid]->th.th_ident = loc;
1669 #endif
1670  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1671 #if OMPT_SUPPORT && OMPT_OPTIONAL
1672  if (ompt_enabled.enabled) {
1673  ompt_frame->enter_frame = NULL;
1674  }
1675 #endif
1676 
1677  ret = __kmpc_master(loc, global_tid);
1678 
1679  if (__kmp_env_consistency_check) {
1680  /* there's no __kmpc_end_master called; so the (stats) */
1681  /* actions of __kmpc_end_master are done here */
1682 
1683  if (global_tid < 0) {
1684  KMP_WARNING(ThreadIdentInvalid);
1685  }
1686  if (ret) {
1687  /* only one thread should do the pop since only */
1688  /* one did the push (see __kmpc_master()) */
1689 
1690  __kmp_pop_sync(global_tid, ct_master, loc);
1691  }
1692  }
1693 
1694  return (ret);
1695 }
1696 
1697 /* The BARRIER for a SINGLE process section is always explicit */
1709 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1710  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1711 
1712  if (rc) {
1713  // We are going to execute the single statement, so we should count it.
1714  KMP_COUNT_BLOCK(OMP_SINGLE);
1715  KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1716  }
1717 
1718 #if OMPT_SUPPORT && OMPT_OPTIONAL
1719  kmp_info_t *this_thr = __kmp_threads[global_tid];
1720  kmp_team_t *team = this_thr->th.th_team;
1721  int tid = __kmp_tid_from_gtid(global_tid);
1722 
1723  if (ompt_enabled.enabled) {
1724  if (rc) {
1725  if (ompt_enabled.ompt_callback_work) {
1726  ompt_callbacks.ompt_callback(ompt_callback_work)(
1727  ompt_work_single_executor, ompt_scope_begin,
1728  &(team->t.ompt_team_info.parallel_data),
1729  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1730  1, OMPT_GET_RETURN_ADDRESS(0));
1731  }
1732  } else {
1733  if (ompt_enabled.ompt_callback_work) {
1734  ompt_callbacks.ompt_callback(ompt_callback_work)(
1735  ompt_work_single_other, ompt_scope_begin,
1736  &(team->t.ompt_team_info.parallel_data),
1737  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1738  1, OMPT_GET_RETURN_ADDRESS(0));
1739  ompt_callbacks.ompt_callback(ompt_callback_work)(
1740  ompt_work_single_other, ompt_scope_end,
1741  &(team->t.ompt_team_info.parallel_data),
1742  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1743  1, OMPT_GET_RETURN_ADDRESS(0));
1744  }
1745  }
1746  }
1747 #endif
1748 
1749  return rc;
1750 }
1751 
1761 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1762  __kmp_exit_single(global_tid);
1763  KMP_POP_PARTITIONED_TIMER();
1764 
1765 #if OMPT_SUPPORT && OMPT_OPTIONAL
1766  kmp_info_t *this_thr = __kmp_threads[global_tid];
1767  kmp_team_t *team = this_thr->th.th_team;
1768  int tid = __kmp_tid_from_gtid(global_tid);
1769 
1770  if (ompt_enabled.ompt_callback_work) {
1771  ompt_callbacks.ompt_callback(ompt_callback_work)(
1772  ompt_work_single_executor, ompt_scope_end,
1773  &(team->t.ompt_team_info.parallel_data),
1774  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1775  OMPT_GET_RETURN_ADDRESS(0));
1776  }
1777 #endif
1778 }
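/* Lowering sketch for a single construct (no copyprivate clause):

     if (__kmpc_single(&loc, gtid)) {
       // ... executed by exactly one thread of the team ...
       __kmpc_end_single(&loc, gtid);
     }
     __kmpc_barrier(&loc, gtid); // omitted when "nowait" is specified
*/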
1779 
1787 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1788  KMP_POP_PARTITIONED_TIMER();
1789  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1790 
1791 #if OMPT_SUPPORT && OMPT_OPTIONAL
1792  if (ompt_enabled.ompt_callback_work) {
1793  ompt_work_t ompt_work_type = ompt_work_loop;
1794  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1795  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1796  // Determine workshare type
1797  if (loc != NULL) {
1798  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1799  ompt_work_type = ompt_work_loop;
1800  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1801  ompt_work_type = ompt_work_sections;
1802  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1803  ompt_work_type = ompt_work_distribute;
1804  } else {
1805  // use default set above.
1806  // a warning about this case is provided in __kmpc_for_static_init
1807  }
1808  KMP_DEBUG_ASSERT(ompt_work_type);
1809  }
1810  ompt_callbacks.ompt_callback(ompt_callback_work)(
1811  ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1812  &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1813  }
1814 #endif
1815  if (__kmp_env_consistency_check)
1816  __kmp_pop_workshare(global_tid, ct_pdo, loc);
1817 }
1818 
1819 // User routines which take C-style arguments (call by value)
1820 // different from the Fortran equivalent routines
1821 
1822 void ompc_set_num_threads(int arg) {
1823  // !!!!! TODO: check the per-task binding
1824  __kmp_set_num_threads(arg, __kmp_entry_gtid());
1825 }
1826 
1827 void ompc_set_dynamic(int flag) {
1828  kmp_info_t *thread;
1829 
1830  /* For the thread-private implementation of the internal controls */
1831  thread = __kmp_entry_thread();
1832 
1833  __kmp_save_internal_controls(thread);
1834 
1835  set__dynamic(thread, flag ? TRUE : FALSE);
1836 }
1837 
1838 void ompc_set_nested(int flag) {
1839  kmp_info_t *thread;
1840 
1841  /* For the thread-private internal controls implementation */
1842  thread = __kmp_entry_thread();
1843 
1844  __kmp_save_internal_controls(thread);
1845 
1846  set__nested(thread, flag ? TRUE : FALSE);
1847 }
1848 
1849 void ompc_set_max_active_levels(int max_active_levels) {
1850  /* TO DO */
1851  /* we want per-task implementation of this internal control */
1852 
1853  /* For the per-thread internal controls implementation */
1854  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1855 }
1856 
1857 void ompc_set_schedule(omp_sched_t kind, int modifier) {
1858  // !!!!! TODO: check the per-task binding
1859  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1860 }
1861 
1862 int ompc_get_ancestor_thread_num(int level) {
1863  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1864 }
1865 
1866 int ompc_get_team_size(int level) {
1867  return __kmp_get_team_size(__kmp_entry_gtid(), level);
1868 }
1869 
1870 void kmpc_set_stacksize(int arg) {
1871  // __kmp_aux_set_stacksize initializes the library if needed
1872  __kmp_aux_set_stacksize(arg);
1873 }
1874 
1875 void kmpc_set_stacksize_s(size_t arg) {
1876  // __kmp_aux_set_stacksize initializes the library if needed
1877  __kmp_aux_set_stacksize(arg);
1878 }
1879 
1880 void kmpc_set_blocktime(int arg) {
1881  int gtid, tid;
1882  kmp_info_t *thread;
1883 
1884  gtid = __kmp_entry_gtid();
1885  tid = __kmp_tid_from_gtid(gtid);
1886  thread = __kmp_thread_from_gtid(gtid);
1887 
1888  __kmp_aux_set_blocktime(arg, thread, tid);
1889 }
1890 
1891 void kmpc_set_library(int arg) {
1892  // __kmp_user_set_library initializes the library if needed
1893  __kmp_user_set_library((enum library_type)arg);
1894 }
1895 
1896 void kmpc_set_defaults(char const *str) {
1897  // __kmp_aux_set_defaults initializes the library if needed
1898  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
1899 }
1900 
1901 void kmpc_set_disp_num_buffers(int arg) {
1902  // ignore after initialization because some teams have already
1903  // allocated dispatch buffers
1904  if (__kmp_init_serial == 0 && arg > 0)
1905  __kmp_dispatch_num_buffers = arg;
1906 }
1907 
1908 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
1909 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1910  return -1;
1911 #else
1912  if (!TCR_4(__kmp_init_middle)) {
1913  __kmp_middle_initialize();
1914  }
1915  return __kmp_aux_set_affinity_mask_proc(proc, mask);
1916 #endif
1917 }
1918 
1919 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
1920 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1921  return -1;
1922 #else
1923  if (!TCR_4(__kmp_init_middle)) {
1924  __kmp_middle_initialize();
1925  }
1926  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
1927 #endif
1928 }
1929 
1930 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
1931 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1932  return -1;
1933 #else
1934  if (!TCR_4(__kmp_init_middle)) {
1935  __kmp_middle_initialize();
1936  }
1937  return __kmp_aux_get_affinity_mask_proc(proc, mask);
1938 #endif
1939 }
1940 
1941 /* -------------------------------------------------------------------------- */
1986 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
1987  void *cpy_data, void (*cpy_func)(void *, void *),
1988  kmp_int32 didit) {
1989  void **data_ptr;
1990 
1991  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
1992 
1993  KMP_MB();
1994 
1995  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
1996 
1997  if (__kmp_env_consistency_check) {
1998  if (loc == 0) {
1999  KMP_WARNING(ConstructIdentInvalid);
2000  }
2001  }
2002 
2003  // ToDo: Optimize the following two barriers into some kind of split barrier
2004 
2005  if (didit)
2006  *data_ptr = cpy_data;
2007 
2008 #if OMPT_SUPPORT
2009  omp_frame_t *ompt_frame;
2010  if (ompt_enabled.enabled) {
2011  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2012  if (ompt_frame->enter_frame == NULL)
2013  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
2014  OMPT_STORE_RETURN_ADDRESS(gtid);
2015  }
2016 #endif
2017 /* This barrier is not a barrier region boundary */
2018 #if USE_ITT_NOTIFY
2019  __kmp_threads[gtid]->th.th_ident = loc;
2020 #endif
2021  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2022 
2023  if (!didit)
2024  (*cpy_func)(cpy_data, *data_ptr);
2025 
2026 // Consider next barrier a user-visible barrier for barrier region boundaries
2027 // Nesting checks are already handled by the single construct checks
2028 
2029 #if OMPT_SUPPORT
2030  if (ompt_enabled.enabled) {
2031  OMPT_STORE_RETURN_ADDRESS(gtid);
2032  }
2033 #endif
2034 #if USE_ITT_NOTIFY
2035  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2036  // tasks can overwrite the location)

2037 #endif
2038  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2039 #if OMPT_SUPPORT && OMPT_OPTIONAL
2040  if (ompt_enabled.enabled) {
2041  ompt_frame->enter_frame = NULL;
2042  }
2043 #endif
2044 }
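/*
  Usage sketch (illustrative, not part of the runtime): __kmpc_copyprivate is
  the entry point the compiler emits for a `single` construct with a
  `copyprivate` clause, e.g.:

    #include <omp.h>

    void broadcast_seed(void) {
      int seed;
    #pragma omp parallel private(seed)
      {
    #pragma omp single copyprivate(seed)
        { seed = 42; } // one thread produces the value
        // after the construct every thread sees seed == 42
      }
    }

  The thread that executed the single region calls in with didit == 1 and its
  &seed as cpy_data; the remaining threads call in with didit == 0 and copy
  the published value via cpy_func between the two barriers above.
*/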
2045 
2046 /* -------------------------------------------------------------------------- */
2047 
2048 #define INIT_LOCK __kmp_init_user_lock_with_checks
2049 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2050 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2051 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2052 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2053 #define ACQUIRE_NESTED_LOCK_TIMED \
2054  __kmp_acquire_nested_user_lock_with_checks_timed
2055 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2056 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2057 #define TEST_LOCK __kmp_test_user_lock_with_checks
2058 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2059 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2060 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2061 
2062 // TODO: Make check abort messages use location info & pass it into
2063 // with_checks routines
2064 
2065 #if KMP_USE_DYNAMIC_LOCK
2066 
2067 // internal lock initializer
2068 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2069  kmp_dyna_lockseq_t seq) {
2070  if (KMP_IS_D_LOCK(seq)) {
2071  KMP_INIT_D_LOCK(lock, seq);
2072 #if USE_ITT_BUILD
2073  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2074 #endif
2075  } else {
2076  KMP_INIT_I_LOCK(lock, seq);
2077 #if USE_ITT_BUILD
2078  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2079  __kmp_itt_lock_creating(ilk->lock, loc);
2080 #endif
2081  }
2082 }
2083 
2084 // internal nest lock initializer
2085 static __forceinline void
2086 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2087  kmp_dyna_lockseq_t seq) {
2088 #if KMP_USE_TSX
2089  // Don't have nested lock implementation for speculative locks
2090  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
2091  seq = __kmp_user_lock_seq;
2092 #endif
2093  switch (seq) {
2094  case lockseq_tas:
2095  seq = lockseq_nested_tas;
2096  break;
2097 #if KMP_USE_FUTEX
2098  case lockseq_futex:
2099  seq = lockseq_nested_futex;
2100  break;
2101 #endif
2102  case lockseq_ticket:
2103  seq = lockseq_nested_ticket;
2104  break;
2105  case lockseq_queuing:
2106  seq = lockseq_nested_queuing;
2107  break;
2108  case lockseq_drdpa:
2109  seq = lockseq_nested_drdpa;
2110  break;
2111  default:
2112  seq = lockseq_nested_queuing;
2113  }
2114  KMP_INIT_I_LOCK(lock, seq);
2115 #if USE_ITT_BUILD
2116  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2117  __kmp_itt_lock_creating(ilk->lock, loc);
2118 #endif
2119 }
2120 
2121 /* initialize the lock with a hint */
2122 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2123  uintptr_t hint) {
2124  KMP_DEBUG_ASSERT(__kmp_init_serial);
2125  if (__kmp_env_consistency_check && user_lock == NULL) {
2126  KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2127  }
2128 
2129  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2130 
2131 #if OMPT_SUPPORT && OMPT_OPTIONAL
2132  // This is the case, if called from omp_init_lock_with_hint:
2133  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2134  if (!codeptr)
2135  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2136  if (ompt_enabled.ompt_callback_lock_init) {
2137  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2138  ompt_mutex_lock, (omp_lock_hint_t)hint,
2139  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2140  codeptr);
2141  }
2142 #endif
2143 }
2144 
2145 /* initialize the lock with a hint */
2146 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2147  void **user_lock, uintptr_t hint) {
2148  KMP_DEBUG_ASSERT(__kmp_init_serial);
2149  if (__kmp_env_consistency_check && user_lock == NULL) {
2150  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2151  }
2152 
2153  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2154 
2155 #if OMPT_SUPPORT && OMPT_OPTIONAL
2156  // This is the case, if called from omp_init_lock_with_hint:
2157  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2158  if (!codeptr)
2159  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2160  if (ompt_enabled.ompt_callback_lock_init) {
2161  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2162  ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2163  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2164  codeptr);
2165  }
2166 #endif
2167 }
2168 
2169 #endif // KMP_USE_DYNAMIC_LOCK
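/*
  Usage sketch (illustrative, not part of the runtime): omp_init_lock_with_hint
  and omp_init_nest_lock_with_hint (OpenMP 4.5) are the user-level calls that
  reach the two entry points above.

    #include <omp.h>

    omp_lock_t l;

    void init_contended_lock(void) {
      // Ask for an implementation tuned for contention; the runtime may
      // ignore the hint and fall back to the default lock.
      omp_init_lock_with_hint(&l, omp_lock_hint_contended);
    }
*/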
2170 
2171 /* initialize the lock */
2172 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2173 #if KMP_USE_DYNAMIC_LOCK
2174 
2175  KMP_DEBUG_ASSERT(__kmp_init_serial);
2176  if (__kmp_env_consistency_check && user_lock == NULL) {
2177  KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2178  }
2179  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2180 
2181 #if OMPT_SUPPORT && OMPT_OPTIONAL
2182  // This is the case, if called from omp_init_lock_with_hint:
2183  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2184  if (!codeptr)
2185  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2186  if (ompt_enabled.ompt_callback_lock_init) {
2187  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2188  ompt_mutex_lock, omp_lock_hint_none,
2189  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2190  codeptr);
2191  }
2192 #endif
2193 
2194 #else // KMP_USE_DYNAMIC_LOCK
2195 
2196  static char const *const func = "omp_init_lock";
2197  kmp_user_lock_p lck;
2198  KMP_DEBUG_ASSERT(__kmp_init_serial);
2199 
2200  if (__kmp_env_consistency_check) {
2201  if (user_lock == NULL) {
2202  KMP_FATAL(LockIsUninitialized, func);
2203  }
2204  }
2205 
2206  KMP_CHECK_USER_LOCK_INIT();
2207 
2208  if ((__kmp_user_lock_kind == lk_tas) &&
2209  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2210  lck = (kmp_user_lock_p)user_lock;
2211  }
2212 #if KMP_USE_FUTEX
2213  else if ((__kmp_user_lock_kind == lk_futex) &&
2214  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2215  lck = (kmp_user_lock_p)user_lock;
2216  }
2217 #endif
2218  else {
2219  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2220  }
2221  INIT_LOCK(lck);
2222  __kmp_set_user_lock_location(lck, loc);
2223 
2224 #if OMPT_SUPPORT && OMPT_OPTIONAL
2225  // This is the case, if called from omp_init_lock_with_hint:
2226  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2227  if (!codeptr)
2228  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2229  if (ompt_enabled.ompt_callback_lock_init) {
2230  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2231  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2232  (omp_wait_id_t)user_lock, codeptr);
2233  }
2234 #endif
2235 
2236 #if USE_ITT_BUILD
2237  __kmp_itt_lock_creating(lck);
2238 #endif /* USE_ITT_BUILD */
2239 
2240 #endif // KMP_USE_DYNAMIC_LOCK
2241 } // __kmpc_init_lock
2242 
2243 /* initialize the lock */
2244 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2245 #if KMP_USE_DYNAMIC_LOCK
2246 
2247  KMP_DEBUG_ASSERT(__kmp_init_serial);
2248  if (__kmp_env_consistency_check && user_lock == NULL) {
2249  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2250  }
2251  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2252 
2253 #if OMPT_SUPPORT && OMPT_OPTIONAL
2254  // This is the case, if called from omp_init_lock_with_hint:
2255  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2256  if (!codeptr)
2257  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2258  if (ompt_enabled.ompt_callback_lock_init) {
2259  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2260  ompt_mutex_nest_lock, omp_lock_hint_none,
2261  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2262  codeptr);
2263  }
2264 #endif
2265 
2266 #else // KMP_USE_DYNAMIC_LOCK
2267 
2268  static char const *const func = "omp_init_nest_lock";
2269  kmp_user_lock_p lck;
2270  KMP_DEBUG_ASSERT(__kmp_init_serial);
2271 
2272  if (__kmp_env_consistency_check) {
2273  if (user_lock == NULL) {
2274  KMP_FATAL(LockIsUninitialized, func);
2275  }
2276  }
2277 
2278  KMP_CHECK_USER_LOCK_INIT();
2279 
2280  if ((__kmp_user_lock_kind == lk_tas) &&
2281  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2282  OMP_NEST_LOCK_T_SIZE)) {
2283  lck = (kmp_user_lock_p)user_lock;
2284  }
2285 #if KMP_USE_FUTEX
2286  else if ((__kmp_user_lock_kind == lk_futex) &&
2287  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2288  OMP_NEST_LOCK_T_SIZE)) {
2289  lck = (kmp_user_lock_p)user_lock;
2290  }
2291 #endif
2292  else {
2293  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2294  }
2295 
2296  INIT_NESTED_LOCK(lck);
2297  __kmp_set_user_lock_location(lck, loc);
2298 
2299 #if OMPT_SUPPORT && OMPT_OPTIONAL
2300  // This is the case, if called from omp_init_lock_with_hint:
2301  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2302  if (!codeptr)
2303  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2304  if (ompt_enabled.ompt_callback_lock_init) {
2305  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2306  ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2307  (omp_wait_id_t)user_lock, codeptr);
2308  }
2309 #endif
2310 
2311 #if USE_ITT_BUILD
2312  __kmp_itt_lock_creating(lck);
2313 #endif /* USE_ITT_BUILD */
2314 
2315 #endif // KMP_USE_DYNAMIC_LOCK
2316 } // __kmpc_init_nest_lock
2317 
2318 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2319 #if KMP_USE_DYNAMIC_LOCK
2320 
2321 #if USE_ITT_BUILD
2322  kmp_user_lock_p lck;
2323  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2324  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2325  } else {
2326  lck = (kmp_user_lock_p)user_lock;
2327  }
2328  __kmp_itt_lock_destroyed(lck);
2329 #endif
2330 #if OMPT_SUPPORT && OMPT_OPTIONAL
2331  // This is the case, if called from omp_init_lock_with_hint:
2332  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2333  if (!codeptr)
2334  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2335  if (ompt_enabled.ompt_callback_lock_destroy) {
2336  kmp_user_lock_p lck;
2337  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2338  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2339  } else {
2340  lck = (kmp_user_lock_p)user_lock;
2341  }
2342  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2343  ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2344  }
2345 #endif
2346  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2347 #else
2348  kmp_user_lock_p lck;
2349 
2350  if ((__kmp_user_lock_kind == lk_tas) &&
2351  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2352  lck = (kmp_user_lock_p)user_lock;
2353  }
2354 #if KMP_USE_FUTEX
2355  else if ((__kmp_user_lock_kind == lk_futex) &&
2356  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2357  lck = (kmp_user_lock_p)user_lock;
2358  }
2359 #endif
2360  else {
2361  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2362  }
2363 
2364 #if OMPT_SUPPORT && OMPT_OPTIONAL
2365  // This is the case, if called from omp_init_lock_with_hint:
2366  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2367  if (!codeptr)
2368  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2369  if (ompt_enabled.ompt_callback_lock_destroy) {
2370  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2371  ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2372  }
2373 #endif
2374 
2375 #if USE_ITT_BUILD
2376  __kmp_itt_lock_destroyed(lck);
2377 #endif /* USE_ITT_BUILD */
2378  DESTROY_LOCK(lck);
2379 
2380  if ((__kmp_user_lock_kind == lk_tas) &&
2381  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2382  ;
2383  }
2384 #if KMP_USE_FUTEX
2385  else if ((__kmp_user_lock_kind == lk_futex) &&
2386  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2387  ;
2388  }
2389 #endif
2390  else {
2391  __kmp_user_lock_free(user_lock, gtid, lck);
2392  }
2393 #endif // KMP_USE_DYNAMIC_LOCK
2394 } // __kmpc_destroy_lock
2395 
2396 /* destroy the lock */
2397 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2398 #if KMP_USE_DYNAMIC_LOCK
2399 
2400 #if USE_ITT_BUILD
2401  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2402  __kmp_itt_lock_destroyed(ilk->lock);
2403 #endif
2404 #if OMPT_SUPPORT && OMPT_OPTIONAL
2405  // This is the case, if called from omp_init_lock_with_hint:
2406  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2407  if (!codeptr)
2408  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2409  if (ompt_enabled.ompt_callback_lock_destroy) {
2410  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2411  ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
2412  }
2413 #endif
2414  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2415 
2416 #else // KMP_USE_DYNAMIC_LOCK
2417 
2418  kmp_user_lock_p lck;
2419 
2420  if ((__kmp_user_lock_kind == lk_tas) &&
2421  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2422  OMP_NEST_LOCK_T_SIZE)) {
2423  lck = (kmp_user_lock_p)user_lock;
2424  }
2425 #if KMP_USE_FUTEX
2426  else if ((__kmp_user_lock_kind == lk_futex) &&
2427  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2428  OMP_NEST_LOCK_T_SIZE)) {
2429  lck = (kmp_user_lock_p)user_lock;
2430  }
2431 #endif
2432  else {
2433  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2434  }
2435 
2436 #if OMPT_SUPPORT && OMPT_OPTIONAL
2437  // This is the case, if called from omp_init_lock_with_hint:
2438  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2439  if (!codeptr)
2440  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2441  if (ompt_enabled.ompt_callback_lock_destroy) {
2442  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2443  ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
2444  }
2445 #endif
2446 
2447 #if USE_ITT_BUILD
2448  __kmp_itt_lock_destroyed(lck);
2449 #endif /* USE_ITT_BUILD */
2450 
2451  DESTROY_NESTED_LOCK(lck);
2452 
2453  if ((__kmp_user_lock_kind == lk_tas) &&
2454  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2455  OMP_NEST_LOCK_T_SIZE)) {
2456  ;
2457  }
2458 #if KMP_USE_FUTEX
2459  else if ((__kmp_user_lock_kind == lk_futex) &&
2460  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2461  OMP_NEST_LOCK_T_SIZE)) {
2462  ;
2463  }
2464 #endif
2465  else {
2466  __kmp_user_lock_free(user_lock, gtid, lck);
2467  }
2468 #endif // KMP_USE_DYNAMIC_LOCK
2469 } // __kmpc_destroy_nest_lock
2470 
2471 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2472  KMP_COUNT_BLOCK(OMP_set_lock);
2473 #if KMP_USE_DYNAMIC_LOCK
2474  int tag = KMP_EXTRACT_D_TAG(user_lock);
2475 #if USE_ITT_BUILD
2476  __kmp_itt_lock_acquiring(
2477  (kmp_user_lock_p)
2478  user_lock); // itt function will get to the right lock object.
2479 #endif
2480 #if OMPT_SUPPORT && OMPT_OPTIONAL
2481  // This is the case, if called from omp_init_lock_with_hint:
2482  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2483  if (!codeptr)
2484  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2485  if (ompt_enabled.ompt_callback_mutex_acquire) {
2486  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2487  ompt_mutex_lock, omp_lock_hint_none,
2488  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2489  codeptr);
2490  }
2491 #endif
2492 #if KMP_USE_INLINED_TAS
2493  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2494  KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2495  } else
2496 #elif KMP_USE_INLINED_FUTEX
2497  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2498  KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2499  } else
2500 #endif
2501  {
2502  __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2503  }
2504 #if USE_ITT_BUILD
2505  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2506 #endif
2507 #if OMPT_SUPPORT && OMPT_OPTIONAL
2508  if (ompt_enabled.ompt_callback_mutex_acquired) {
2509  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2510  ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2511  }
2512 #endif
2513 
2514 #else // KMP_USE_DYNAMIC_LOCK
2515 
2516  kmp_user_lock_p lck;
2517 
2518  if ((__kmp_user_lock_kind == lk_tas) &&
2519  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2520  lck = (kmp_user_lock_p)user_lock;
2521  }
2522 #if KMP_USE_FUTEX
2523  else if ((__kmp_user_lock_kind == lk_futex) &&
2524  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2525  lck = (kmp_user_lock_p)user_lock;
2526  }
2527 #endif
2528  else {
2529  lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2530  }
2531 
2532 #if USE_ITT_BUILD
2533  __kmp_itt_lock_acquiring(lck);
2534 #endif /* USE_ITT_BUILD */
2535 #if OMPT_SUPPORT && OMPT_OPTIONAL
2536  // This is the case, if called from omp_init_lock_with_hint:
2537  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2538  if (!codeptr)
2539  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2540  if (ompt_enabled.ompt_callback_mutex_acquire) {
2541  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2542  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2543  (omp_wait_id_t)lck, codeptr);
2544  }
2545 #endif
2546 
2547  ACQUIRE_LOCK(lck, gtid);
2548 
2549 #if USE_ITT_BUILD
2550  __kmp_itt_lock_acquired(lck);
2551 #endif /* USE_ITT_BUILD */
2552 
2553 #if OMPT_SUPPORT && OMPT_OPTIONAL
2554  if (ompt_enabled.ompt_callback_mutex_acquired) {
2555  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2556  ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
2557  }
2558 #endif
2559 
2560 #endif // KMP_USE_DYNAMIC_LOCK
2561 }
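/*
  Usage sketch (illustrative, not part of the runtime): omp_set_lock maps onto
  __kmpc_set_lock, and the matching omp_unset_lock onto __kmpc_unset_lock
  further below. A typical guarded update:

    #include <omp.h>

    omp_lock_t counter_lock; // assume omp_init_lock(&counter_lock) ran earlier
    long counter;

    void bump(void) {
      omp_set_lock(&counter_lock); // blocks until the lock is acquired
      ++counter;
      omp_unset_lock(&counter_lock);
    }
*/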
2562 
2563 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2564 #if KMP_USE_DYNAMIC_LOCK
2565 
2566 #if USE_ITT_BUILD
2567  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2568 #endif
2569 #if OMPT_SUPPORT && OMPT_OPTIONAL
2570  // This is the case, if called from omp_init_lock_with_hint:
2571  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2572  if (!codeptr)
2573  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2574  if (ompt_enabled.enabled) {
2575  if (ompt_enabled.ompt_callback_mutex_acquire) {
2576  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2577  ompt_mutex_nest_lock, omp_lock_hint_none,
2578  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2579  codeptr);
2580  }
2581  }
2582 #endif
2583  int acquire_status =
2584  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2585  (void) acquire_status;
2586 #if USE_ITT_BUILD
2587  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2588 #endif
2589 
2590 #if OMPT_SUPPORT && OMPT_OPTIONAL
2591  if (ompt_enabled.enabled) {
2592  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2593  if (ompt_enabled.ompt_callback_mutex_acquired) {
2594  // lock_first
2595  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2596  ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
2597  }
2598  } else {
2599  if (ompt_enabled.ompt_callback_nest_lock) {
2600  // lock_next
2601  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2602  ompt_scope_begin, (omp_wait_id_t)user_lock, codeptr);
2603  }
2604  }
2605  }
2606 #endif
2607 
2608 #else // KMP_USE_DYNAMIC_LOCK
2609  int acquire_status;
2610  kmp_user_lock_p lck;
2611 
2612  if ((__kmp_user_lock_kind == lk_tas) &&
2613  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2614  OMP_NEST_LOCK_T_SIZE)) {
2615  lck = (kmp_user_lock_p)user_lock;
2616  }
2617 #if KMP_USE_FUTEX
2618  else if ((__kmp_user_lock_kind == lk_futex) &&
2619  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2620  OMP_NEST_LOCK_T_SIZE)) {
2621  lck = (kmp_user_lock_p)user_lock;
2622  }
2623 #endif
2624  else {
2625  lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2626  }
2627 
2628 #if USE_ITT_BUILD
2629  __kmp_itt_lock_acquiring(lck);
2630 #endif /* USE_ITT_BUILD */
2631 #if OMPT_SUPPORT && OMPT_OPTIONAL
2632  // This is the case, if called from omp_init_lock_with_hint:
2633  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2634  if (!codeptr)
2635  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2636  if (ompt_enabled.enabled) {
2637  if (ompt_enabled.ompt_callback_mutex_acquire) {
2638  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2639  ompt_mutex_nest_lock, omp_lock_hint_none,
2640  __ompt_get_mutex_impl_type(), (omp_wait_id_t)lck, codeptr);
2641  }
2642  }
2643 #endif
2644 
2645  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2646 
2647 #if USE_ITT_BUILD
2648  __kmp_itt_lock_acquired(lck);
2649 #endif /* USE_ITT_BUILD */
2650 
2651 #if OMPT_SUPPORT && OMPT_OPTIONAL
2652  if (ompt_enabled.enabled) {
2653  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2654  if (ompt_enabled.ompt_callback_mutex_acquired) {
2655  // lock_first
2656  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2657  ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
2658  }
2659  } else {
2660  if (ompt_enabled.ompt_callback_nest_lock) {
2661  // lock_next
2662  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2663  ompt_scope_begin, (omp_wait_id_t)lck, codeptr);
2664  }
2665  }
2666  }
2667 #endif
2668 
2669 #endif // KMP_USE_DYNAMIC_LOCK
2670 }
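/*
  Usage sketch (illustrative, not part of the runtime): a nestable lock can be
  re-acquired by its owner. Only the first acquisition takes the
  ompt_callback_mutex_acquired ("lock_first") path above; re-acquisitions take
  the ompt_callback_nest_lock ("lock_next") path.

    #include <omp.h>

    omp_nest_lock_t nl; // assume omp_init_nest_lock(&nl) ran earlier

    void inner(void) {
      omp_set_nest_lock(&nl); // nesting depth 2: "lock_next"
      omp_unset_nest_lock(&nl);
    }

    void outer(void) {
      omp_set_nest_lock(&nl); // nesting depth 1: "lock_first"
      inner();
      omp_unset_nest_lock(&nl);
    }
*/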
2671 
2672 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2673 #if KMP_USE_DYNAMIC_LOCK
2674 
2675  int tag = KMP_EXTRACT_D_TAG(user_lock);
2676 #if USE_ITT_BUILD
2677  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2678 #endif
2679 #if KMP_USE_INLINED_TAS
2680  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2681  KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2682  } else
2683 #elif KMP_USE_INLINED_FUTEX
2684  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2685  KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2686  } else
2687 #endif
2688  {
2689  __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2690  }
2691 
2692 #if OMPT_SUPPORT && OMPT_OPTIONAL
2693  // This is the case, if called from omp_init_lock_with_hint:
2694  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2695  if (!codeptr)
2696  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2697  if (ompt_enabled.ompt_callback_mutex_released) {
2698  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2699  ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2700  }
2701 #endif
2702 
2703 #else // KMP_USE_DYNAMIC_LOCK
2704 
2705  kmp_user_lock_p lck;
2706 
2707  /* Can't use serial interval since not block structured */
2708  /* release the lock */
2709 
2710  if ((__kmp_user_lock_kind == lk_tas) &&
2711  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2712 #if KMP_OS_LINUX && \
2713  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2714 // "fast" path implemented to fix customer performance issue
2715 #if USE_ITT_BUILD
2716  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2717 #endif /* USE_ITT_BUILD */
2718  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2719  KMP_MB();
2720 
2721 #if OMPT_SUPPORT && OMPT_OPTIONAL
2722  // This is the case, if called from omp_init_lock_with_hint:
2723  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2724  if (!codeptr)
2725  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2726  if (ompt_enabled.ompt_callback_mutex_released) {
2727  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2728  ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
2729  }
2730 #endif
2731 
2732  return;
2733 #else
2734  lck = (kmp_user_lock_p)user_lock;
2735 #endif
2736  }
2737 #if KMP_USE_FUTEX
2738  else if ((__kmp_user_lock_kind == lk_futex) &&
2739  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2740  lck = (kmp_user_lock_p)user_lock;
2741  }
2742 #endif
2743  else {
2744  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2745  }
2746 
2747 #if USE_ITT_BUILD
2748  __kmp_itt_lock_releasing(lck);
2749 #endif /* USE_ITT_BUILD */
2750 
2751  RELEASE_LOCK(lck, gtid);
2752 
2753 #if OMPT_SUPPORT && OMPT_OPTIONAL
2754  // This is the case, if called from omp_init_lock_with_hint:
2755  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2756  if (!codeptr)
2757  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2758  if (ompt_enabled.ompt_callback_mutex_released) {
2759  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2760  ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
2761  }
2762 #endif
2763 
2764 #endif // KMP_USE_DYNAMIC_LOCK
2765 }
2766 
2767 /* release the lock */
2768 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2769 #if KMP_USE_DYNAMIC_LOCK
2770 
2771 #if USE_ITT_BUILD
2772  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2773 #endif
2774  int release_status =
2775  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2776  (void) release_status;
2777 
2778 #if OMPT_SUPPORT && OMPT_OPTIONAL
2779  // This is the case, if called from omp_init_lock_with_hint:
2780  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2781  if (!codeptr)
2782  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2783  if (ompt_enabled.enabled) {
2784  if (release_status == KMP_LOCK_RELEASED) {
2785  if (ompt_enabled.ompt_callback_mutex_released) {
2786  // release_lock_last
2787  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2788  ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
2789  }
2790  } else if (ompt_enabled.ompt_callback_nest_lock) {
2791  // release_lock_prev
2792  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2793  ompt_scope_end, (omp_wait_id_t)user_lock, codeptr);
2794  }
2795  }
2796 #endif
2797 
2798 #else // KMP_USE_DYNAMIC_LOCK
2799 
2800  kmp_user_lock_p lck;
2801 
2802  /* Can't use serial interval since not block structured */
2803 
2804  if ((__kmp_user_lock_kind == lk_tas) &&
2805  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2806  OMP_NEST_LOCK_T_SIZE)) {
2807 #if KMP_OS_LINUX && \
2808  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2809  // "fast" path implemented to fix customer performance issue
2810  kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2811 #if USE_ITT_BUILD
2812  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2813 #endif /* USE_ITT_BUILD */
2814 
2815 #if OMPT_SUPPORT && OMPT_OPTIONAL
2816  int release_status = KMP_LOCK_STILL_HELD;
2817 #endif
2818 
2819  if (--(tl->lk.depth_locked) == 0) {
2820  TCW_4(tl->lk.poll, 0);
2821 #if OMPT_SUPPORT && OMPT_OPTIONAL
2822  release_status = KMP_LOCK_RELEASED;
2823 #endif
2824  }
2825  KMP_MB();
2826 
2827 #if OMPT_SUPPORT && OMPT_OPTIONAL
2828  // This is the case, if called from omp_init_lock_with_hint:
2829  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2830  if (!codeptr)
2831  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2832  if (ompt_enabled.enabled) {
2833  if (release_status == KMP_LOCK_RELEASED) {
2834  if (ompt_enabled.ompt_callback_mutex_released) {
2835  // release_lock_last
2836  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2837  ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
2838  }
2839  } else if (ompt_enabled.ompt_callback_nest_lock) {
2840  // release_lock_previous
2841  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2842  ompt_scope_end, (omp_wait_id_t)lck, codeptr);
2843  }
2844  }
2845 #endif
2846 
2847  return;
2848 #else
2849  lck = (kmp_user_lock_p)user_lock;
2850 #endif
2851  }
2852 #if KMP_USE_FUTEX
2853  else if ((__kmp_user_lock_kind == lk_futex) &&
2854  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2855  OMP_NEST_LOCK_T_SIZE)) {
2856  lck = (kmp_user_lock_p)user_lock;
2857  }
2858 #endif
2859  else {
2860  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
2861  }
2862 
2863 #if USE_ITT_BUILD
2864  __kmp_itt_lock_releasing(lck);
2865 #endif /* USE_ITT_BUILD */
2866 
2867  int release_status;
2868  release_status = RELEASE_NESTED_LOCK(lck, gtid);
2869 #if OMPT_SUPPORT && OMPT_OPTIONAL
2870  // This is the case, if called from omp_init_lock_with_hint:
2871  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2872  if (!codeptr)
2873  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2874  if (ompt_enabled.enabled) {
2875  if (release_status == KMP_LOCK_RELEASED) {
2876  if (ompt_enabled.ompt_callback_mutex_released) {
2877  // release_lock_last
2878  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2879  ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
2880  }
2881  } else if (ompt_enabled.ompt_callback_nest_lock) {
2882  // release_lock_previous
2883  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2884  ompt_scope_end, (omp_wait_id_t)lck, codeptr);
2885  }
2886  }
2887 #endif
2888 
2889 #endif // KMP_USE_DYNAMIC_LOCK
2890 }
2891 
2892 /* try to acquire the lock */
2893 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2894  KMP_COUNT_BLOCK(OMP_test_lock);
2895 
2896 #if KMP_USE_DYNAMIC_LOCK
2897  int rc;
2898  int tag = KMP_EXTRACT_D_TAG(user_lock);
2899 #if USE_ITT_BUILD
2900  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2901 #endif
2902 #if OMPT_SUPPORT && OMPT_OPTIONAL
2903  // This is the case, if called from omp_init_lock_with_hint:
2904  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2905  if (!codeptr)
2906  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2907  if (ompt_enabled.ompt_callback_mutex_acquire) {
2908  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2909  ompt_mutex_lock, omp_lock_hint_none,
2910  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
2911  codeptr);
2912  }
2913 #endif
2914 #if KMP_USE_INLINED_TAS
2915  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2916  KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
2917  } else
2918 #elif KMP_USE_INLINED_FUTEX
2919  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2920  KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
2921  } else
2922 #endif
2923  {
2924  rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2925  }
2926  if (rc) {
2927 #if USE_ITT_BUILD
2928  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2929 #endif
2930 #if OMPT_SUPPORT && OMPT_OPTIONAL
2931  if (ompt_enabled.ompt_callback_mutex_acquired) {
2932  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2933  ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
2934  }
2935 #endif
2936  return FTN_TRUE;
2937  } else {
2938 #if USE_ITT_BUILD
2939  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
2940 #endif
2941  return FTN_FALSE;
2942  }
2943 
2944 #else // KMP_USE_DYNAMIC_LOCK
2945 
2946  kmp_user_lock_p lck;
2947  int rc;
2948 
2949  if ((__kmp_user_lock_kind == lk_tas) &&
2950  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2951  lck = (kmp_user_lock_p)user_lock;
2952  }
2953 #if KMP_USE_FUTEX
2954  else if ((__kmp_user_lock_kind == lk_futex) &&
2955  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2956  lck = (kmp_user_lock_p)user_lock;
2957  }
2958 #endif
2959  else {
2960  lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
2961  }
2962 
2963 #if USE_ITT_BUILD
2964  __kmp_itt_lock_acquiring(lck);
2965 #endif /* USE_ITT_BUILD */
2966 #if OMPT_SUPPORT && OMPT_OPTIONAL
2967  // This is the case, if called from omp_init_lock_with_hint:
2968  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2969  if (!codeptr)
2970  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2971  if (ompt_enabled.ompt_callback_mutex_acquire) {
2972  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2973  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2974  (omp_wait_id_t)lck, codeptr);
2975  }
2976 #endif
2977 
2978  rc = TEST_LOCK(lck, gtid);
2979 #if USE_ITT_BUILD
2980  if (rc) {
2981  __kmp_itt_lock_acquired(lck);
2982  } else {
2983  __kmp_itt_lock_cancelled(lck);
2984  }
2985 #endif /* USE_ITT_BUILD */
2986 #if OMPT_SUPPORT && OMPT_OPTIONAL
2987  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
2988  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2989  ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
2990  }
2991 #endif
2992 
2993  return (rc ? FTN_TRUE : FTN_FALSE);
2994 
2995 /* Can't use serial interval since not block structured */
2996 
2997 #endif // KMP_USE_DYNAMIC_LOCK
2998 }
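/*
  Usage sketch (illustrative, not part of the runtime): omp_test_lock maps onto
  __kmpc_test_lock. It never blocks and returns nonzero only when the lock was
  acquired, so the caller can do other work instead of waiting.

    #include <omp.h>

    omp_lock_t work_lock; // assume omp_init_lock(&work_lock) ran earlier

    void try_work(void) {
      if (omp_test_lock(&work_lock)) {
        // ... exclusive work ...
        omp_unset_lock(&work_lock);
      } else {
        // lock is busy: skip or do something else for now
      }
    }
*/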
2999 
3000 /* try to acquire the lock */
3001 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3002 #if KMP_USE_DYNAMIC_LOCK
3003  int rc;
3004 #if USE_ITT_BUILD
3005  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3006 #endif
3007 #if OMPT_SUPPORT && OMPT_OPTIONAL
3008  // This is the case, if called from omp_init_lock_with_hint:
3009  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3010  if (!codeptr)
3011  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3012  if (ompt_enabled.ompt_callback_mutex_acquire) {
3013  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3014  ompt_mutex_nest_lock, omp_lock_hint_none,
3015  __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
3016  codeptr);
3017  }
3018 #endif
3019  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3020 #if USE_ITT_BUILD
3021  if (rc) {
3022  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3023  } else {
3024  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3025  }
3026 #endif
3027 #if OMPT_SUPPORT && OMPT_OPTIONAL
3028  if (ompt_enabled.enabled && rc) {
3029  if (rc == 1) {
3030  if (ompt_enabled.ompt_callback_mutex_acquired) {
3031  // lock_first
3032  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3033  ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
3034  }
3035  } else {
3036  if (ompt_enabled.ompt_callback_nest_lock) {
3037  // lock_next
3038  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3039  ompt_scope_begin, (omp_wait_id_t)user_lock, codeptr);
3040  }
3041  }
3042  }
3043 #endif
3044  return rc;
3045 
3046 #else // KMP_USE_DYNAMIC_LOCK
3047 
3048  kmp_user_lock_p lck;
3049  int rc;
3050 
3051  if ((__kmp_user_lock_kind == lk_tas) &&
3052  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3053  OMP_NEST_LOCK_T_SIZE)) {
3054  lck = (kmp_user_lock_p)user_lock;
3055  }
3056 #if KMP_USE_FUTEX
3057  else if ((__kmp_user_lock_kind == lk_futex) &&
3058  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3059  OMP_NEST_LOCK_T_SIZE)) {
3060  lck = (kmp_user_lock_p)user_lock;
3061  }
3062 #endif
3063  else {
3064  lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3065  }
3066 
3067 #if USE_ITT_BUILD
3068  __kmp_itt_lock_acquiring(lck);
3069 #endif /* USE_ITT_BUILD */
3070 
3071 #if OMPT_SUPPORT && OMPT_OPTIONAL
3072  // This is the case, if called from omp_init_lock_with_hint:
3073  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3074  if (!codeptr)
3075  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3076  if (ompt_enabled.enabled &&
3077      ompt_enabled.ompt_callback_mutex_acquire) {
3078  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3079  ompt_mutex_nest_lock, omp_lock_hint_none,
3080  __ompt_get_mutex_impl_type(), (omp_wait_id_t)lck, codeptr);
3081  }
3082 #endif
3083 
3084  rc = TEST_NESTED_LOCK(lck, gtid);
3085 #if USE_ITT_BUILD
3086  if (rc) {
3087  __kmp_itt_lock_acquired(lck);
3088  } else {
3089  __kmp_itt_lock_cancelled(lck);
3090  }
3091 #endif /* USE_ITT_BUILD */
3092 #if OMPT_SUPPORT && OMPT_OPTIONAL
3093  if (ompt_enabled.enabled && rc) {
3094  if (rc == 1) {
3095  if (ompt_enabled.ompt_callback_mutex_acquired) {
3096  // lock_first
3097  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3098  ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
3099  }
3100  } else {
3101  if (ompt_enabled.ompt_callback_nest_lock) {
3102  // lock_next
3103  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3104  ompt_scope_begin, (omp_wait_id_t)lck, codeptr);
3105  }
3106  }
3107  }
3108 #endif
3109  return rc;
3110 
3111 /* Can't use serial interval since not block structured */
3112 
3113 #endif // KMP_USE_DYNAMIC_LOCK
3114 }
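/*
  Usage sketch (illustrative, not part of the runtime): omp_test_nest_lock maps
  onto __kmpc_test_nest_lock and returns the new nesting count on success
  (so rc == 1 above is the first acquisition) and 0 on failure.

    #include <omp.h>

    omp_nest_lock_t nl; // assume omp_init_nest_lock(&nl) ran earlier

    void try_twice(void) {
      if (omp_test_nest_lock(&nl)) {           // 1: first acquisition
        int depth = omp_test_nest_lock(&nl);   // 2: owner re-acquires
        if (depth)
          omp_unset_nest_lock(&nl);
        omp_unset_nest_lock(&nl);
      }
    }
*/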
3115 
3116 // Interface to fast scalable reduce methods routines
3117 
3118 // keep the selected method in a thread local structure for cross-function
3119 // usage: will be used in __kmpc_end_reduce* functions;
3120 // another solution: to re-determine the method one more time in
3121 // __kmpc_end_reduce* functions (new prototype required then)
3122 // AT: which solution is better?
3123 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3124  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3125 
3126 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3127  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3128 
3129 // description of the packed_reduction_method variable: look at the macros in
3130 // kmp.h
3131 
3132 // used in a critical section reduce block
3133 static __forceinline void
3134 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3135  kmp_critical_name *crit) {
3136 
3137  // this lock was visible to a customer and to the threading profile tool as a
3138  // serial overhead span (although it's used for an internal purpose only)
3139  // why was it visible in the previous implementation?
3140  // should we keep it visible in the new reduce block?
3141  kmp_user_lock_p lck;
3142 
3143 #if KMP_USE_DYNAMIC_LOCK
3144 
3145  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3146  // Check if it is initialized.
3147  if (*lk == 0) {
3148  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3149  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3150  KMP_GET_D_TAG(__kmp_user_lock_seq));
3151  } else {
3152  __kmp_init_indirect_csptr(crit, loc, global_tid,
3153  KMP_GET_I_TAG(__kmp_user_lock_seq));
3154  }
3155  }
3156  // Branch for accessing the actual lock object and set operation. This
3157  // branching is inevitable since this lock initialization does not follow the
3158  // normal dispatch path (lock table is not used).
3159  if (KMP_EXTRACT_D_TAG(lk) != 0) {
3160  lck = (kmp_user_lock_p)lk;
3161  KMP_DEBUG_ASSERT(lck != NULL);
3162  if (__kmp_env_consistency_check) {
3163  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3164  }
3165  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3166  } else {
3167  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3168  lck = ilk->lock;
3169  KMP_DEBUG_ASSERT(lck != NULL);
3170  if (__kmp_env_consistency_check) {
3171  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3172  }
3173  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3174  }
3175 
3176 #else // KMP_USE_DYNAMIC_LOCK
3177 
3178  // We know that the fast reduction code is only emitted by Intel compilers
3179  // with 32 byte critical sections. If there isn't enough space, then we
3180  // have to use a pointer.
3181  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3182  lck = (kmp_user_lock_p)crit;
3183  } else {
3184  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3185  }
3186  KMP_DEBUG_ASSERT(lck != NULL);
3187 
3188  if (__kmp_env_consistency_check)
3189  __kmp_push_sync(global_tid, ct_critical, loc, lck);
3190 
3191  __kmp_acquire_user_lock_with_checks(lck, global_tid);
3192 
3193 #endif // KMP_USE_DYNAMIC_LOCK
3194 }
3195 
3196 // used in a critical section reduce block
3197 static __forceinline void
3198 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3199  kmp_critical_name *crit) {
3200 
3201  kmp_user_lock_p lck;
3202 
3203 #if KMP_USE_DYNAMIC_LOCK
3204 
3205  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3206  lck = (kmp_user_lock_p)crit;
3207  if (__kmp_env_consistency_check)
3208  __kmp_pop_sync(global_tid, ct_critical, loc);
3209  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3210  } else {
3211  kmp_indirect_lock_t *ilk =
3212  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3213  if (__kmp_env_consistency_check)
3214  __kmp_pop_sync(global_tid, ct_critical, loc);
3215  KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3216  }
3217 
3218 #else // KMP_USE_DYNAMIC_LOCK
3219 
3220  // We know that the fast reduction code is only emitted by Intel compilers
3221  // with 32 byte critical sections. If there isn't enough space, then we have
3222  // to use a pointer.
3223  if (__kmp_base_user_lock_size > 32) {
3224  lck = *((kmp_user_lock_p *)crit);
3225  KMP_ASSERT(lck != NULL);
3226  } else {
3227  lck = (kmp_user_lock_p)crit;
3228  }
3229 
3230  if (__kmp_env_consistency_check)
3231  __kmp_pop_sync(global_tid, ct_critical, loc);
3232 
3233  __kmp_release_user_lock_with_checks(lck, global_tid);
3234 
3235 #endif // KMP_USE_DYNAMIC_LOCK
3236 } // __kmp_end_critical_section_reduce_block
3237 
3238 #if OMP_40_ENABLED
3239 static __forceinline int
3240 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3241  int *task_state) {
3242  kmp_team_t *team;
3243 
3244  // Check whether we are inside a teams construct.
3245  if (th->th.th_teams_microtask) {
3246  *team_p = team = th->th.th_team;
3247  if (team->t.t_level == th->th.th_teams_level) {
3248  // This is a reduction at the teams construct.
3249  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3250  // Let's swap teams temporarily for the reduction.
3251  th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3252  th->th.th_team = team->t.t_parent;
3253  th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3254  th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3255  *task_state = th->th.th_task_state;
3256  th->th.th_task_state = 0;
3257 
3258  return 1;
3259  }
3260  }
3261  return 0;
3262 }
3263 
3264 static __forceinline void
3265 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3266  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3267  th->th.th_info.ds.ds_tid = 0;
3268  th->th.th_team = team;
3269  th->th.th_team_nproc = team->t.t_nproc;
3270  th->th.th_task_team = team->t.t_task_team[task_state];
3271  th->th.th_task_state = task_state;
3272 }
3273 #endif
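/*
  Usage sketch (illustrative, not part of the runtime): the swap/restore pair
  above lets a reduction placed directly on a teams construct reuse the regular
  reduction machinery by temporarily acting on the parent team. Assuming a host
  teams region (OpenMP 5.0, or a target teams region executed by the host):

    #include <omp.h>

    int sum_team_ids(void) {
      int s = 0;
    #pragma omp teams num_teams(4) reduction(+ : s)
      s += omp_get_team_num(); // combined across the league of teams
      // num_teams() is an upper bound, so the result depends on how many
      // teams were actually created (0+1+2+3 == 6 if all four ran).
      return s;
    }
*/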
3274 
3275 /* 2.a.i. Reduce Block without a terminating barrier */
3291 kmp_int32
3292 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3293  size_t reduce_size, void *reduce_data,
3294  void (*reduce_func)(void *lhs_data, void *rhs_data),
3295  kmp_critical_name *lck) {
3296 
3297  KMP_COUNT_BLOCK(REDUCE_nowait);
3298  int retval = 0;
3299  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3300 #if OMP_40_ENABLED
3301  kmp_info_t *th;
3302  kmp_team_t *team;
3303  int teams_swapped = 0, task_state;
3304 #endif
3305  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3306 
3307  // why do we need this initialization here at all?
3308  // The reduction clause cannot be used as a stand-alone directive.
3309 
3310  // do not call __kmp_serial_initialize(), it will be called by
3311  // __kmp_parallel_initialize() if needed
3312  // possible detection of false-positive race by the threadchecker ???
3313  if (!TCR_4(__kmp_init_parallel))
3314  __kmp_parallel_initialize();
3315 
3316 // check correctness of reduce block nesting
3317 #if KMP_USE_DYNAMIC_LOCK
3318  if (__kmp_env_consistency_check)
3319  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3320 #else
3321  if (__kmp_env_consistency_check)
3322  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3323 #endif
3324 
3325 #if OMP_40_ENABLED
3326  th = __kmp_thread_from_gtid(global_tid);
3327  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3328 #endif // OMP_40_ENABLED
3329 
3330  // packed_reduction_method value will be reused by __kmp_end_reduce* function,
3331  // the value should be kept in a variable
3332  // the variable should be either a construct-specific or thread-specific
3333  // property, not a team specific property
3334  // (a thread can reach the next reduce block on the next construct, reduce
3335  // method may differ on the next construct)
3336  // an ident_t "loc" parameter could be used as a construct-specific property
3337  // (what if loc == 0?)
3338  // (if both construct-specific and team-specific variables were shared,
3339  // then unnecessary extra syncs would be needed)
3340  // a thread-specific variable is better regarding two issues above (next
3341  // construct and extra syncs)
3342  // a thread-specific "th_local.reduction_method" variable is used currently
3343  // each thread executes 'determine' and 'set' lines (no need to execute by one
3344  // thread, to avoid unnecessary extra syncs)
3345 
3346  packed_reduction_method = __kmp_determine_reduction_method(
3347  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3348  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3349 
3350  if (packed_reduction_method == critical_reduce_block) {
3351 
3352  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3353  retval = 1;
3354 
3355  } else if (packed_reduction_method == empty_reduce_block) {
3356 
3357  // usage: if team size == 1, no synchronization is required (Intel
3358  // platforms only)
3359  retval = 1;
3360 
3361  } else if (packed_reduction_method == atomic_reduce_block) {
3362 
3363  retval = 2;
3364 
3365  // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3366  // won't be called by the code gen)
3367  // (this is not ideal: the checking block has already been closed by
3368  // this 'pop',
3369  // but the atomic operation has not executed yet; it will run
3370  // slightly later, literally on the next instruction)
3371  if (__kmp_env_consistency_check)
3372  __kmp_pop_sync(global_tid, ct_reduce, loc);
3373 
3374  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3375  tree_reduce_block)) {
3376 
3377 // AT: performance issue: a real barrier here
3378 // AT: (if master goes slow, other threads are blocked here waiting for the
3379 // master to come and release them)
3380 // AT: (it's not what a customer might expect specifying NOWAIT clause)
3381 // AT: (specifying NOWAIT won't result in improvement of performance, it'll
3382 // be confusing to a customer)
3383 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3384 // might go faster and be more in line with the sense of NOWAIT
3385 // AT: TODO: run the EPCC benchmark and compare times
3386 
3387 // this barrier should be invisible to a customer and to the threading profile
3388 // tool (it's neither a terminating barrier nor customer's code, it's
3389 // used for an internal purpose)
3390 #if OMPT_SUPPORT
3391  // JP: can this barrier potentially lead to task scheduling?
3392  // JP: as long as there is a barrier in the implementation, OMPT should and
3393  // will provide the barrier events
3394  // so we set-up the necessary frame/return addresses.
3395  omp_frame_t *ompt_frame;
3396  if (ompt_enabled.enabled) {
3397  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3398  if (ompt_frame->enter_frame == NULL)
3399  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3400  OMPT_STORE_RETURN_ADDRESS(global_tid);
3401  }
3402 #endif
3403 #if USE_ITT_NOTIFY
3404  __kmp_threads[global_tid]->th.th_ident = loc;
3405 #endif
3406  retval =
3407  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3408  global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3409  retval = (retval != 0) ? (0) : (1);
3410 #if OMPT_SUPPORT && OMPT_OPTIONAL
3411  if (ompt_enabled.enabled) {
3412  ompt_frame->enter_frame = NULL;
3413  }
3414 #endif
3415 
3416  // all other workers except master should do this pop here
3417  // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3418  if (__kmp_env_consistency_check) {
3419  if (retval == 0) {
3420  __kmp_pop_sync(global_tid, ct_reduce, loc);
3421  }
3422  }
3423 
3424  } else {
3425 
3426  // should never reach this block
3427  KMP_ASSERT(0); // "unexpected method"
3428  }
3429 #if OMP_40_ENABLED
3430  if (teams_swapped) {
3431  __kmp_restore_swapped_teams(th, team, task_state);
3432  }
3433 #endif
3434  KA_TRACE(
3435  10,
3436  ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3437  global_tid, packed_reduction_method, retval));
3438 
3439  return retval;
3440 }
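/*
  Codegen sketch (illustrative; the exact shape is compiler-dependent): for

    #pragma omp for reduction(+ : sum) nowait

  the compiler emits roughly the following after the loop, where local_sum is
  the thread's private partial result, reduce_fn is a generated routine that
  adds rhs_data into lhs_data, and crit is a static kmp_critical_name (all
  names here are placeholders):

    int ret = __kmpc_reduce_nowait(&loc, gtid, 1, sizeof(local_sum),
                                   &local_sum, reduce_fn, &crit);
    switch (ret) {
    case 1: // critical / empty / tree path: combine, then close the region
      sum += local_sum;
      __kmpc_end_reduce_nowait(&loc, gtid, &crit);
      break;
    case 2: // atomic path: combine atomically, no end call is generated
    #pragma omp atomic
      sum += local_sum;
      break;
    default: // 0: this thread's contribution was already combined
      break;
    }
*/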
3441 
3450 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3451  kmp_critical_name *lck) {
3452 
3453  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3454 
3455  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3456 
3457  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3458 
3459  if (packed_reduction_method == critical_reduce_block) {
3460 
3461  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3462 
3463  } else if (packed_reduction_method == empty_reduce_block) {
3464 
3465  // usage: if team size == 1, no synchronization is required (on Intel
3466  // platforms only)
3467 
3468  } else if (packed_reduction_method == atomic_reduce_block) {
3469 
3470  // neither master nor other workers should get here
3471  // (code gen does not generate this call in case 2: atomic reduce block)
3472  // actually it would be better to remove this else-if entirely;
3473  // after removal this value will be checked by the 'else' and will assert
3474 
3475  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3476  tree_reduce_block)) {
3477 
3478  // only master gets here
3479 
3480  } else {
3481 
3482  // should never reach this block
3483  KMP_ASSERT(0); // "unexpected method"
3484  }
3485 
3486  if (__kmp_env_consistency_check)
3487  __kmp_pop_sync(global_tid, ct_reduce, loc);
3488 
3489  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3490  global_tid, packed_reduction_method));
3491 
3492  return;
3493 }
3494 
3495 /* 2.a.ii. Reduce Block with a terminating barrier */
3496 
3512 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3513  size_t reduce_size, void *reduce_data,
3514  void (*reduce_func)(void *lhs_data, void *rhs_data),
3515  kmp_critical_name *lck) {
3516  KMP_COUNT_BLOCK(REDUCE_wait);
3517  int retval = 0;
3518  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3519 #if OMP_40_ENABLED
3520  kmp_info_t *th;
3521  kmp_team_t *team;
3522  int teams_swapped = 0, task_state;
3523 #endif
3524 
3525  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3526 
3527  // why do we need this initialization here at all?
3528  // The reduction clause cannot be used as a stand-alone directive.
3529 
3530  // do not call __kmp_serial_initialize(), it will be called by
3531  // __kmp_parallel_initialize() if needed
3532  // possible detection of false-positive race by the threadchecker ???
3533  if (!TCR_4(__kmp_init_parallel))
3534  __kmp_parallel_initialize();
3535 
3536 // check correctness of reduce block nesting
3537 #if KMP_USE_DYNAMIC_LOCK
3538  if (__kmp_env_consistency_check)
3539  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3540 #else
3541  if (__kmp_env_consistency_check)
3542  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3543 #endif
3544 
3545 #if OMP_40_ENABLED
3546  th = __kmp_thread_from_gtid(global_tid);
3547  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3548 #endif // OMP_40_ENABLED
3549 
3550  packed_reduction_method = __kmp_determine_reduction_method(
3551  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3552  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3553 
3554  if (packed_reduction_method == critical_reduce_block) {
3555 
3556  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3557  retval = 1;
3558 
3559  } else if (packed_reduction_method == empty_reduce_block) {
3560 
3561  // usage: if team size == 1, no synchronization is required (Intel
3562  // platforms only)
3563  retval = 1;
3564 
3565  } else if (packed_reduction_method == atomic_reduce_block) {
3566 
3567  retval = 2;
3568 
3569  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3570  tree_reduce_block)) {
3571 
3572 // case tree_reduce_block:
3573 // this barrier should be visible to a customer and to the threading profile
3574 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3575 #if OMPT_SUPPORT
3576  omp_frame_t *ompt_frame;
3577  if (ompt_enabled.enabled) {
3578  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3579  if (ompt_frame->enter_frame == NULL)
3580  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3581  OMPT_STORE_RETURN_ADDRESS(global_tid);
3582  }
3583 #endif
3584 #if USE_ITT_NOTIFY
3585  __kmp_threads[global_tid]->th.th_ident =
3586  loc; // needed for correct notification of frames
3587 #endif
3588  retval =
3589  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3590  global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3591  retval = (retval != 0) ? (0) : (1);
3592 #if OMPT_SUPPORT && OMPT_OPTIONAL
3593  if (ompt_enabled.enabled) {
3594  ompt_frame->enter_frame = NULL;
3595  }
3596 #endif
3597 
3598  // all other workers except master should do this pop here
3599  // ( none of other workers except master will enter __kmpc_end_reduce() )
3600  if (__kmp_env_consistency_check) {
3601  if (retval == 0) { // 0: all other workers; 1: master
3602  __kmp_pop_sync(global_tid, ct_reduce, loc);
3603  }
3604  }
3605 
3606  } else {
3607 
3608  // should never reach this block
3609  KMP_ASSERT(0); // "unexpected method"
3610  }
3611 #if OMP_40_ENABLED
3612  if (teams_swapped) {
3613  __kmp_restore_swapped_teams(th, team, task_state);
3614  }
3615 #endif
3616 
3617  KA_TRACE(10,
3618  ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3619  global_tid, packed_reduction_method, retval));
3620 
3621  return retval;
3622 }
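/*
  Usage sketch (illustrative, not part of the runtime): without `nowait` the
  compiler calls the blocking pair __kmpc_reduce / __kmpc_end_reduce instead,
  and the construct's terminating barrier is implemented by these calls, e.g.:

    #include <omp.h>

    double dot(const double *a, const double *b, int n) {
      double sum = 0.0;
    #pragma omp parallel for reduction(+ : sum)
      for (int i = 0; i < n; ++i)
        sum += a[i] * b[i];
      return sum;
    }

  The generated call pattern mirrors the one sketched after
  __kmpc_reduce_nowait, with __kmpc_end_reduce (below) supplying the visible
  barrier or releasing the split barrier for the tree method.
*/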
3623 
3634 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3635  kmp_critical_name *lck) {
3636 
3637  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3638 #if OMP_40_ENABLED
3639  kmp_info_t *th;
3640  kmp_team_t *team;
3641  int teams_swapped = 0, task_state;
3642 #endif
3643 
3644  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3645 
3646 #if OMP_40_ENABLED
3647  th = __kmp_thread_from_gtid(global_tid);
3648  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3649 #endif // OMP_40_ENABLED
3650 
3651  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3652 
3653  // this barrier should be visible to a customer and to the threading profile
3654  // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3655 
3656  if (packed_reduction_method == critical_reduce_block) {
3657 
3658  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3659 
3660 // TODO: implicit barrier: should be exposed
3661 #if OMPT_SUPPORT
3662  omp_frame_t *ompt_frame;
3663  if (ompt_enabled.enabled) {
3664  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3665  if (ompt_frame->enter_frame == NULL)
3666  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3667  OMPT_STORE_RETURN_ADDRESS(global_tid);
3668  }
3669 #endif
3670 #if USE_ITT_NOTIFY
3671  __kmp_threads[global_tid]->th.th_ident = loc;
3672 #endif
3673  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3674 #if OMPT_SUPPORT && OMPT_OPTIONAL
3675  if (ompt_enabled.enabled) {
3676  ompt_frame->enter_frame = NULL;
3677  }
3678 #endif
3679 
3680  } else if (packed_reduction_method == empty_reduce_block) {
3681 
3682 // usage: if team size==1, no synchronization is required (Intel platforms only)
3683 
3684 // TODO: implicit barrier: should be exposed
3685 #if OMPT_SUPPORT
3686  omp_frame_t *ompt_frame;
3687  if (ompt_enabled.enabled) {
3688  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3689  if (ompt_frame->enter_frame == NULL)
3690  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3691  OMPT_STORE_RETURN_ADDRESS(global_tid);
3692  }
3693 #endif
3694 #if USE_ITT_NOTIFY
3695  __kmp_threads[global_tid]->th.th_ident = loc;
3696 #endif
3697  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3698 #if OMPT_SUPPORT && OMPT_OPTIONAL
3699  if (ompt_enabled.enabled) {
3700  ompt_frame->enter_frame = NULL;
3701  }
3702 #endif
3703 
3704  } else if (packed_reduction_method == atomic_reduce_block) {
3705 
3706 #if OMPT_SUPPORT
3707  omp_frame_t *ompt_frame;
3708  if (ompt_enabled.enabled) {
3709  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3710  if (ompt_frame->enter_frame == NULL)
3711  ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
3712  OMPT_STORE_RETURN_ADDRESS(global_tid);
3713  }
3714 #endif
3715 // TODO: implicit barrier: should be exposed
3716 #if USE_ITT_NOTIFY
3717  __kmp_threads[global_tid]->th.th_ident = loc;
3718 #endif
3719  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3720 #if OMPT_SUPPORT && OMPT_OPTIONAL
3721  if (ompt_enabled.enabled) {
3722  ompt_frame->enter_frame = NULL;
3723  }
3724 #endif
3725 
3726  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3727  tree_reduce_block)) {
3728 
3729  // only master executes here (master releases all other workers)
3730  __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3731  global_tid);
3732 
3733  } else {
3734 
3735  // should never reach this block
3736  KMP_ASSERT(0); // "unexpected method"
3737  }
3738 #if OMP_40_ENABLED
3739  if (teams_swapped) {
3740  __kmp_restore_swapped_teams(th, team, task_state);
3741  }
3742 #endif
3743 
3744  if (__kmp_env_consistency_check)
3745  __kmp_pop_sync(global_tid, ct_reduce, loc);
3746 
3747  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3748  global_tid, packed_reduction_method));
3749 
3750  return;
3751 }
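
// A minimal sketch (not part of the runtime) of the call sequence a compiler
// might emit for a blocking "reduction(+:sum)" against __kmpc_reduce /
// __kmpc_end_reduce. The callback, the lock object and the helper names are
// hypothetical; only the __kmpc_* entry points are defined in this file.
static void example_add_doubles(void *lhs_data, void *rhs_data) {
  *(double *)lhs_data += *(double *)rhs_data; // combine two private copies
}
static kmp_critical_name example_reduce_lck; // zero-initialized lock/name storage
static void example_reduce_emission(ident_t *loc, double *shared_sum,
                                    double private_sum) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc);
  kmp_int32 ret = __kmpc_reduce(loc, gtid, 1, sizeof(double), &private_sum,
                                example_add_doubles, &example_reduce_lck);
  if (ret == 1) {
    // this thread combines its private copy into the shared variable
    *shared_sum += private_sum;
    __kmpc_end_reduce(loc, gtid, &example_reduce_lck);
  } else if (ret == 2) {
    // atomic path: each thread updates the shared variable atomically
    // (a real compiler would emit an atomic add here), then joins the
    // barrier supplied by __kmpc_end_reduce above
    __kmpc_end_reduce(loc, gtid, &example_reduce_lck);
  }
  // ret == 0: a worker that has nothing left to do for this reduction
}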
3752 
3753 #undef __KMP_GET_REDUCTION_METHOD
3754 #undef __KMP_SET_REDUCTION_METHOD
3755 
3756 /* end of interface to fast scalable reduce routines */
3757 
3758 kmp_uint64 __kmpc_get_taskid() {
3759 
3760  kmp_int32 gtid;
3761  kmp_info_t *thread;
3762 
3763  gtid = __kmp_get_gtid();
3764  if (gtid < 0) {
3765  return 0;
3766  }
3767  thread = __kmp_thread_from_gtid(gtid);
3768  return thread->th.th_current_task->td_task_id;
3769 
3770 } // __kmpc_get_taskid
3771 
3772 kmp_uint64 __kmpc_get_parent_taskid() {
3773 
3774  kmp_int32 gtid;
3775  kmp_info_t *thread;
3776  kmp_taskdata_t *parent_task;
3777 
3778  gtid = __kmp_get_gtid();
3779  if (gtid < 0) {
3780  return 0;
3781  }
3782  thread = __kmp_thread_from_gtid(gtid);
3783  parent_task = thread->th.th_current_task->td_parent;
3784  return (parent_task == NULL ? 0 : parent_task->td_task_id);
3785 
3786 } // __kmpc_get_parent_taskid
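
// Minimal sketch (not part of the runtime): both helpers above return 0 when the
// calling thread is not known to the runtime (gtid < 0); __kmpc_get_parent_taskid
// also returns 0 for the initial implicit task, which has no parent.
static int example_task_has_parent(void) {
  kmp_uint64 self = __kmpc_get_taskid();
  kmp_uint64 parent = __kmpc_get_parent_taskid();
  // a nonzero parent id means the current task was created inside another task
  return self != 0 && parent != 0;
}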
3787 
3788 #if OMP_45_ENABLED
3789 
3800 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3801  const struct kmp_dim *dims) {
3802  int j, idx;
3803  kmp_int64 last, trace_count;
3804  kmp_info_t *th = __kmp_threads[gtid];
3805  kmp_team_t *team = th->th.th_team;
3806  kmp_uint32 *flags;
3807  kmp_disp_t *pr_buf = th->th.th_dispatch;
3808  dispatch_shared_info_t *sh_buf;
3809 
3810  KA_TRACE(
3811  20,
3812  ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3813  gtid, num_dims, !team->t.t_serialized));
3814  KMP_DEBUG_ASSERT(dims != NULL);
3815  KMP_DEBUG_ASSERT(num_dims > 0);
3816 
3817  if (team->t.t_serialized) {
3818  KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
3819  return; // no dependencies if team is serialized
3820  }
3821  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3822  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
3823  // the next loop
3824  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
3825 
3826  // Save bounds info into allocated private buffer
3827  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3828  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
3829  th, sizeof(kmp_int64) * (4 * num_dims + 1));
3830  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3831  pr_buf->th_doacross_info[0] =
3832  (kmp_int64)num_dims; // first element is number of dimensions
3833  // Also save the address of num_done so it can be accessed later without
3834  // knowing the buffer index
3835  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3836  pr_buf->th_doacross_info[2] = dims[0].lo;
3837  pr_buf->th_doacross_info[3] = dims[0].up;
3838  pr_buf->th_doacross_info[4] = dims[0].st;
3839  last = 5;
3840  for (j = 1; j < num_dims; ++j) {
3841  // range of this dimension; ranges of all dimensions but the first (dims[0]) are kept in the buffer
3842  kmp_int64 range_length;
3843  if (dims[j].st == 1) { // most common case
3844  // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
3845  range_length = dims[j].up - dims[j].lo + 1;
3846  } else {
3847  if (dims[j].st > 0) {
3848  KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3849  range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3850  } else { // negative increment
3851  KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3852  range_length =
3853  (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3854  }
3855  }
3856  pr_buf->th_doacross_info[last++] = range_length;
3857  pr_buf->th_doacross_info[last++] = dims[j].lo;
3858  pr_buf->th_doacross_info[last++] = dims[j].up;
3859  pr_buf->th_doacross_info[last++] = dims[j].st;
3860  }
3861 
3862  // Compute total trip count.
3863  // Start with range of dims[0] which we don't need to keep in the buffer.
3864  if (dims[0].st == 1) { // most common case
3865  trace_count = dims[0].up - dims[0].lo + 1;
3866  } else if (dims[0].st > 0) {
3867  KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3868  trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3869  } else { // negative increment
3870  KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3871  trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3872  }
3873  for (j = 1; j < num_dims; ++j) {
3874  trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
3875  }
3876  KMP_DEBUG_ASSERT(trace_count > 0);
3877 
3878  // Check whether the shared buffer is still occupied by a previous loop
3879  // (the one that used buffer index idx - __kmp_dispatch_num_buffers)
3880  if (idx != sh_buf->doacross_buf_idx) {
3881  // Shared buffer is occupied, wait for it to be free
3882  __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
3883  __kmp_eq_4, NULL);
3884  }
3885 #if KMP_32_BIT_ARCH
3886  // Check if we are the first thread: the CAS returns the old value, so the first
3887  // thread sees 0 (NULL), others see 1 while initialization is in progress, or the
3888  // allocated pointer afterwards; treat the pointer as a volatile 0/1 until memory is allocated.
3889  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
3890  (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
3891 #else
3892  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
3893  (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
3894 #endif
3895  if (flags == NULL) {
3896  // we are the first thread, allocate the array of flags
3897  size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
3898  flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
3899  KMP_MB();
3900  sh_buf->doacross_flags = flags;
3901  } else if (flags == (kmp_uint32 *)1) {
3902 #if KMP_32_BIT_ARCH
3903  // initialization is still in progress, need to wait
3904  while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
3905 #else
3906  while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
3907 #endif
3908  KMP_YIELD(TRUE);
3909  KMP_MB();
3910  } else {
3911  KMP_MB();
3912  }
3913  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
3914  pr_buf->th_doacross_flags =
3915  sh_buf->doacross_flags; // keep a private copy so the shared buffer is
3916  // not touched on each iteration
3917  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
3918 }
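
// Minimal sketch (not part of the runtime) of how a compiler might fill the
// kmp_dim descriptors for "#pragma omp for ordered(2)" over i = 0..N-1 and
// j = 0..M-1 and hand them to __kmpc_doacross_init. N and M are placeholders;
// the wait/post calls that would appear in the loop body are sketched further
// below.
static void example_doacross_setup(ident_t *loc, kmp_int64 N, kmp_int64 M) {
  int gtid = __kmpc_global_thread_num(loc);
  struct kmp_dim dims[2];
  dims[0].lo = 0; dims[0].up = N - 1; dims[0].st = 1; // outer loop: bounds, stride
  dims[1].lo = 0; dims[1].up = M - 1; dims[1].st = 1; // inner loop: bounds, stride
  __kmpc_doacross_init(loc, gtid, 2, dims);
  // ... worksharing loop with __kmpc_doacross_wait / __kmpc_doacross_post ...
  __kmpc_doacross_fini(loc, gtid);
}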
3919 
3920 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
3921  kmp_int32 shft, num_dims, i;
3922  kmp_uint32 flag;
3923  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
3924  kmp_info_t *th = __kmp_threads[gtid];
3925  kmp_team_t *team = th->th.th_team;
3926  kmp_disp_t *pr_buf;
3927  kmp_int64 lo, up, st;
3928 
3929  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
3930  if (team->t.t_serialized) {
3931  KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
3932  return; // no dependencies if team is serialized
3933  }
3934 
3935  // calculate sequential iteration number and check out-of-bounds condition
3936  pr_buf = th->th.th_dispatch;
3937  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3938  num_dims = pr_buf->th_doacross_info[0];
3939  lo = pr_buf->th_doacross_info[2];
3940  up = pr_buf->th_doacross_info[3];
3941  st = pr_buf->th_doacross_info[4];
3942  if (st == 1) { // most common case
3943  if (vec[0] < lo || vec[0] > up) {
3944  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3945  "bounds [%lld,%lld]\n",
3946  gtid, vec[0], lo, up));
3947  return;
3948  }
3949  iter_number = vec[0] - lo;
3950  } else if (st > 0) {
3951  if (vec[0] < lo || vec[0] > up) {
3952  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3953  "bounds [%lld,%lld]\n",
3954  gtid, vec[0], lo, up));
3955  return;
3956  }
3957  iter_number = (kmp_uint64)(vec[0] - lo) / st;
3958  } else { // negative increment
3959  if (vec[0] > lo || vec[0] < up) {
3960  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3961  "bounds [%lld,%lld]\n",
3962  gtid, vec[0], lo, up));
3963  return;
3964  }
3965  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
3966  }
3967  for (i = 1; i < num_dims; ++i) {
3968  kmp_int64 iter, ln;
3969  kmp_int32 j = i * 4;
3970  ln = pr_buf->th_doacross_info[j + 1];
3971  lo = pr_buf->th_doacross_info[j + 2];
3972  up = pr_buf->th_doacross_info[j + 3];
3973  st = pr_buf->th_doacross_info[j + 4];
3974  if (st == 1) {
3975  if (vec[i] < lo || vec[i] > up) {
3976  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3977  "bounds [%lld,%lld]\n",
3978  gtid, vec[i], lo, up));
3979  return;
3980  }
3981  iter = vec[i] - lo;
3982  } else if (st > 0) {
3983  if (vec[i] < lo || vec[i] > up) {
3984  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3985  "bounds [%lld,%lld]\n",
3986  gtid, vec[i], lo, up));
3987  return;
3988  }
3989  iter = (kmp_uint64)(vec[i] - lo) / st;
3990  } else { // st < 0
3991  if (vec[i] > lo || vec[i] < up) {
3992  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
3993  "bounds [%lld,%lld]\n",
3994  gtid, vec[i], lo, up));
3995  return;
3996  }
3997  iter = (kmp_uint64)(lo - vec[i]) / (-st);
3998  }
3999  iter_number = iter + ln * iter_number;
4000  }
4001  shft = iter_number % 32; // use 32-bit granularity
4002  iter_number >>= 5; // divided by 32
4003  flag = 1 << shft;
4004  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4005  KMP_YIELD(TRUE);
4006  }
4007  KMP_MB();
4008  KA_TRACE(20,
4009  ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4010  gtid, (iter_number << 5) + shft));
4011 }
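
// Minimal sketch (not part of the runtime) of the flattening used above: the
// iteration vector is reduced to one sequential number (row-major over the
// stored per-dimension range lengths), and that number selects bit (n % 32) of
// 32-bit word (n >> 5) in the shared flags array. Unit strides are assumed here
// for brevity; lo[] and len[] are placeholders for the values kept in
// th_doacross_info.
static kmp_int64 example_flatten_iteration(const kmp_int64 *vec,
                                           const kmp_int64 *lo,
                                           const kmp_int64 *len, int num_dims) {
  kmp_int64 n = vec[0] - lo[0]; // dimension 0 (its length is not stored)
  for (int i = 1; i < num_dims; ++i)
    n = (vec[i] - lo[i]) + len[i] * n; // same recurrence as iter_number above
  return n; // caller tests flag (1 << (n % 32)) in word flags[n >> 5]
}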
4012 
4013 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4014  kmp_int32 shft, num_dims, i;
4015  kmp_uint32 flag;
4016  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4017  kmp_info_t *th = __kmp_threads[gtid];
4018  kmp_team_t *team = th->th.th_team;
4019  kmp_disp_t *pr_buf;
4020  kmp_int64 lo, st;
4021 
4022  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4023  if (team->t.t_serialized) {
4024  KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4025  return; // no dependencies if team is serialized
4026  }
4027 
4028  // calculate sequential iteration number (same as in "wait" but no
4029  // out-of-bounds checks)
4030  pr_buf = th->th.th_dispatch;
4031  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4032  num_dims = pr_buf->th_doacross_info[0];
4033  lo = pr_buf->th_doacross_info[2];
4034  st = pr_buf->th_doacross_info[4];
4035  if (st == 1) { // most common case
4036  iter_number = vec[0] - lo;
4037  } else if (st > 0) {
4038  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4039  } else { // negative increment
4040  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4041  }
4042  for (i = 1; i < num_dims; ++i) {
4043  kmp_int64 iter, ln;
4044  kmp_int32 j = i * 4;
4045  ln = pr_buf->th_doacross_info[j + 1];
4046  lo = pr_buf->th_doacross_info[j + 2];
4047  st = pr_buf->th_doacross_info[j + 4];
4048  if (st == 1) {
4049  iter = vec[i] - lo;
4050  } else if (st > 0) {
4051  iter = (kmp_uint64)(vec[i] - lo) / st;
4052  } else { // st < 0
4053  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4054  }
4055  iter_number = iter + ln * iter_number;
4056  }
4057  shft = iter_number % 32; // use 32-bit granularity
4058  iter_number >>= 5; // divided by 32
4059  flag = 1 << shft;
4060  KMP_MB();
4061  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4062  KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4063  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4064  (iter_number << 5) + shft));
4065 }
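
// Minimal sketch (not part of the runtime) of the wait/post pairing a compiler
// might emit in the body of a one-dimensional doacross loop for
// "ordered depend(sink: i-1)" ... "ordered depend(source)". The loop variable i
// is a placeholder; an out-of-range sink vector (e.g. i-1 < lo) simply returns
// from __kmpc_doacross_wait, as shown above.
static void example_doacross_body(ident_t *loc, int gtid, kmp_int64 i) {
  kmp_int64 sink_vec[1] = {i - 1};
  __kmpc_doacross_wait(loc, gtid, sink_vec); // block until iteration i-1 posted
  // ... code that depends on iteration i-1 ...
  kmp_int64 source_vec[1] = {i};
  __kmpc_doacross_post(loc, gtid, source_vec); // publish completion of iteration i
}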
4066 
4067 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4068  kmp_int32 num_done;
4069  kmp_info_t *th = __kmp_threads[gtid];
4070  kmp_team_t *team = th->th.th_team;
4071  kmp_disp_t *pr_buf = th->th.th_dispatch;
4072 
4073  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4074  if (team->t.t_serialized) {
4075  KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4076  return; // nothing to do
4077  }
4078  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
4079  if (num_done == th->th.th_team_nproc) {
4080  // we are the last thread, need to free shared resources
4081  int idx = pr_buf->th_doacross_buf_idx - 1;
4082  dispatch_shared_info_t *sh_buf =
4083  &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4084  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4085  (kmp_int64)&sh_buf->doacross_num_done);
4086  KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4087  KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4088  __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4089  sh_buf->doacross_flags = NULL;
4090  sh_buf->doacross_num_done = 0;
4091  sh_buf->doacross_buf_idx +=
4092  __kmp_dispatch_num_buffers; // free buffer for future re-use
4093  }
4094  // free private resources (need to keep buffer index forever)
4095  pr_buf->th_doacross_flags = NULL;
4096  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4097  pr_buf->th_doacross_info = NULL;
4098  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4099 }
4100 #endif
4101 
4102 #if OMP_50_ENABLED
4103 int __kmpc_get_target_offload(void) {
4104  if (!__kmp_init_serial) {
4105  __kmp_serial_initialize();
4106  }
4107  return __kmp_target_offload;
4108 }
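
// Minimal sketch (not part of the runtime): a caller inspecting the
// OMP_TARGET_OFFLOAD policy through this entry point. Interpreting the return
// value as 0 = disabled, 1 = default, 2 = mandatory follows the
// kmp_target_offload_kind values declared in kmp.h and is an assumption here.
static int example_offload_is_disabled(void) {
  return __kmpc_get_target_offload() == 0; // 0: offload disabled (assumption)
}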
4109 #endif // OMP_50_ENABLED
4110 
4111 // end of file //