LLVM OpenMP* Runtime Library
kmp_wait_release.h
/*
 * kmp_wait_release.h -- Wait/Release implementation
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

// Tags describing the storage/width used for a flag; see kmp_flag_32,
// kmp_flag_64, and kmp_flag_oncore below.
enum flag_type {
  flag32, // 32-bit flag (kmp_flag_32)
  flag64, // 64-bit flag (kmp_flag_64)
  flag_oncore // 64-bit flag used by the hierarchical on-core barrier
};

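/* Illustration (sketch only) of how the flag_type tag is used for dispatch:
   a raw flag pointer of unknown width can be inspected via get_type() and
   routed to the matching resume routine, as __kmp_null_resume_wrapper() does
   at the end of this header. The caller name below is hypothetical.

     void wake_if_sleeping(int gtid, volatile void *sleep_loc) {
       __kmp_null_resume_wrapper(gtid, sleep_loc); // switches on the flag type
     }
*/
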
template <typename P> class kmp_flag_native {
  volatile P *loc;
  flag_type t;

public:
  typedef P flag_t;
  kmp_flag_native(volatile P *p, flag_type ft) : loc(p), t(ft) {}
  volatile P *get() { return loc; }
  void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
  void set(volatile P *new_loc) { loc = new_loc; }
  flag_type get_type() { return t; }
  P load() { return *loc; }
  void store(P val) { *loc = val; }
};

template <typename P> class kmp_flag {
  std::atomic<P> *loc; // flag location, written by other threads
  flag_type t; // "type" of the flag (flag32, flag64, or flag_oncore)

public:
  typedef P flag_t;
  kmp_flag(std::atomic<P> *p, flag_type ft) : loc(p), t(ft) {}
  std::atomic<P> *get() { return loc; }
  void *get_void_p() { return RCAST(void *, loc); }
  void set(std::atomic<P> *new_loc) { loc = new_loc; }
  flag_type get_type() { return t; }
  P load() { return loc->load(std::memory_order_acquire); }
  void store(P val) { loc->store(val, std::memory_order_release); }
  // Derived classes must provide the following:
  /*
  kmp_info_t * get_waiter(kmp_uint32 i);
  kmp_uint32 get_num_waiters();
  bool done_check();
  bool done_check_val(P old_loc);
  bool notdone_check();
  P internal_release();
  void suspend(int th_gtid);
  void resume(int th_gtid);
  P set_sleeping();
  P unset_sleeping();
  bool is_sleeping();
  bool is_any_sleeping();
  bool is_sleeping_val(P old_loc);
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished
                    USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained);
  */
};
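
/* Sketch (not part of the runtime) of how the contract above is consumed:
   the templated wait/release code below relies only on the methods listed in
   the comment, so any class providing them works as a flag. The helper name
   generic_wait and the blocktime_expired() predicate are hypothetical.

     template <class C> void generic_wait(kmp_info_t *thr, C *flag, int gtid) {
       while (flag->notdone_check()) { // spin while the release value is absent
         // ... yield, or run tasks via flag->execute_tasks(...) ...
         if (blocktime_expired())      // hypothetical blocktime check
           flag->suspend(gtid);        // sleep until some thread calls resume()
       }
     }
*/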

#if OMPT_SUPPORT
static inline void __ompt_implicit_task_end(kmp_info_t *this_thr,
                                            omp_state_t omp_state,
                                            ompt_data_t *tId,
                                            ompt_data_t *pId) {
  int ds_tid = this_thr->th.th_info.ds.ds_tid;
  if (omp_state == omp_state_wait_barrier_implicit) {
    this_thr->th.ompt_thread_info.state = omp_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid)) {
      if (ompt_enabled.ompt_callback_implicit_task) {
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, tId, 0, ds_tid);
      }
      // return to idle state
      this_thr->th.ompt_thread_info.state = omp_state_idle;
    } else {
      this_thr->th.ompt_thread_info.state = omp_state_overhead;
    }
  }
}
#endif

/* Spin wait loop that first does pause, then yield, then sleep. A thread that
   calls __kmp_wait_* must make certain that another thread calls __kmp_release
   to wake it back up to prevent deadlocks!

   NOTE: We may not belong to a team at this point. */
template <class C, int final_spin>
static inline void
__kmp_wait_template(kmp_info_t *this_thr,
                    C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  volatile void *spin = flag->get();
#endif
  kmp_uint32 spins;
  int th_gtid;
  int tasks_completed = FALSE;
  int oversubscribed;
#if !KMP_USE_MONITOR
  kmp_uint64 poll_count;
  kmp_uint64 hibernate_goal;
#else
  kmp_uint32 hibernate;
#endif

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  if (flag->done_check()) {
    KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
    return;
  }
  th_gtid = this_thr->th.th_info.ds.ds_gtid;
#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
  KA_TRACE(20,
           ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
  stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif

/* OMPT Behavior:
   THIS function is called from
     __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
       these have join / fork behavior

   In these cases, we don't change the state or trigger events in THIS
   function. Events are triggered in the calling code (__kmp_barrier):

     state := omp_state_overhead
       barrier-begin
       barrier-wait-begin
     state := omp_state_wait_barrier
       call join-barrier-implementation (finally arrive here)
       {}
       call fork-barrier-implementation (finally arrive here)
       {}
     state := omp_state_overhead
       barrier-wait-end
       barrier-end
     state := omp_state_work_parallel

     __kmp_fork_barrier (after thread creation, before executing implicit task)
       call fork-barrier-implementation (finally arrive here)
       {} // worker arrives here with state = omp_state_idle

     __kmp_join_barrier (implicit barrier at end of parallel region)
     state := omp_state_barrier_implicit
       barrier-begin
       barrier-wait-begin
       call join-barrier-implementation (finally arrive here, final_spin=FALSE)
       {
       }
     __kmp_fork_barrier (implicit barrier at end of parallel region)
       call fork-barrier-implementation (finally arrive here, final_spin=TRUE)

   Worker after task-team is finished:
       barrier-wait-end
       barrier-end
       implicit-task-end
       idle-begin
     state := omp_state_idle

   Before leaving, if state = omp_state_idle
       idle-end
     state := omp_state_overhead
*/
#if OMPT_SUPPORT
  omp_state_t ompt_entry_state;
  ompt_data_t *pId = NULL;
  ompt_data_t *tId;
  if (ompt_enabled.enabled) {
    ompt_entry_state = this_thr->th.ompt_thread_info.state;
    if (!final_spin || ompt_entry_state != omp_state_wait_barrier_implicit ||
        KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
      ompt_lw_taskteam_t *team =
          this_thr->th.th_team->t.ompt_serialized_team_info;
      if (team) {
        pId = &(team->ompt_team_info.parallel_data);
        tId = &(team->ompt_task_info.task_data);
      } else {
        pId = OMPT_CUR_TEAM_DATA(this_thr);
        tId = OMPT_CUR_TASK_DATA(this_thr);
      }
    } else {
      pId = NULL;
      tId = &(this_thr->th.ompt_thread_info.task_data);
    }
    if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
                       this_thr->th.th_task_team == NULL)) {
      // implicit task is done. Either no taskqueue, or task-team finished
      __ompt_implicit_task_end(this_thr, ompt_entry_state, tId, pId);
    }
  }
#endif

  // Setup for waiting
  KMP_INIT_YIELD(spins);

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
// The worker threads cannot rely on the team struct existing at this point.
// Use the bt values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
    if (__kmp_zero_bt && !this_thr->th.th_team_bt_set)
      // Force immediate suspend if not set by user and more threads than
      // available procs
      hibernate = 0;
    else
      hibernate = this_thr->th.th_team_bt_intervals;
#else
    hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

    /* If the blocktime is nonzero, we want to make sure that we spin wait for
       the entirety of the specified #intervals, plus up to one interval more.
       This increment makes certain that this thread doesn't go to sleep too
       soon. */
    if (hibernate != 0)
      hibernate++;

    // Add in the current time value.
    hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
    KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                  hibernate - __kmp_global.g.g_time.dt.t_value));
#else
    hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
    poll_count = 0;
#endif // KMP_USE_MONITOR
  }

  oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
  KMP_MB();

  // Main wait spin loop
  while (flag->notdone_check()) {
    int in_pool;
    kmp_task_team_t *task_team = NULL;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      task_team = this_thr->th.th_task_team;
      /* If the thread's task team pointer is NULL, it means one of 3 things:
         1) A newly-created thread is first being released by
            __kmp_fork_barrier(), and its task team has not been set up yet.
         2) All tasks have been executed to completion.
         3) Tasking is off for this region. This could be because we are in a
            serialized region (perhaps the outer one), or else tasking was
            manually disabled (KMP_TASKING=0). */
      if (task_team != NULL) {
        if (TCR_SYNC_4(task_team->tt.tt_active)) {
          if (KMP_TASKING_ENABLED(task_team))
            flag->execute_tasks(
                this_thr, th_gtid, final_spin,
                &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
          else
            this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } else {
          KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
#if OMPT_SUPPORT
          // task-team is done now; other cases should be caught above
          if (final_spin && ompt_enabled.enabled)
            __ompt_implicit_task_end(this_thr, ompt_entry_state, tId, pId);
#endif
          this_thr->th.th_task_team = NULL;
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      } else {
        this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
      } // if
    } // if

    KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }

    // If we are oversubscribed, or have waited a bit (and
    // KMP_LIBRARY=throughput), then yield
    // TODO: Should it be number of cores instead of thread contexts? Like:
    // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
    // Need performance improvement data to make the change...
    if (oversubscribed) {
      KMP_YIELD(1);
    } else {
      KMP_YIELD_SPIN(spins);
    }
    // Check if this thread was transferred from a team
    // to the thread pool (or vice-versa) while spinning.
    in_pool = !!TCR_4(this_thr->th.th_in_pool);
    if (in_pool != !!this_thr->th.th_active_in_pool) {
      if (in_pool) { // Recently transferred from team to pool
        KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
        this_thr->th.th_active_in_pool = TRUE;
        /* Here, we cannot assert that:
           KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <=
                            __kmp_thread_pool_nth);
           __kmp_thread_pool_nth is inc/dec'd by the master thread while the
           fork/join lock is held, whereas __kmp_thread_pool_active_nth is
           inc/dec'd asynchronously by the workers. The two can get out of sync
           for brief periods of time. */
      } else { // Recently transferred from pool to team
        KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
        KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
        this_thr->th.th_active_in_pool = FALSE;
      }
    }

#if KMP_STATS_ENABLED
    // Check if thread has been signalled to idle state
    // This indicates that the logical "join-barrier" has finished
    if (this_thr->th.th_stats->isIdle() &&
        KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
      KMP_SET_THREAD_STATE(IDLE);
      KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
    }
#endif

    // Don't suspend if KMP_BLOCKTIME is set to "infinite"
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      continue;

    // Don't suspend if there is a likelihood of new tasks being spawned.
    if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
      continue;

#if KMP_USE_MONITOR
    // If we have waited a bit more, fall asleep
    if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
      continue;
#else
    if (KMP_BLOCKING(hibernate_goal, poll_count++))
      continue;
#endif

    KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
#if KMP_OS_UNIX
    if (final_spin)
      KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
    flag->suspend(th_gtid);
#if KMP_OS_UNIX
    if (final_spin)
      KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    } else if (__kmp_tasking_mode != tskm_immediate_exec &&
               this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
      this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
    }
    // TODO: If thread is done with work and times out, disband/free
  }

#if OMPT_SUPPORT
  omp_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled && ompt_exit_state != omp_state_undefined) {
#if OMPT_OPTIONAL
    if (final_spin) {
      __ompt_implicit_task_end(this_thr, ompt_exit_state, tId, pId);
      ompt_exit_state = this_thr->th.ompt_thread_info.state;
    }
#endif
    if (ompt_exit_state == omp_state_idle) {
      this_thr->th.ompt_thread_info.state = omp_state_overhead;
    }
  }
#endif
#if KMP_STATS_ENABLED
  // If we were put into idle state, pop that off the state stack
  if (KMP_GET_THREAD_STATE() == IDLE) {
    KMP_POP_PARTITIONED_TIMER();
    KMP_SET_THREAD_STATE(thread_state);
    this_thr->th.th_stats->resetIdleFlag();
  }
#endif

#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
}
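
/* Schematic of the escalation implemented above (illustrative only; the real
   loop also runs tasks, tracks pool membership, and honors OMPT/stats hooks).
   The helper names cheap_spin, yield_cpu, and blocktime_elapsed are
   hypothetical stand-ins for KMP_YIELD_SPIN / KMP_YIELD / KMP_BLOCKING.

     while (!done()) {
       cheap_spin();               // pause-based spinning at first
       yield_cpu();                // yield once oversubscribed or spun enough
       if (blocktime_elapsed())    // KMP_BLOCKTIME budget used up
         suspend_until_released(); // flag->suspend(); woken by __kmp_release
     }
*/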

/* Release any threads specified as waiting on the flag by releasing the flag
   and resuming the waiting thread(s) if indicated by the sleep bit(s). If a
   thread calls __kmp_wait_template, some other thread must eventually call
   this function to wake it up and prevent deadlocks! */
template <class C> static inline void __kmp_release_template(C *flag) {
#ifdef KMP_DEBUG
  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
  KMP_DEBUG_ASSERT(flag->get());
  KMP_FSYNC_RELEASING(flag->get_void_p());

  flag->internal_release();

  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
                 flag->load()));

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    // Only need to check sleep stuff if infinite block time not set.
    // Are *any* threads waiting on flag sleeping?
    if (flag->is_any_sleeping()) {
      for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
        // if sleeping waiter exists at i, sets current_waiter to i inside flag
        kmp_info_t *waiter = flag->get_waiter(i);
        if (waiter) {
          int wait_gtid = waiter->th.th_info.ds.ds_gtid;
          // Wake up thread if needed
          KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
                        "flag(%p) set\n",
                        gtid, wait_gtid, flag->get()));
          flag->resume(wait_gtid); // unsets flag's current_waiter when done
        }
      }
    }
  }
}
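
/* Minimal waiter/releaser pairing sketch (assumptions: a worker and a
   releaser share one 64-bit location; the checker value is chosen here only
   so that a single internal_release(), which bumps the value by 4, satisfies
   done_check()). kmp_flag_64's constructors, wait(), and release() are
   defined later in this header.

     volatile kmp_uint64 go = 0;
     kmp_flag_64 flag(&go, (kmp_uint64)4); // done once the value reaches 4

     // waiter (worker thread):
     //   flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(NULL));
     // releaser (another thread):
     //   flag.release(); // internal_release() + wake any sleeping waiters
*/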

template <typename FlagType> struct flag_traits {};

template <> struct flag_traits<kmp_uint32> {
  typedef kmp_uint32 flag_t;
  static const flag_type t = flag32;
  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR32(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND32(f, v);
  }
};

template <> struct flag_traits<kmp_uint64> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = flag64;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};
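
/* The traits specializations give the templated flag classes below
   width-appropriate primitives without duplicating code. Illustrative use:

     kmp_uint64 f = 0;
     flag_traits<kmp_uint64>::test_then_add4(&f);     // f advances by 4
     flag_traits<kmp_uint64>::test_then_or(&f, 1ULL); // set a low status bit
     // kmp_basic_flag_native<FlagType> picks these up via
     //   typedef flag_traits<FlagType> traits_type;
*/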

// Basic flag that does not use C11 Atomics
template <typename FlagType>
class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; // value the flag must reach to be considered "done"
  kmp_info_t *waiting_threads[1]; // threads waiting (sleeping) on this flag
  kmp_uint32 num_waiting_threads; // number of waiting threads

public:
  kmp_basic_flag_native(volatile FlagType *p)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
      : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
  void internal_release() {
    (void)traits_type::test_then_add4((volatile FlagType *)this->get());
  }
  FlagType set_sleeping() {
    return traits_type::test_then_or((volatile FlagType *)this->get(),
                                     KMP_BARRIER_SLEEP_STATE);
  }
  FlagType unset_sleeping() {
    return traits_type::test_then_and((volatile FlagType *)this->get(),
                                      ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};

// Basic flag that uses a std::atomic location (see kmp_flag above)
template <typename FlagType> class kmp_basic_flag : public kmp_flag<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; // value the flag must reach to be considered "done"
  kmp_info_t *waiting_threads[1]; // threads waiting (sleeping) on this flag
  kmp_uint32 num_waiting_threads; // number of waiting threads

public:
  kmp_basic_flag(std::atomic<FlagType> *p)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
      : kmp_flag<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check() { return this->load() == checker; }
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  bool notdone_check() { return this->load() != checker; }
  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
  FlagType set_sleeping() {
    return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
  }
  FlagType unset_sleeping() {
    return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(this->load()); }
  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};
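
/* How the pieces interact (sketch only; the concrete value of
   KMP_BARRIER_SLEEP_STATE lives in kmp.h and is not assumed here):
     - done_check() is a plain equality test against 'checker'.
     - internal_release() advances the value by 4, so a flag constructed with
       checker = initial_value + 4 observes done_check() after one release
       (once any sleep bit set by a suspending waiter has been cleared again
       on the resume path).
     - set_sleeping()/unset_sleeping() OR/AND a sleep bit into the same word,
       which is why is_any_sleeping() can be answered from the flag alone.

     std::atomic<kmp_uint32> loc(0);
     kmp_basic_flag<kmp_uint32> f(&loc, 4u); // done once loc reaches 4
     // waiter:   spins while f.notdone_check(); may set_sleeping() + suspend
     // releaser: f.internal_release();        // loc += 4
*/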

class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
public:
  kmp_flag_32(std::atomic<kmp_uint32> *p) : kmp_basic_flag<kmp_uint32>(p) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
      : kmp_basic_flag<kmp_uint32>(p, thr) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
      : kmp_basic_flag<kmp_uint32>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_32(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_32, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_32, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag32; }
};

class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64> {
public:
  kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag_native<kmp_uint64>(p) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
      : kmp_basic_flag_native<kmp_uint64>(p, thr) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
      : kmp_basic_flag_native<kmp_uint64>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_64(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_64, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_64, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag64; }
};

// Hierarchical 64-bit on-core barrier instantiation
class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
  kmp_uint64 checker; // per-byte value the flag must reach to be "done"
  kmp_info_t *waiting_threads[1]; // threads waiting (sleeping) on this flag
  kmp_uint32 num_waiting_threads; // number of waiting threads
  kmp_uint32 offset; // byte of interest within the 64-bit flag word
  bool flag_switch; // true once the waiter must switch to its own b_go flag
  enum barrier_type bt; // barrier type
  kmp_info_t *this_thr; // waiting thread; may be redirected to its own b_go flag
#if USE_ITT_BUILD
  void *itt_sync_obj; // ITT object passed on to the wait/execute_tasks routines
#endif
  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
    return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
  }

public:
  kmp_flag_oncore(volatile kmp_uint64 *p)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        offset(idx), flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
                  enum barrier_type bar_t,
                  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
        num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
        this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check_val(kmp_uint64 old_loc) {
    return byteref(&old_loc, offset) == checker;
  }
  bool done_check() { return done_check_val(*get()); }
  bool notdone_check() {
    // Calculate flag_switch
    if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
      flag_switch = true;
    if (byteref(get(), offset) != 1 && !flag_switch)
      return true;
    else if (flag_switch) {
      this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
      kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
                       (kmp_uint64)KMP_BARRIER_STATE_BUMP);
      __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    return false;
  }
  void internal_release() {
    // Other threads can write their own bytes simultaneously.
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
      byteref(get(), offset) = 1;
    } else {
      kmp_uint64 mask = 0;
      byteref(&mask, offset) = 1;
      KMP_TEST_THEN_OR64(get(), mask);
    }
  }
  kmp_uint64 set_sleeping() {
    return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
  }
  kmp_uint64 unset_sleeping() {
    return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(kmp_uint64 old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*get()); }
  bool is_any_sleeping() { return is_sleeping_val(*get()); }
  void wait(kmp_info_t *this_thr, int final_spin) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_oncore, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_oncore, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bt; }
  flag_type get_ptr_type() { return flag_oncore; }
};
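
/* The on-core flag packs one byte per waiter into a single 64-bit word:
   byteref(loc, offset) selects this thread's byte, internal_release() writes
   a 1 into just that byte (atomically OR-ing a mask when sleeping is
   possible), and done_check() compares the byte at 'offset' against
   'checker'. A self-contained illustration of the byte addressing (all names
   below are local to the example):

     kmp_uint64 word = 0;
     for (size_t child = 0; child < 8; ++child) {
       kmp_uint64 mask = 0;
       RCAST(unsigned char *, &mask)[child] = 1; // same trick as byteref()
       word |= mask;                             // child 'child' has arrived
     }
     // word == 0x0101010101010101 once all eight children have checked in
*/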

// Used to wake up threads, volatile void* flag is usually the th_sleep_loc
// associated with int gtid.
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
  if (!flag)
    return;

  switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) {
  case flag32:
    __kmp_resume_32(gtid, NULL);
    break;
  case flag64:
    __kmp_resume_64(gtid, NULL);
    break;
  case flag_oncore:
    __kmp_resume_oncore(gtid, NULL);
    break;
  }
}

#endif // KMP_WAIT_RELEASE_H