LLVM OpenMP* Runtime Library
kmp_wait_release.h
/*
 * kmp_wait_release.h -- Wait/Release implementation
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

// Flag types used by the wait/release synchronization primitives below.
enum flag_type {
  flag32,     // 32-bit flag
  flag64,     // 64-bit flag
  flag_oncore // special 64-bit flag for on-core (hierarchical) barrier
};

// Base class for wait/release of a flag stored in a plain volatile location
template <typename P> class kmp_flag_native {
  volatile P *loc;
  flag_type t;

public:
  typedef P flag_t;
  kmp_flag_native(volatile P *p, flag_type ft) : loc(p), t(ft) {}
  volatile P *get() { return loc; }
  void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
  void set(volatile P *new_loc) { loc = new_loc; }
  flag_type get_type() { return t; }
  P load() { return *loc; }
  void store(P val) { *loc = val; }
};

// Base class for wait/release of a flag stored in a std::atomic
template <typename P> class kmp_flag {
  std::atomic<P> *loc; // Pointer to the flag storage, modified by another thread
  flag_type t; // "Type" of the flag in location

public:
  typedef P flag_t;
  kmp_flag(std::atomic<P> *p, flag_type ft) : loc(p), t(ft) {}
  std::atomic<P> *get() { return loc; }
  void *get_void_p() { return RCAST(void *, loc); }
  void set(std::atomic<P> *new_loc) { loc = new_loc; }
  flag_type get_type() { return t; }
  P load() { return loc->load(std::memory_order_acquire); }
  void store(P val) { loc->store(val, std::memory_order_release); }
  // Derived classes must provide the following:
  /*
  kmp_info_t * get_waiter(kmp_uint32 i);
  kmp_uint32 get_num_waiters();
  bool done_check();
  bool done_check_val(P old_loc);
  bool notdone_check();
  P internal_release();
  void suspend(int th_gtid);
  void resume(int th_gtid);
  P set_sleeping();
  P unset_sleeping();
  bool is_sleeping();
  bool is_any_sleeping();
  bool is_sleeping_val(P old_loc);
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished
                    USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained);
  */
};

#if OMPT_SUPPORT
OMPT_NOINLINE
static void __ompt_implicit_task_end(kmp_info_t *this_thr,
                                     ompt_state_t ompt_state,
                                     ompt_data_t *tId) {
  int ds_tid = this_thr->th.th_info.ds.ds_tid;
  if (ompt_state == ompt_state_wait_barrier_implicit) {
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid)) {
      if (ompt_enabled.ompt_callback_implicit_task) {
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, tId, 0, ds_tid, ompt_task_implicit);
      }
      // return to idle state
      this_thr->th.ompt_thread_info.state = ompt_state_idle;
    } else {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
}
#endif

/* Spin wait loop that first does pause, then yield, then sleep. A thread that
   calls __kmp_wait_* must make certain that another thread calls __kmp_release
   to wake it back up to prevent deadlocks!

   NOTE: We may not belong to a team at this point. */
template <class C, int final_spin>
static inline void
__kmp_wait_template(kmp_info_t *this_thr,
                    C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  volatile void *spin = flag->get();
#endif
  kmp_uint32 spins;
  int th_gtid;
  int tasks_completed = FALSE;
  int oversubscribed;
#if !KMP_USE_MONITOR
  kmp_uint64 poll_count;
  kmp_uint64 hibernate_goal;
#else
  kmp_uint32 hibernate;
#endif

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  if (flag->done_check()) {
    KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
    return;
  }
  th_gtid = this_thr->th.th_info.ds.ds_gtid;
#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
  KA_TRACE(20,
           ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
  stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif

/* OMPT Behavior:
   THIS function is called from
     __kmp_barrier (2 times) (implicit or explicit barrier in parallel
                              regions) -- these have join / fork behavior

       In these cases, we don't change the state or trigger events in THIS
       function. Events are triggered in the calling code (__kmp_barrier):

                state := ompt_state_overhead
            barrier-begin
            barrier-wait-begin
                state := ompt_state_wait_barrier
            call join-barrier-implementation (finally arrive here)
            {}
            call fork-barrier-implementation (finally arrive here)
            {}
                state := ompt_state_overhead
            barrier-wait-end
            barrier-end
                state := ompt_state_work_parallel

     __kmp_fork_barrier (after thread creation, before executing implicit task)
            call fork-barrier-implementation (finally arrive here)
            {} // worker arrives here with state = ompt_state_idle

     __kmp_join_barrier (implicit barrier at end of parallel region)
                state := ompt_state_barrier_implicit
            barrier-begin
            barrier-wait-begin
            call join-barrier-implementation (finally arrive here,
                                              final_spin=FALSE)
            {}
     __kmp_fork_barrier (implicit barrier at end of parallel region)
            call fork-barrier-implementation (finally arrive here,
                                              final_spin=TRUE)

       Worker after task-team is finished:
            barrier-wait-end
            barrier-end
            implicit-task-end
            idle-begin
                state := ompt_state_idle

       Before leaving, if state = ompt_state_idle
            idle-end
                state := ompt_state_overhead
*/
#if OMPT_SUPPORT
  ompt_state_t ompt_entry_state;
  ompt_data_t *tId;
  if (ompt_enabled.enabled) {
    ompt_entry_state = this_thr->th.ompt_thread_info.state;
    if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
        KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
      ompt_lw_taskteam_t *team =
          this_thr->th.th_team->t.ompt_serialized_team_info;
      if (team) {
        tId = &(team->ompt_task_info.task_data);
      } else {
        tId = OMPT_CUR_TASK_DATA(this_thr);
      }
    } else {
      tId = &(this_thr->th.ompt_thread_info.task_data);
    }
    if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
                       this_thr->th.th_task_team == NULL)) {
      // implicit task is done. Either no taskqueue, or task-team finished
      __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
    }
  }
#endif

  // Setup for waiting
  KMP_INIT_YIELD(spins);

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME
#if OMP_50_ENABLED
      || __kmp_pause_status == kmp_soft_paused
#endif
      ) {
#if KMP_USE_MONITOR
// The worker threads cannot rely on the team struct existing at this point.
// Use the bt values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
    if (
#if OMP_50_ENABLED
        __kmp_pause_status == kmp_soft_paused ||
#endif
        (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
      // Force immediate suspend if not set by user and more threads than
      // available procs
      hibernate = 0;
    else
      hibernate = this_thr->th.th_team_bt_intervals;
#else
    hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

    /* If the blocktime is nonzero, we want to make sure that we spin wait for
       the entirety of the specified #intervals, plus up to one interval more.
       This increment makes certain that this thread doesn't go to sleep too
       soon. */
    if (hibernate != 0)
      hibernate++;

    // Add in the current time value.
    hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
    KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                  hibernate - __kmp_global.g.g_time.dt.t_value));
#else
#if OMP_50_ENABLED
    if (__kmp_pause_status == kmp_soft_paused) {
      // Force immediate suspend
      hibernate_goal = KMP_NOW();
    } else
#endif
      hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
    poll_count = 0;
#endif // KMP_USE_MONITOR
  }

  oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
  KMP_MB();

  // Main wait spin loop
  while (flag->notdone_check()) {
    int in_pool;
    kmp_task_team_t *task_team = NULL;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      task_team = this_thr->th.th_task_team;
      /* If the thread's task team pointer is NULL, it means one of 3 things:
         1) A newly-created thread is first being released by
            __kmp_fork_barrier(), and its task team has not been set up yet.
         2) All tasks have been executed to completion.
         3) Tasking is off for this region. This could be because we are in a
            serialized region (perhaps the outer one), or else tasking was
            manually disabled (KMP_TASKING=0). */
      if (task_team != NULL) {
        if (TCR_SYNC_4(task_team->tt.tt_active)) {
          if (KMP_TASKING_ENABLED(task_team))
            flag->execute_tasks(
                this_thr, th_gtid, final_spin,
                &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
          else
            this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } else {
          KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
#if OMPT_SUPPORT
          // task-team is done now; other cases should be caught above
          if (final_spin && ompt_enabled.enabled)
            __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
#endif
          this_thr->th.th_task_team = NULL;
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      } else {
        this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
      } // if
    } // if

    KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }

    // If we are oversubscribed, or have waited a bit (and
    // KMP_LIBRARY=throughput), then yield
    // TODO: Should it be number of cores instead of thread contexts? Like:
    // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
    // Need performance improvement data to make the change...
    if (oversubscribed) {
      KMP_YIELD(1);
    } else {
      KMP_YIELD_SPIN(spins);
    }
    // Check if this thread was transferred from a team
    // to the thread pool (or vice-versa) while spinning.
    in_pool = !!TCR_4(this_thr->th.th_in_pool);
    if (in_pool != !!this_thr->th.th_active_in_pool) {
      if (in_pool) { // Recently transferred from team to pool
        KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
        this_thr->th.th_active_in_pool = TRUE;
        /* Here, we cannot assert that:
           KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <=
                            __kmp_thread_pool_nth);
           __kmp_thread_pool_nth is inc/dec'd by the master thread while the
           fork/join lock is held, whereas __kmp_thread_pool_active_nth is
           inc/dec'd asynchronously by the workers. The two can get out of sync
           for brief periods of time. */
      } else { // Recently transferred from pool to team
        KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
        KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
        this_thr->th.th_active_in_pool = FALSE;
      }
    }

#if KMP_STATS_ENABLED
    // Check if thread has been signalled to idle state
    // This indicates that the logical "join-barrier" has finished
    if (this_thr->th.th_stats->isIdle() &&
        KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
      KMP_SET_THREAD_STATE(IDLE);
      KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
    }
#endif

    // Don't suspend if KMP_BLOCKTIME is set to "infinite"
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME
#if OMP_50_ENABLED
        && __kmp_pause_status != kmp_soft_paused
#endif
        )
      continue;

    // Don't suspend if there is a likelihood of new tasks being spawned.
    if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
      continue;

#if KMP_USE_MONITOR
    // If we have waited a bit more, fall asleep
    if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
      continue;
#else
    if (KMP_BLOCKING(hibernate_goal, poll_count++))
      continue;
#endif

#if OMP_50_ENABLED
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;
#endif

    KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));

#if KMP_OS_UNIX
    if (final_spin)
      KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
    flag->suspend(th_gtid);
#if KMP_OS_UNIX
    if (final_spin)
      KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    } else if (__kmp_tasking_mode != tskm_immediate_exec &&
               this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
      this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
    }
    // TODO: If thread is done with work and times out, disband/free
  }

#if OMPT_SUPPORT
  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
#if OMPT_OPTIONAL
    if (final_spin) {
      __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
      ompt_exit_state = this_thr->th.ompt_thread_info.state;
    }
#endif
    if (ompt_exit_state == ompt_state_idle) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
#endif
#if KMP_STATS_ENABLED
  // If we were put into idle state, pop that off the state stack
  if (KMP_GET_THREAD_STATE() == IDLE) {
    KMP_POP_PARTITIONED_TIMER();
    KMP_SET_THREAD_STATE(thread_state);
    this_thr->th.th_stats->resetIdleFlag();
  }
#endif

#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
}

/* Release any threads specified as waiting on the flag by releasing the flag
   and resuming the waiting thread(s) if indicated by the sleep bit(s). For
   every thread blocked in __kmp_wait_template, some other thread must
   eventually call this function to wake the potentially sleeping waiter and
   prevent deadlocks! */
template <class C> static inline void __kmp_release_template(C *flag) {
#ifdef KMP_DEBUG
  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
  KMP_DEBUG_ASSERT(flag->get());
  KMP_FSYNC_RELEASING(flag->get_void_p());

  flag->internal_release();

  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
                 flag->load()));

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    // Only need to check sleep stuff if infinite block time not set.
    // Are *any* threads waiting on flag sleeping?
    if (flag->is_any_sleeping()) {
      for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
        // if sleeping waiter exists at i, sets current_waiter to i inside flag
        kmp_info_t *waiter = flag->get_waiter(i);
        if (waiter) {
          int wait_gtid = waiter->th.th_info.ds.ds_gtid;
          // Wake up thread if needed
          KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
                        "flag(%p) set\n",
                        gtid, wait_gtid, flag->get()));
          flag->resume(wait_gtid); // unsets flag's current_waiter when done
        }
      }
    }
  }
}
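
// The sleep bit consulted above is set on the waiter side by the
// platform-specific suspend path (flag->suspend() is implemented outside this
// header), which is why the releaser must publish the new flag value with
// internal_release() *before* it looks for sleepers. The standalone sketch
// below models that handshake with a std::atomic word plus a condition
// variable; SLEEP_BIT, flag_word, wait_for and release are illustrative names
// only and are not part of the runtime.
#include <atomic>
#include <condition_variable>
#include <cstdint>
#include <mutex>

constexpr uint64_t SLEEP_BIT = 1ull << 63; // stand-in for KMP_BARRIER_SLEEP_STATE

struct flag_word {
  std::atomic<uint64_t> go{0};
  std::mutex mx;
  std::condition_variable cv;
};

// Waiter: spin briefly, then advertise "sleeping" on the same word and block.
inline void wait_for(flag_word &f, uint64_t checker) {
  for (int spins = 0; spins < 1000; ++spins)
    if ((f.go.load(std::memory_order_acquire) & ~SLEEP_BIT) == checker)
      return;
  std::unique_lock<std::mutex> lk(f.mx);
  uint64_t old = f.go.fetch_or(SLEEP_BIT, std::memory_order_acq_rel);
  if ((old & ~SLEEP_BIT) == checker) { // released while preparing to sleep
    f.go.fetch_and(~SLEEP_BIT, std::memory_order_acq_rel);
    return;
  }
  f.cv.wait(lk, [&f, checker] {
    return (f.go.load(std::memory_order_acquire) & ~SLEEP_BIT) == checker;
  });
}

// Releaser: mirrors __kmp_release_template -- bump the value first, then wake
// anyone that advertised sleeping.
inline void release(flag_word &f, uint64_t bump) {
  uint64_t old = f.go.fetch_add(bump, std::memory_order_acq_rel);
  if (old & SLEEP_BIT) {
    std::lock_guard<std::mutex> lk(f.mx);
    f.cv.notify_all();
  }
}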

template <typename FlagType> struct flag_traits {};

template <> struct flag_traits<kmp_uint32> {
  typedef kmp_uint32 flag_t;
  static const flag_type t = flag32;
  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR32(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND32(f, v);
  }
};

template <> struct flag_traits<kmp_uint64> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = flag64;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};

// Basic flag that does not use C11 Atomics
template <typename FlagType>
class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; // Value the flag is compared against to detect release
  kmp_info_t *waiting_threads[1]; // Threads sleeping on this flag
  kmp_uint32 num_waiting_threads; // Count of threads sleeping on this flag
public:
  kmp_basic_flag_native(volatile FlagType *p)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
      : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
  void internal_release() {
    (void)traits_type::test_then_add4((volatile FlagType *)this->get());
  }
  FlagType set_sleeping() {
    return traits_type::test_then_or((volatile FlagType *)this->get(),
                                     KMP_BARRIER_SLEEP_STATE);
  }
  FlagType unset_sleeping() {
    return traits_type::test_then_and((volatile FlagType *)this->get(),
                                      ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};

template <typename FlagType> class kmp_basic_flag : public kmp_flag<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; // Value the flag is compared against to detect release
  kmp_info_t *waiting_threads[1]; // Threads sleeping on this flag
  kmp_uint32 num_waiting_threads; // Count of threads sleeping on this flag
public:
  kmp_basic_flag(std::atomic<FlagType> *p)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
      : kmp_flag<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check() { return this->load() == checker; }
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  bool notdone_check() { return this->load() != checker; }
  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
  FlagType set_sleeping() {
    return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
  }
  FlagType unset_sleeping() {
    return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(this->load()); }
  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};

class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
public:
  kmp_flag_32(std::atomic<kmp_uint32> *p) : kmp_basic_flag<kmp_uint32>(p) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
      : kmp_basic_flag<kmp_uint32>(p, thr) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
      : kmp_basic_flag<kmp_uint32>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_32(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_32, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_32, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag32; }
};

class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64> {
public:
  kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag_native<kmp_uint64>(p) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
      : kmp_basic_flag_native<kmp_uint64>(p, thr) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
      : kmp_basic_flag_native<kmp_uint64>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_64(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_64, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_64, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag64; }
};
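
// Usage outline (illustrative only, not lifted from the barrier code): a
// worker typically parks on its own b_go word until that word is bumped to
// KMP_BARRIER_STATE_BUMP, while the releasing thread wraps the same word to
// wake it. The same construction appears in kmp_flag_oncore::notdone_check()
// below; `bt`, `this_thr` and `other_thr` stand for whatever barrier type and
// thread descriptors the caller has at hand.
//
//   // waiting side
//   kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
//                    (kmp_uint64)KMP_BARRIER_STATE_BUMP);
//   flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
//
//   // releasing side: bumps b_go and resumes the waiter if it went to sleep
//   kmp_flag_64 flag(&other_thr->th.th_bar[bt].bb.b_go);
//   flag.release();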

// Hierarchical 64-bit on-core barrier instantiation
class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
  kmp_uint64 checker;
  kmp_info_t *waiting_threads[1];
  kmp_uint32 num_waiting_threads;
  kmp_uint32 offset; // Byte of the flag that this operation watches
  bool flag_switch; // Indicates a switch in flag location
  enum barrier_type bt; // Barrier type
  kmp_info_t *this_thr; // Thread that may be redirected to a different flag location
#if USE_ITT_BUILD
  void *itt_sync_obj; // ITT object to pass to the new flag location
#endif
  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
    return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
  }

public:
  kmp_flag_oncore(volatile kmp_uint64 *p)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        offset(idx), flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
                  enum barrier_type bar_t,
                  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
        num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
        this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check_val(kmp_uint64 old_loc) {
    return byteref(&old_loc, offset) == checker;
  }
  bool done_check() { return done_check_val(*get()); }
  bool notdone_check() {
    // Calculate flag_switch
    if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
      flag_switch = true;
    if (byteref(get(), offset) != 1 && !flag_switch)
      return true;
    else if (flag_switch) {
      this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
      kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
                       (kmp_uint64)KMP_BARRIER_STATE_BUMP);
      __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    return false;
  }
  void internal_release() {
    // Other threads can write their own bytes simultaneously.
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
      byteref(get(), offset) = 1;
    } else {
      kmp_uint64 mask = 0;
      byteref(&mask, offset) = 1;
      KMP_TEST_THEN_OR64(get(), mask);
    }
  }
  kmp_uint64 set_sleeping() {
    return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
  }
  kmp_uint64 unset_sleeping() {
    return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(kmp_uint64 old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*get()); }
  bool is_any_sleeping() { return is_sleeping_val(*get()); }
  void wait(kmp_info_t *this_thr, int final_spin) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_oncore, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_oncore, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bt; }
  flag_type get_ptr_type() { return flag_oncore; }
};
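
// Standalone illustration (not runtime code) of the byte addressing that
// kmp_flag_oncore relies on: each participant in an on-core barrier owns one
// byte of a shared 64-bit word, and internal_release() above ORs in a mask
// with only that byte set so that siblings releasing concurrently cannot
// clobber each other's bytes. one_byte_mask is a hypothetical helper, shown
// here only to spell out what byteref(&mask, offset) = 1 computes.
#include <cstdint>
#include <cstring>

// Build a 64-bit mask whose only nonzero byte is the byte at `offset`.
inline uint64_t one_byte_mask(size_t offset) {
  uint64_t mask = 0;
  unsigned char one = 1;
  std::memcpy(reinterpret_cast<unsigned char *>(&mask) + offset, &one, 1);
  return mask; // on a little-endian target this equals 1ull << (8 * offset)
}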

// Used to wake up threads: the volatile void *flag is usually the
// th_sleep_loc associated with the given gtid.
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
  if (!flag)
    return;

  switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) {
  case flag32:
    __kmp_resume_32(gtid, NULL);
    break;
  case flag64:
    __kmp_resume_64(gtid, NULL);
    break;
  case flag_oncore:
    __kmp_resume_oncore(gtid, NULL);
    break;
  }
}

#endif // KMP_WAIT_RELEASE_H