#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

enum flag_type {
  flag32, /**< 32 bit flags */
  flag64, /**< 64 bit flags */
  flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
};

// Base class for wait/release flags backed by a plain (volatile) word.
template <typename P> class kmp_flag_native {
  volatile P *loc;
  flag_type t;

public:
  typedef P flag_t;
  kmp_flag_native(volatile P *p, flag_type ft) : loc(p), t(ft) {}
  volatile P *get() { return loc; }
  void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
  void set(volatile P *new_loc) { loc = new_loc; }
  flag_type get_type() { return t; }
  P load() { return *loc; }
  void store(P val) { *loc = val; }
};
// Base class for wait/release flags backed by a std::atomic word.
template <typename P> class kmp_flag {
  std::atomic<P> *loc; // flag location; may be modified by another thread
  flag_type t;         // "type" of the flag stored at loc

public:
  typedef P flag_t;
  kmp_flag(std::atomic<P> *p, flag_type ft) : loc(p), t(ft) {}
  std::atomic<P> *get() { return loc; }
  void *get_void_p() { return RCAST(void *, loc); }
  void set(std::atomic<P> *new_loc) { loc = new_loc; }
  flag_type get_type() { return t; }
  P load() { return loc->load(std::memory_order_acquire); }
  void store(P val) { loc->store(val, std::memory_order_release); }
};
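// Derived flag classes are expected to provide, beyond the accessors above:
// get_waiter(), get_num_waiters(), done_check(), done_check_val(),
// notdone_check(), internal_release(), suspend(), resume(), set_sleeping(),
// unset_sleeping(), is_sleeping(), is_any_sleeping(), is_sleeping_val() and
// execute_tasks(), as implemented by the concrete flag types below.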
static void __ompt_implicit_task_end(kmp_info_t *this_thr,
                                     ompt_state_t ompt_state,
                                     ompt_data_t *tId) {
  int ds_tid = this_thr->th.th_info.ds.ds_tid;
  if (ompt_state == ompt_state_wait_barrier_implicit) {
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    void *codeptr = NULL;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
    }
    if (!KMP_MASTER_TID(ds_tid)) {
      if (ompt_enabled.ompt_callback_implicit_task) {
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, tId, 0, ds_tid, ompt_task_implicit);
      }
      // The implicit task of a worker is done; return the thread to idle.
      this_thr->th.ompt_thread_info.state = ompt_state_idle;
    } else {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
}
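/* Spin-wait loop that first does pause/yield, then sleep. A thread that calls
   __kmp_wait_* must make certain that another thread calls __kmp_release to
   wake it back up, otherwise the waiter can deadlock. */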
template <class C, int final_spin>
static inline void
__kmp_wait_template(kmp_info_t *this_thr,
                    C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  volatile void *spin = flag->get();
#endif
  kmp_uint32 spins;
  int th_gtid;
  int tasks_completed = FALSE;
  int oversubscribed;
#if !KMP_USE_MONITOR
  kmp_uint64 poll_count;
  kmp_uint64 hibernate_goal;
#else
  kmp_uint32 hibernate;
#endif

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  if (flag->done_check()) {
    KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
    return;
  }
  th_gtid = this_thr->th.th_info.ds.ds_gtid;
  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
  KA_TRACE(20,
           ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
  stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif

  ompt_state_t ompt_entry_state;
  ompt_data_t *tId = NULL;
  if (ompt_enabled.enabled) {
    ompt_entry_state = this_thr->th.ompt_thread_info.state;
    if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
        KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
      ompt_lw_taskteam_t *team =
          this_thr->th.th_team->t.ompt_serialized_team_info;
      if (team) {
        tId = &(team->ompt_task_info.task_data);
      } else {
        tId = OMPT_CUR_TASK_DATA(this_thr);
      }
    } else {
      tId = &(this_thr->th.ompt_thread_info.task_data);
    }
    if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
                       this_thr->th.th_task_team == NULL)) {
      // The implicit task is done: either there is no task queue, or the
      // task team has already finished.
      __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
    }
  }
  KMP_INIT_YIELD(spins); // Set up for waiting.

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
      __kmp_pause_status == kmp_soft_paused) {
#if KMP_USE_MONITOR
// The worker threads cannot rely on the team struct existing at this point.
// Use the bt values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
    if (__kmp_pause_status == kmp_soft_paused ||
        (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
      // Force an immediate suspend if the blocktime was not set by the user.
      hibernate = 0;
    else
      hibernate = this_thr->th.th_team_bt_intervals;
#else
    hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

    // Add in the current time value.
    hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
    KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                  hibernate - __kmp_global.g.g_time.dt.t_value));
#else
    if (__kmp_pause_status == kmp_soft_paused) {
      // Force an immediate suspend.
      hibernate_goal = KMP_NOW();
    } else
      hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
    poll_count = 0;
#endif // KMP_USE_MONITOR
  }

  oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
  // Main wait-spin loop.
  while (flag->notdone_check()) {
    kmp_task_team_t *task_team = NULL;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      task_team = this_thr->th.th_task_team;
      if (task_team != NULL) {
        if (TCR_SYNC_4(task_team->tt.tt_active)) {
          if (KMP_TASKING_ENABLED(task_team))
            flag->execute_tasks(
                this_thr, th_gtid, final_spin,
                &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
          else
            this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } else {
          KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
          // The task team is done now; other cases are caught above.
          if (final_spin && ompt_enabled.enabled)
            __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
          this_thr->th.th_task_team = NULL;
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      } else {
        this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
      }
    }

    KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }
    // If we are oversubscribed, or have waited a bit (and the library mode is
    // throughput), then yield.
    if (oversubscribed) {
      KMP_YIELD(TRUE);
    } else {
      KMP_YIELD_SPIN(spins);
    }

    // Check whether this thread was moved between a team and the thread pool
    // while spinning, and adjust the pool's active-thread count accordingly.
    int in_pool = !!TCR_4(this_thr->th.th_in_pool);
    if (in_pool != !!this_thr->th.th_active_in_pool) {
      if (in_pool) { // Recently transferred from team to pool.
        KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
        this_thr->th.th_active_in_pool = TRUE;
      } else { // Recently transferred from pool to team.
        KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
        KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
        this_thr->th.th_active_in_pool = FALSE;
      }
    }
#if KMP_STATS_ENABLED
    // Check if the thread has been signalled to idle state; this indicates
    // that the logical "join-barrier" has finished.
    if (this_thr->th.th_stats->isIdle() &&
        KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
      KMP_SET_THREAD_STATE(IDLE);
      KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
    }
#endif

    // Don't suspend if KMP_BLOCKTIME is set to "infinite".
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;

    // Don't suspend if there is a likelihood of new tasks being spawned.
    if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
      continue;

#if KMP_USE_MONITOR
    // If we have waited a bit more, fall asleep.
    if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
      continue;
#else
    if (KMP_BLOCKING(hibernate_goal, poll_count++))
      continue;
#endif
    // Don't suspend if KMP_BLOCKTIME is set to "infinite".
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;
    KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));

    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
    flag->suspend(th_gtid);
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    } else if (__kmp_tasking_mode != tskm_immediate_exec &&
               this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
      this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
    }
  }
  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
    if (final_spin) {
      __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
      ompt_exit_state = this_thr->th.ompt_thread_info.state;
    }
    if (ompt_exit_state == ompt_state_idle) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }

#if KMP_STATS_ENABLED
  // If we were put into idle state, pop that off the state stack.
  if (KMP_GET_THREAD_STATE() == IDLE) {
    KMP_POP_PARTITIONED_TIMER();
    KMP_SET_THREAD_STATE(thread_state);
    this_thr->th.th_stats->resetIdleFlag();
  }
#endif

  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
}
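/* Release any threads waiting on the flag: update the flag value and, if the
   sleep bit(s) are set, resume the sleeping waiter(s). A thread that called
   __kmp_wait_template relies on a matching release to wake it up and avoid
   deadlock. */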
template <class C> static inline void __kmp_release_template(C *flag) {
  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
  KMP_DEBUG_ASSERT(flag->get());
  KMP_FSYNC_RELEASING(flag->get_void_p());

  flag->internal_release();

  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
                 flag->load()));
  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    // Only need to check the sleep state if infinite block time is not set.
    // Are *any* threads waiting on the flag sleeping?
    if (flag->is_any_sleeping()) {
      for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
        // If a sleeping waiter exists at index i, wake it up.
        kmp_info_t *waiter = flag->get_waiter(i);
        if (waiter) {
          int wait_gtid = waiter->th.th_info.ds.ds_gtid;
          KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
                        "flag(%p) set\n",
                        gtid, wait_gtid, flag->get()));
          flag->resume(wait_gtid); // unsets the waiter when done
        }
      }
    }
  }
}
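/* Illustrative sketch of how the barrier code typically pairs these templates.
   The variable names thr_bar, other_thr and itt_sync_obj come from
   kmp_barrier.cpp and are shown only as an example, not as part of this
   header's interface:

   // Waiting side: block on the go-flag until it is bumped.
   kmp_flag_64 flag(&thr_bar->b_go, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
   flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));

   // Releasing side: bump the waiter's go-flag and wake it if it sleeps.
   kmp_flag_64 flag(&other_thr->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                    other_thr);
   flag.release();
*/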
template <typename FlagType> struct flag_traits {};

template <> struct flag_traits<kmp_uint32> {
  typedef kmp_uint32 flag_t;
  static const flag_type t = flag32;
  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR32(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND32(f, v);
  }
};
template <> struct flag_traits<kmp_uint64> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = flag64;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};
// Basic flag backed by a native (volatile) word; does not use C++11 atomics.
template <typename FlagType>
class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value the flag is compared against to check release. */
  kmp_info_t *waiting_threads[1]; /**< Threads sleeping on this flag. */
  kmp_uint32 num_waiting_threads; /**< Number of threads sleeping on this flag. */
public:
  kmp_basic_flag_native(volatile FlagType *p)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
      : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  // Register a single waiting thread at index 0.
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  // true if the flag object has been released.
  bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
  // true if the old flag value indicates the flag was released.
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  // true if the flag object is not yet released.
  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
  // Release waiting threads by bumping the flag to the released value.
  void internal_release() {
    (void)traits_type::test_then_add4((volatile FlagType *)this->get());
  }
  // Mark that at least one thread is sleeping on the flag; returns the
  // previous flag value.
  FlagType set_sleeping() {
    return traits_type::test_then_or((volatile FlagType *)this->get(),
                                     KMP_BARRIER_SLEEP_STATE);
  }
  // Mark that no threads are sleeping on the flag; returns the previous value.
  FlagType unset_sleeping() {
    return traits_type::test_then_and((volatile FlagType *)this->get(),
                                      ~KMP_BARRIER_SLEEP_STATE);
  }
  // Test whether the given flag value has the sleep bit(s) set.
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  // Test whether there are threads sleeping on the flag.
  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};
// Basic flag backed by std::atomic storage; otherwise mirrors
// kmp_basic_flag_native above.
template <typename FlagType> class kmp_basic_flag : public kmp_flag<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value the flag is compared against to check release. */
  kmp_info_t *waiting_threads[1]; /**< Threads sleeping on this flag. */
  kmp_uint32 num_waiting_threads; /**< Number of threads sleeping on this flag. */
public:
  kmp_basic_flag(std::atomic<FlagType> *p)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
      : kmp_flag<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check() { return this->load() == checker; }
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  bool notdone_check() { return this->load() != checker; }
  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
  FlagType set_sleeping() {
    return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
  }
  FlagType unset_sleeping() {
    return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(this->load()); }
  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};
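// The concrete flag types below (kmp_flag_32, kmp_flag_64, kmp_flag_oncore)
// are the instantiations used by the barrier and tasking code; each one wires
// the generic wait/release templates to the matching __kmp_suspend_*,
// __kmp_resume_* and __kmp_execute_tasks_* entry points.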
class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
public:
  kmp_flag_32(std::atomic<kmp_uint32> *p) : kmp_basic_flag<kmp_uint32>(p) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
      : kmp_basic_flag<kmp_uint32>(p, thr) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
      : kmp_basic_flag<kmp_uint32>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_32(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_32, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_32, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
};
class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64> {
public:
  kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag_native<kmp_uint64>(p) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
      : kmp_basic_flag_native<kmp_uint64>(p, thr) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
      : kmp_basic_flag_native<kmp_uint64>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_64(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_64, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_64, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
};
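// Hierarchical 64-bit on-core barrier instantiation: each waiter watches a
// single byte (selected by `offset`) within a shared 64-bit flag word.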
class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
  kmp_uint64 checker;
  kmp_info_t *waiting_threads[1]; /**< Threads sleeping on this flag. */
  kmp_uint32 num_waiting_threads; /**< Number of threads sleeping on this flag. */
  kmp_uint32 offset; /**< Portion of the flag of interest for an operation. */
  bool flag_switch; /**< Indicates a switch in flag location. */
  enum barrier_type bt; /**< Barrier type. */
  kmp_info_t *this_thr; /**< Thread to redirect to a different flag location. */
#if USE_ITT_BUILD
  void *itt_sync_obj; /**< ITT object to pass to the new flag location. */
#endif
  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
    return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
  }

public:
  kmp_flag_oncore(volatile kmp_uint64 *p)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        offset(idx), flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
                  enum barrier_type bar_t,
                  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
        num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
        this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check_val(kmp_uint64 old_loc) {
    return byteref(&old_loc, offset) == checker;
  }
  bool done_check() { return done_check_val(*get()); }
  bool notdone_check() {
    // Calculate flag_switch.
    if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
      flag_switch = true;
    if (byteref(get(), offset) != 1 && !flag_switch)
      return true;
    else if (flag_switch) {
      this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
      kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
                       (kmp_uint64)KMP_BARRIER_STATE_BUMP);
      __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    return false;
  }
  void internal_release() {
    // Other threads can write their own bytes simultaneously.
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
      byteref(get(), offset) = 1;
    } else {
      kmp_uint64 mask = 0;
      byteref(&mask, offset) = 1;
      KMP_TEST_THEN_OR64(get(), mask);
    }
  }
  kmp_uint64 set_sleeping() {
    return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
  }
  kmp_uint64 unset_sleeping() {
    return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(kmp_uint64 old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*get()); }
  bool is_any_sleeping() { return is_sleeping_val(*get()); }
  void wait(kmp_info_t *this_thr, int final_spin) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_oncore, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_oncore, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bt; }
};
// Used to wake up a thread; `flag` is usually that thread's th_sleep_loc.
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
  if (!flag)
    return;

  switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) {
  case flag32:
    __kmp_resume_32(gtid, NULL);
    break;
  case flag64:
    __kmp_resume_64(gtid, NULL);
    break;
  case flag_oncore:
    __kmp_resume_oncore(gtid, NULL);
    break;
  }
}
#endif // KMP_WAIT_RELEASE_H