LLVM OpenMP* Runtime Library
z_Windows_NT_util.cpp
1 /*
2  * z_Windows_NT_util.cpp -- platform specific routines.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // The LLVM Compiler Infrastructure
8 //
9 // This file is dual licensed under the MIT and the University of Illinois Open
10 // Source Licenses. See LICENSE.txt for details.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "kmp.h"
15 #include "kmp_affinity.h"
16 #include "kmp_i18n.h"
17 #include "kmp_io.h"
18 #include "kmp_itt.h"
19 #include "kmp_wait_release.h"
20 
21 /* This code is related to NtQuerySystemInformation() function. This function
22  is used in the Load balance algorithm for OMP_DYNAMIC=true to find the
23  number of running threads in the system. */
24 
25 #include <ntsecapi.h> // UNICODE_STRING
26 #include <ntstatus.h>
27 
// Partial redeclaration of the (undocumented) ntdll information-class enum;
// only the value this file actually passes to NtQuerySystemInformation is
// listed.
enum SYSTEM_INFORMATION_CLASS {
  SystemProcessInformation = 5
}; // SYSTEM_INFORMATION_CLASS
31 
// Process/thread identifier pair embedded in SYSTEM_THREAD records.
struct CLIENT_ID {
  HANDLE UniqueProcess;
  HANDLE UniqueThread;
}; // struct CLIENT_ID
36 
// Thread scheduler states as reported in SYSTEM_THREAD::State.
enum THREAD_STATE {
  StateInitialized,
  StateReady,
  StateRunning,
  StateStandby,
  StateTerminated,
  StateWait,
  StateTransition,
  StateUnknown
}; // enum THREAD_STATE
47 
// Per-process virtual-memory counters; part of the
// SYSTEM_PROCESS_INFORMATION layout returned by NtQuerySystemInformation.
struct VM_COUNTERS {
  SIZE_T PeakVirtualSize;
  SIZE_T VirtualSize;
  ULONG PageFaultCount;
  SIZE_T PeakWorkingSetSize;
  SIZE_T WorkingSetSize;
  SIZE_T QuotaPeakPagedPoolUsage;
  SIZE_T QuotaPagedPoolUsage;
  SIZE_T QuotaPeakNonPagedPoolUsage;
  SIZE_T QuotaNonPagedPoolUsage;
  SIZE_T PagefileUsage;
  SIZE_T PeakPagefileUsage;
  SIZE_T PrivatePageCount;
}; // struct VM_COUNTERS
62 
// Per-thread record embedded at the tail of SYSTEM_PROCESS_INFORMATION.
struct SYSTEM_THREAD {
  LARGE_INTEGER KernelTime;
  LARGE_INTEGER UserTime;
  LARGE_INTEGER CreateTime;
  ULONG WaitTime;
  LPVOID StartAddress;
  CLIENT_ID ClientId;
  DWORD Priority;
  LONG BasePriority;
  ULONG ContextSwitchCount;
  THREAD_STATE State;
  ULONG WaitReason;
}; // SYSTEM_THREAD

// Pin the struct layout against the offsets the OS actually uses; the 32-
// and 64-bit ABIs differ because of pointer size and alignment padding.
KMP_BUILD_ASSERT(offsetof(SYSTEM_THREAD, KernelTime) == 0);
#if KMP_ARCH_X86
KMP_BUILD_ASSERT(offsetof(SYSTEM_THREAD, StartAddress) == 28);
KMP_BUILD_ASSERT(offsetof(SYSTEM_THREAD, State) == 52);
#else
KMP_BUILD_ASSERT(offsetof(SYSTEM_THREAD, StartAddress) == 32);
KMP_BUILD_ASSERT(offsetof(SYSTEM_THREAD, State) == 68);
#endif
85 
// Per-process record returned by NtQuerySystemInformation(
// SystemProcessInformation). Records are chained via NextEntryOffset; the
// Threads[] array actually holds NumberOfThreads entries (declared as [1]
// in the classic pre-flexible-array-member Windows style).
struct SYSTEM_PROCESS_INFORMATION {
  ULONG NextEntryOffset;
  ULONG NumberOfThreads;
  LARGE_INTEGER Reserved[3];
  LARGE_INTEGER CreateTime;
  LARGE_INTEGER UserTime;
  LARGE_INTEGER KernelTime;
  UNICODE_STRING ImageName;
  DWORD BasePriority;
  HANDLE ProcessId;
  HANDLE ParentProcessId;
  ULONG HandleCount;
  ULONG Reserved2[2];
  VM_COUNTERS VMCounters;
  IO_COUNTERS IOCounters;
  SYSTEM_THREAD Threads[1];
}; // SYSTEM_PROCESS_INFORMATION
typedef SYSTEM_PROCESS_INFORMATION *PSYSTEM_PROCESS_INFORMATION;

// Layout checks mirroring the OS structure for both ABIs (see SYSTEM_THREAD
// asserts above).
KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, NextEntryOffset) == 0);
KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, CreateTime) == 32);
KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, ImageName) == 56);
#if KMP_ARCH_X86
KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, ProcessId) == 68);
KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, HandleCount) == 76);
KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, VMCounters) == 88);
KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, IOCounters) == 136);
KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, Threads) == 184);
#else
KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, ProcessId) == 80);
KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, HandleCount) == 96);
KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, VMCounters) == 112);
KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, IOCounters) == 208);
KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, Threads) == 256);
#endif
121 
122 typedef NTSTATUS(NTAPI *NtQuerySystemInformation_t)(SYSTEM_INFORMATION_CLASS,
123  PVOID, ULONG, PULONG);
124 NtQuerySystemInformation_t NtQuerySystemInformation = NULL;
125 
126 HMODULE ntdll = NULL;
127 
128 /* End of NtQuerySystemInformation()-related code */
129 
130 static HMODULE kernel32 = NULL;
131 
132 #if KMP_HANDLE_SIGNALS
133 typedef void (*sig_func_t)(int);
134 static sig_func_t __kmp_sighldrs[NSIG];
135 static int __kmp_siginstalled[NSIG];
136 #endif
137 
138 #if KMP_USE_MONITOR
139 static HANDLE __kmp_monitor_ev;
140 #endif
141 static kmp_int64 __kmp_win32_time;
142 double __kmp_win32_tick;
143 
144 int __kmp_init_runtime = FALSE;
145 CRITICAL_SECTION __kmp_win32_section;
146 
147 void __kmp_win32_mutex_init(kmp_win32_mutex_t *mx) {
148  InitializeCriticalSection(&mx->cs);
149 #if USE_ITT_BUILD
150  __kmp_itt_system_object_created(&mx->cs, "Critical Section");
151 #endif /* USE_ITT_BUILD */
152 }
153 
154 void __kmp_win32_mutex_destroy(kmp_win32_mutex_t *mx) {
155  DeleteCriticalSection(&mx->cs);
156 }
157 
158 void __kmp_win32_mutex_lock(kmp_win32_mutex_t *mx) {
159  EnterCriticalSection(&mx->cs);
160 }
161 
162 int __kmp_win32_mutex_trylock(kmp_win32_mutex_t *mx) {
163  return TryEnterCriticalSection(&mx->cs);
164 }
165 
166 void __kmp_win32_mutex_unlock(kmp_win32_mutex_t *mx) {
167  LeaveCriticalSection(&mx->cs);
168 }
169 
170 void __kmp_win32_cond_init(kmp_win32_cond_t *cv) {
171  cv->waiters_count_ = 0;
172  cv->wait_generation_count_ = 0;
173  cv->release_count_ = 0;
174 
175  /* Initialize the critical section */
176  __kmp_win32_mutex_init(&cv->waiters_count_lock_);
177 
178  /* Create a manual-reset event. */
179  cv->event_ = CreateEvent(NULL, // no security
180  TRUE, // manual-reset
181  FALSE, // non-signaled initially
182  NULL); // unnamed
183 #if USE_ITT_BUILD
184  __kmp_itt_system_object_created(cv->event_, "Event");
185 #endif /* USE_ITT_BUILD */
186 }
187 
188 void __kmp_win32_cond_destroy(kmp_win32_cond_t *cv) {
189  __kmp_win32_mutex_destroy(&cv->waiters_count_lock_);
190  __kmp_free_handle(cv->event_);
191  memset(cv, '\0', sizeof(*cv));
192 }
193 
194 /* TODO associate cv with a team instead of a thread so as to optimize
195  the case where we wake up a whole team */
196 
/* Block the calling thread on <cv>, releasing external mutex <mx> while
   asleep and re-acquiring it before returning. <th> and
   <need_decrease_load> are accepted but not used by this implementation.
   Uses a "generation count" scheme so a broadcast releases exactly the
   threads that were waiting when it was issued. */
void __kmp_win32_cond_wait(kmp_win32_cond_t *cv, kmp_win32_mutex_t *mx,
                           kmp_info_t *th, int need_decrease_load) {
  int my_generation;
  int last_waiter;

  /* Avoid race conditions */
  __kmp_win32_mutex_lock(&cv->waiters_count_lock_);

  /* Increment count of waiters */
  cv->waiters_count_++;

  /* Store current generation in our activation record. */
  my_generation = cv->wait_generation_count_;

  __kmp_win32_mutex_unlock(&cv->waiters_count_lock_);
  __kmp_win32_mutex_unlock(mx);

  for (;;) {
    int wait_done;

    /* Wait until the event is signaled */
    WaitForSingleObject(cv->event_, INFINITE);

    __kmp_win32_mutex_lock(&cv->waiters_count_lock_);

    /* Exit the loop when the <cv->event_> is signaled and there are still
       waiting threads from this <wait_generation> that haven't been released
       from this wait yet. */
    wait_done = (cv->release_count_ > 0) &&
                (cv->wait_generation_count_ != my_generation);

    __kmp_win32_mutex_unlock(&cv->waiters_count_lock_);

    /* there used to be a semicolon after the if statement, it looked like a
       bug, so i removed it */
    if (wait_done)
      break;
  }

  /* Woken up: re-acquire the caller's mutex, then account for our release. */
  __kmp_win32_mutex_lock(mx);
  __kmp_win32_mutex_lock(&cv->waiters_count_lock_);

  cv->waiters_count_--;
  cv->release_count_--;

  /* The last released waiter is responsible for resetting the event. */
  last_waiter = (cv->release_count_ == 0);

  __kmp_win32_mutex_unlock(&cv->waiters_count_lock_);

  if (last_waiter) {
    /* We're the last waiter to be notified, so reset the manual event. */
    ResetEvent(cv->event_);
  }
}
251 
/* Wake every thread currently waiting on <cv>: signal the manual-reset
   event, record how many waiters must be released, and bump the generation
   count so only the current generation passes the wait_done test in
   __kmp_win32_cond_wait(). No-op when there are no waiters. */
void __kmp_win32_cond_broadcast(kmp_win32_cond_t *cv) {
  __kmp_win32_mutex_lock(&cv->waiters_count_lock_);

  if (cv->waiters_count_ > 0) {
    SetEvent(cv->event_);
    /* Release all the threads in this generation. */

    cv->release_count_ = cv->waiters_count_;

    /* Start a new generation. */
    cv->wait_generation_count_++;
  }

  __kmp_win32_mutex_unlock(&cv->waiters_count_lock_);
}
267 
/* Signal <cv>. Implemented as a broadcast, so this wakes every waiter,
   not just one. */
void __kmp_win32_cond_signal(kmp_win32_cond_t *cv) {
  __kmp_win32_cond_broadcast(cv);
}
271 
/* Leave the global runtime critical section (paired with __kmp_disable()).
   <new_state> is accepted for interface compatibility but unused here. */
void __kmp_enable(int new_state) {
  if (__kmp_init_runtime)
    LeaveCriticalSection(&__kmp_win32_section);
}
276 
/* Enter the global runtime critical section (paired with __kmp_enable()).
   *old_state is always set to 0 — this implementation keeps no prior state
   for __kmp_enable() to restore. */
void __kmp_disable(int *old_state) {
  *old_state = 0;

  if (__kmp_init_runtime)
    EnterCriticalSection(&__kmp_win32_section);
}
283 
/* Process-wide suspend/resume setup: nothing needed on Windows; per-thread
   objects are created lazily by __kmp_suspend_initialize_thread(). */
void __kmp_suspend_initialize(void) { /* do nothing */
}
286 
/* Lazily create the suspend condition variable and mutex for <th>.
   th_suspend_init is the "already initialized" flag, accessed through the
   TCR_4/TCW_4 volatile-access macros. */
static void __kmp_suspend_initialize_thread(kmp_info_t *th) {
  if (!TCR_4(th->th.th_suspend_init)) {
    /* this means we haven't initialized the suspension objects for this
       thread in this instance of the process */
    __kmp_win32_cond_init(&th->th.th_suspend_cv);
    __kmp_win32_mutex_init(&th->th.th_suspend_mx);
    TCW_4(th->th.th_suspend_init, TRUE);
  }
}
296 
/* Destroy the suspend condition variable and mutex for <th>, if they were
   ever created, and clear the initialization flag so they could be
   re-created later. */
void __kmp_suspend_uninitialize_thread(kmp_info_t *th) {
  if (TCR_4(th->th.th_suspend_init)) {
    /* this means we have initialized the suspension objects for this
       thread in this instance of the process */
    __kmp_win32_cond_destroy(&th->th.th_suspend_cv);
    __kmp_win32_mutex_destroy(&th->th.th_suspend_mx);
    TCW_4(th->th.th_suspend_init, FALSE);
  }
}
306 
307 int __kmp_try_suspend_mx(kmp_info_t *th) {
308  return __kmp_win32_mutex_trylock(&th->th.th_suspend_mx);
309 }
310 
311 void __kmp_lock_suspend_mx(kmp_info_t *th) {
312  __kmp_win32_mutex_lock(&th->th.th_suspend_mx);
313 }
314 
315 void __kmp_unlock_suspend_mx(kmp_info_t *th) {
316  __kmp_win32_mutex_unlock(&th->th.th_suspend_mx);
317 }
318 
319 /* This routine puts the calling thread to sleep after setting the
320  sleep bit for the indicated flag variable to true. */
321 template <class C>
322 static inline void __kmp_suspend_template(int th_gtid, C *flag) {
323  kmp_info_t *th = __kmp_threads[th_gtid];
324  int status;
325  typename C::flag_t old_spin;
326 
327  KF_TRACE(30, ("__kmp_suspend_template: T#%d enter for flag's loc(%p)\n",
328  th_gtid, flag->get()));
329 
330  __kmp_suspend_initialize_thread(th);
331  __kmp_win32_mutex_lock(&th->th.th_suspend_mx);
332 
333  KF_TRACE(10, ("__kmp_suspend_template: T#%d setting sleep bit for flag's"
334  " loc(%p)\n",
335  th_gtid, flag->get()));
336 
337  /* TODO: shouldn't this use release semantics to ensure that
338  __kmp_suspend_initialize_thread gets called first? */
339  old_spin = flag->set_sleeping();
340 #if OMP_50_ENABLED
341  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
342  __kmp_pause_status != kmp_soft_paused) {
343  flag->unset_sleeping();
344  __kmp_win32_mutex_unlock(&th->th.th_suspend_mx);
345  return;
346  }
347 #endif
348 
349  KF_TRACE(5, ("__kmp_suspend_template: T#%d set sleep bit for flag's"
350  " loc(%p)==%d\n",
351  th_gtid, flag->get(), *(flag->get())));
352 
353  if (flag->done_check_val(old_spin)) {
354  old_spin = flag->unset_sleeping();
355  KF_TRACE(5, ("__kmp_suspend_template: T#%d false alarm, reset sleep bit "
356  "for flag's loc(%p)\n",
357  th_gtid, flag->get()));
358  } else {
359 #ifdef DEBUG_SUSPEND
360  __kmp_suspend_count++;
361 #endif
362  /* Encapsulate in a loop as the documentation states that this may "with
363  low probability" return when the condition variable has not been signaled
364  or broadcast */
365  int deactivated = FALSE;
366  TCW_PTR(th->th.th_sleep_loc, (void *)flag);
367  while (flag->is_sleeping()) {
368  KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform "
369  "kmp_win32_cond_wait()\n",
370  th_gtid));
371  // Mark the thread as no longer active (only in the first iteration of the
372  // loop).
373  if (!deactivated) {
374  th->th.th_active = FALSE;
375  if (th->th.th_active_in_pool) {
376  th->th.th_active_in_pool = FALSE;
377  KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
378  KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
379  }
380  deactivated = TRUE;
381 
382  __kmp_win32_cond_wait(&th->th.th_suspend_cv, &th->th.th_suspend_mx, 0,
383  0);
384  } else {
385  __kmp_win32_cond_wait(&th->th.th_suspend_cv, &th->th.th_suspend_mx, 0,
386  0);
387  }
388 
389 #ifdef KMP_DEBUG
390  if (flag->is_sleeping()) {
391  KF_TRACE(100,
392  ("__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid));
393  }
394 #endif /* KMP_DEBUG */
395 
396  } // while
397 
398  // Mark the thread as active again (if it was previous marked as inactive)
399  if (deactivated) {
400  th->th.th_active = TRUE;
401  if (TCR_4(th->th.th_in_pool)) {
402  KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
403  th->th.th_active_in_pool = TRUE;
404  }
405  }
406  }
407 
408  __kmp_win32_mutex_unlock(&th->th.th_suspend_mx);
409 
410  KF_TRACE(30, ("__kmp_suspend_template: T#%d exit\n", th_gtid));
411 }
412 
// Non-template entry points instantiating __kmp_suspend_template for each
// supported flag width.
void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) {
  __kmp_suspend_template(th_gtid, flag);
}
void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) {
  __kmp_suspend_template(th_gtid, flag);
}
void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
  __kmp_suspend_template(th_gtid, flag);
}
422 
423 /* This routine signals the thread specified by target_gtid to wake up
424  after setting the sleep bit indicated by the flag argument to FALSE */
425 template <class C>
426 static inline void __kmp_resume_template(int target_gtid, C *flag) {
427  kmp_info_t *th = __kmp_threads[target_gtid];
428  int status;
429 
430 #ifdef KMP_DEBUG
431  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
432 #endif
433 
434  KF_TRACE(30, ("__kmp_resume_template: T#%d wants to wakeup T#%d enter\n",
435  gtid, target_gtid));
436 
437  __kmp_suspend_initialize_thread(th);
438  __kmp_win32_mutex_lock(&th->th.th_suspend_mx);
439 
440  if (!flag) { // coming from __kmp_null_resume_wrapper
441  flag = (C *)th->th.th_sleep_loc;
442  }
443 
444  // First, check if the flag is null or its type has changed. If so, someone
445  // else woke it up.
446  if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type
447  // simply shows what
448  // flag was cast to
449  KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
450  "awake: flag's loc(%p)\n",
451  gtid, target_gtid, NULL));
452  __kmp_win32_mutex_unlock(&th->th.th_suspend_mx);
453  return;
454  } else {
455  typename C::flag_t old_spin = flag->unset_sleeping();
456  if (!flag->is_sleeping_val(old_spin)) {
457  KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
458  "awake: flag's loc(%p): %u => %u\n",
459  gtid, target_gtid, flag->get(), old_spin, *(flag->get())));
460  __kmp_win32_mutex_unlock(&th->th.th_suspend_mx);
461  return;
462  }
463  }
464  TCW_PTR(th->th.th_sleep_loc, NULL);
465  KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset sleep "
466  "bit for flag's loc(%p)\n",
467  gtid, target_gtid, flag->get()));
468 
469  __kmp_win32_cond_signal(&th->th.th_suspend_cv);
470  __kmp_win32_mutex_unlock(&th->th.th_suspend_mx);
471 
472  KF_TRACE(30, ("__kmp_resume_template: T#%d exiting after signaling wake up"
473  " for T#%d\n",
474  gtid, target_gtid));
475 }
476 
// Non-template entry points instantiating __kmp_resume_template for each
// supported flag width.
void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) {
  __kmp_resume_template(target_gtid, flag);
}
void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) {
  __kmp_resume_template(target_gtid, flag);
}
void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
  __kmp_resume_template(target_gtid, flag);
}
486 
487 void __kmp_yield(int cond) {
488  if (cond)
489  Sleep(0);
490 }
491 
/* Store <gtid> for the calling thread in TLS under
   __kmp_gtid_threadprivate_key. The value is biased by +1 so that a
   TlsGetValue() result of 0 can mean "never set" (see
   __kmp_gtid_get_specific). No-op, with a trace message, once the runtime
   has shut down. */
void __kmp_gtid_set_specific(int gtid) {
  if (__kmp_init_gtid) {
    KA_TRACE(50, ("__kmp_gtid_set_specific: T#%d key:%d\n", gtid,
                  __kmp_gtid_threadprivate_key));
    if (!TlsSetValue(__kmp_gtid_threadprivate_key, (LPVOID)(gtid + 1)))
      KMP_FATAL(TLSSetValueFailed);
  } else {
    KA_TRACE(50, ("__kmp_gtid_set_specific: runtime shutdown, returning\n"));
  }
}
502 
503 int __kmp_gtid_get_specific() {
504  int gtid;
505  if (!__kmp_init_gtid) {
506  KA_TRACE(50, ("__kmp_gtid_get_specific: runtime shutdown, returning "
507  "KMP_GTID_SHUTDOWN\n"));
508  return KMP_GTID_SHUTDOWN;
509  }
510  gtid = (int)(kmp_intptr_t)TlsGetValue(__kmp_gtid_threadprivate_key);
511  if (gtid == 0) {
512  gtid = KMP_GTID_DNE;
513  } else {
514  gtid--;
515  }
516  KA_TRACE(50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n",
517  __kmp_gtid_threadprivate_key, gtid));
518  return gtid;
519 }
520 
/* Bind the calling thread to OS proc <proc>. With multiple processor
   groups, builds a GROUP_AFFINITY with a single mask bit and applies it via
   the dynamically loaded SetThreadGroupAffinity; otherwise sets a one-bit
   affinity mask through __kmp_set_system_affinity. Failures are only
   reported when affinity verbosity is enabled. */
void __kmp_affinity_bind_thread(int proc) {
  if (__kmp_num_proc_groups > 1) {
    // Form the GROUP_AFFINITY struct directly, rather than filling
    // out a bit vector and calling __kmp_set_system_affinity().
    GROUP_AFFINITY ga;
    KMP_DEBUG_ASSERT((proc >= 0) && (proc < (__kmp_num_proc_groups * CHAR_BIT *
                                             sizeof(DWORD_PTR))));
    // Each group covers CHAR_BIT * sizeof(DWORD_PTR) procs: split <proc>
    // into a group index and a bit position within that group's mask.
    ga.Group = proc / (CHAR_BIT * sizeof(DWORD_PTR));
    ga.Mask = (unsigned long long)1 << (proc % (CHAR_BIT * sizeof(DWORD_PTR)));
    ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

    KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
    if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
      DWORD error = GetLastError();
      if (__kmp_affinity_verbose) { // AC: continue silently if not verbose
        kmp_msg_t err_code = KMP_ERR(error);
        __kmp_msg(kmp_ms_warning, KMP_MSG(CantSetThreadAffMask), err_code,
                  __kmp_msg_null);
        if (__kmp_generate_warnings == kmp_warnings_off) {
          __kmp_str_free(&err_code.str);
        }
      }
    }
  } else {
    // Single-group system: one-bit mask through the generic affinity path.
    kmp_affin_mask_t *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(proc, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
}
553 
/* Mark the affinity interface as usable and record the affinity mask size:
   one DWORD_PTR per processor group when group affinity is compiled in,
   a single DWORD_PTR otherwise. <env_var> is unused here. */
void __kmp_affinity_determine_capable(const char *env_var) {
// All versions of Windows* OS (since Win '95) support SetThreadAffinityMask().

#if KMP_GROUP_AFFINITY
  KMP_AFFINITY_ENABLE(__kmp_num_proc_groups * sizeof(DWORD_PTR));
#else
  KMP_AFFINITY_ENABLE(sizeof(DWORD_PTR));
#endif

  KA_TRACE(10, ("__kmp_affinity_determine_capable: "
                "Windows* OS affinity interface functional (mask size = "
                "%" KMP_SIZE_T_SPEC ").\n",
                __kmp_affin_mask_size));
}
568 
/* Return this process's accumulated CPU time (kernel + user) in seconds,
   or 0 if GetProcessTimes() fails. FILETIME values are expressed in
   100-nanosecond units split into low/high 32-bit halves. */
double __kmp_read_cpu_time(void) {
  FILETIME CreationTime, ExitTime, KernelTime, UserTime;
  int status;
  double cpu_time;

  cpu_time = 0;

  status = GetProcessTimes(GetCurrentProcess(), &CreationTime, &ExitTime,
                           &KernelTime, &UserTime);

  if (status) {
    double sec = 0;

    // Sum the high 32-bit halves of both times...
    sec += KernelTime.dwHighDateTime;
    sec += UserTime.dwHighDateTime;

    /* Shift left by 32 bits */
    sec *= (double)(1 << 16) * (double)(1 << 16);

    // ...then add the low halves; sec is now the total in 100ns units.
    sec += KernelTime.dwLowDateTime;
    sec += UserTime.dwLowDateTime;

    // 100ns units * 100 = nanoseconds; divide by ns-per-second for seconds.
    cpu_time += (sec * 100.0) / KMP_NSEC_PER_SEC;
  }

  return cpu_time;
}
596 
/* Fill <info> with rusage-style statistics. This Windows implementation
   supplies none of them, so every field is zeroed; always returns 1
   (success). */
int __kmp_read_system_info(struct kmp_sys_info *info) {
  info->maxrss = 0; /* the maximum resident set size utilized (in kilobytes) */
  info->minflt = 0; /* the number of page faults serviced without any I/O */
  info->majflt = 0; /* the number of page faults serviced that required I/O */
  info->nswap = 0; // the number of times a process was "swapped" out of memory
  info->inblock = 0; // the number of times the file system had to perform input
  info->oublock = 0; // number of times the file system had to perform output
  info->nvcsw = 0; /* the number of times a context switch was voluntarily */
  info->nivcsw = 0; /* the number of times a context switch was forced */

  return 1;
}
609 
/* One-time process-wide initialization: pin the DLL (dynamic builds),
   create the global critical section, calibrate the QPC timer, set up the
   TLS gtid key, load NtQuerySystemInformation from ntdll, optionally load
   the processor-group entry points from kernel32, and determine the
   processor count __kmp_xproc. Idempotent via __kmp_init_runtime. */
void __kmp_runtime_initialize(void) {
  SYSTEM_INFO info;
  kmp_str_buf_t path;
  UINT path_size;

  if (__kmp_init_runtime) {
    return;
  }

#if KMP_DYNAMIC_LIB
  /* Pin dynamic library for the lifetime of application */
  {
    // First, turn off error message boxes
    UINT err_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
    HMODULE h;
    BOOL ret = GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
                                     GET_MODULE_HANDLE_EX_FLAG_PIN,
                                 (LPCTSTR)&__kmp_serial_initialize, &h);
    KMP_DEBUG_ASSERT2(h && ret, "OpenMP RTL cannot find itself loaded");
    SetErrorMode(err_mode); // Restore error mode
    KA_TRACE(10, ("__kmp_runtime_initialize: dynamic library pinned\n"));
  }
#endif

  InitializeCriticalSection(&__kmp_win32_section);
#if USE_ITT_BUILD
  __kmp_itt_system_object_created(&__kmp_win32_section, "Critical Section");
#endif /* USE_ITT_BUILD */
  __kmp_initialize_system_tick();

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

/* Set up minimum number of threads to switch to TLS gtid */
#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
  // Windows* OS, static library.
  /* New thread may use stack space previously used by another thread,
     currently terminated. On Windows* OS, in case of static linking, we do not
     know the moment of thread termination, and our structures (__kmp_threads
     and __kmp_root arrays) are still keep info about dead threads. This leads
     to problem in __kmp_get_global_thread_id() function: it wrongly finds gtid
     (by searching through stack addresses of all known threads) for
     unregistered foreign tread.

     Setting __kmp_tls_gtid_min to 0 workarounds this problem:
     __kmp_get_global_thread_id() does not search through stacks, but get gtid
     from TLS immediately.
     --ln
  */
  __kmp_tls_gtid_min = 0;
#else
  __kmp_tls_gtid_min = KMP_TLS_GTID_MIN;
#endif

  /* for the static library */
  if (!__kmp_gtid_threadprivate_key) {
    __kmp_gtid_threadprivate_key = TlsAlloc();
    if (__kmp_gtid_threadprivate_key == TLS_OUT_OF_INDEXES) {
      KMP_FATAL(TLSOutOfIndexes);
    }
  }

  // Load ntdll.dll.
  /* Simple GetModuleHandle( "ntdll.dl" ) is not suitable due to security issue
     (see http://www.microsoft.com/technet/security/advisory/2269637.mspx). We
     have to specify full path to the library. */
  __kmp_str_buf_init(&path);
  path_size = GetSystemDirectory(path.str, path.size);
  KMP_DEBUG_ASSERT(path_size > 0);
  if (path_size >= path.size) {
    // Buffer is too short. Expand the buffer and try again.
    __kmp_str_buf_reserve(&path, path_size);
    path_size = GetSystemDirectory(path.str, path.size);
    KMP_DEBUG_ASSERT(path_size > 0);
  }
  if (path_size > 0 && path_size < path.size) {
    // Now we have system directory name in the buffer.
    // Append backslash and name of dll to form full path,
    path.used = path_size;
    __kmp_str_buf_print(&path, "\\%s", "ntdll.dll");

    // Now load ntdll using full path.
    ntdll = GetModuleHandle(path.str);
  }

  KMP_DEBUG_ASSERT(ntdll != NULL);
  if (ntdll != NULL) {
    NtQuerySystemInformation = (NtQuerySystemInformation_t)GetProcAddress(
        ntdll, "NtQuerySystemInformation");
  }
  KMP_DEBUG_ASSERT(NtQuerySystemInformation != NULL);

#if KMP_GROUP_AFFINITY
  // Load kernel32.dll.
  // Same caveat - must use full system path name.
  if (path_size > 0 && path_size < path.size) {
    // Truncate the buffer back to just the system path length,
    // discarding "\\ntdll.dll", and replacing it with "kernel32.dll".
    path.used = path_size;
    __kmp_str_buf_print(&path, "\\%s", "kernel32.dll");

    // Load kernel32.dll using full path.
    kernel32 = GetModuleHandle(path.str);
    KA_TRACE(10, ("__kmp_runtime_initialize: kernel32.dll = %s\n", path.str));

    // Load the function pointers to kernel32.dll routines
    // that may or may not exist on this system.
    if (kernel32 != NULL) {
      __kmp_GetActiveProcessorCount =
          (kmp_GetActiveProcessorCount_t)GetProcAddress(
              kernel32, "GetActiveProcessorCount");
      __kmp_GetActiveProcessorGroupCount =
          (kmp_GetActiveProcessorGroupCount_t)GetProcAddress(
              kernel32, "GetActiveProcessorGroupCount");
      __kmp_GetThreadGroupAffinity =
          (kmp_GetThreadGroupAffinity_t)GetProcAddress(
              kernel32, "GetThreadGroupAffinity");
      __kmp_SetThreadGroupAffinity =
          (kmp_SetThreadGroupAffinity_t)GetProcAddress(
              kernel32, "SetThreadGroupAffinity");

      KA_TRACE(10, ("__kmp_runtime_initialize: __kmp_GetActiveProcessorCount"
                    " = %p\n",
                    __kmp_GetActiveProcessorCount));
      KA_TRACE(10, ("__kmp_runtime_initialize: "
                    "__kmp_GetActiveProcessorGroupCount = %p\n",
                    __kmp_GetActiveProcessorGroupCount));
      KA_TRACE(10, ("__kmp_runtime_initialize:__kmp_GetThreadGroupAffinity"
                    " = %p\n",
                    __kmp_GetThreadGroupAffinity));
      KA_TRACE(10, ("__kmp_runtime_initialize: __kmp_SetThreadGroupAffinity"
                    " = %p\n",
                    __kmp_SetThreadGroupAffinity));
      KA_TRACE(10, ("__kmp_runtime_initialize: sizeof(kmp_affin_mask_t) = %d\n",
                    sizeof(kmp_affin_mask_t)));

      // See if group affinity is supported on this system.
      // If so, calculate the #groups and #procs.
      //
      // Group affinity was introduced with Windows* 7 OS and
      // Windows* Server 2008 R2 OS.
      if ((__kmp_GetActiveProcessorCount != NULL) &&
          (__kmp_GetActiveProcessorGroupCount != NULL) &&
          (__kmp_GetThreadGroupAffinity != NULL) &&
          (__kmp_SetThreadGroupAffinity != NULL) &&
          ((__kmp_num_proc_groups = __kmp_GetActiveProcessorGroupCount()) >
           1)) {
        // Calculate the total number of active OS procs.
        int i;

        KA_TRACE(10, ("__kmp_runtime_initialize: %d processor groups"
                      " detected\n",
                      __kmp_num_proc_groups));

        __kmp_xproc = 0;

        for (i = 0; i < __kmp_num_proc_groups; i++) {
          DWORD size = __kmp_GetActiveProcessorCount(i);
          __kmp_xproc += size;
          KA_TRACE(10, ("__kmp_runtime_initialize: proc group %d size = %d\n",
                        i, size));
        }
      } else {
        KA_TRACE(10, ("__kmp_runtime_initialize: %d processor groups"
                      " detected\n",
                      __kmp_num_proc_groups));
      }
    }
  }
  // Single group (or group API unavailable): fall back to GetSystemInfo.
  if (__kmp_num_proc_groups <= 1) {
    GetSystemInfo(&info);
    __kmp_xproc = info.dwNumberOfProcessors;
  }
#else
  GetSystemInfo(&info);
  __kmp_xproc = info.dwNumberOfProcessors;
#endif /* KMP_GROUP_AFFINITY */

  // If the OS said there were 0 procs, take a guess and use a value of 2.
  // This is done for Linux* OS, also. Do we need error / warning?
  if (__kmp_xproc <= 0) {
    __kmp_xproc = 2;
  }

  KA_TRACE(5,
           ("__kmp_runtime_initialize: total processors = %d\n", __kmp_xproc));

  __kmp_str_buf_free(&path);

#if USE_ITT_BUILD
  __kmp_itt_initialize();
#endif /* USE_ITT_BUILD */

  __kmp_init_runtime = TRUE;
} // __kmp_runtime_initialize
808 
809 void __kmp_runtime_destroy(void) {
810  if (!__kmp_init_runtime) {
811  return;
812  }
813 
814 #if USE_ITT_BUILD
815  __kmp_itt_destroy();
816 #endif /* USE_ITT_BUILD */
817 
818  /* we can't DeleteCriticalsection( & __kmp_win32_section ); */
819  /* due to the KX_TRACE() commands */
820  KA_TRACE(40, ("__kmp_runtime_destroy\n"));
821 
822  if (__kmp_gtid_threadprivate_key) {
823  TlsFree(__kmp_gtid_threadprivate_key);
824  __kmp_gtid_threadprivate_key = 0;
825  }
826 
827  __kmp_affinity_uninitialize();
828  DeleteCriticalSection(&__kmp_win32_section);
829 
830  ntdll = NULL;
831  NtQuerySystemInformation = NULL;
832 
833 #if KMP_ARCH_X86_64
834  kernel32 = NULL;
835  __kmp_GetActiveProcessorCount = NULL;
836  __kmp_GetActiveProcessorGroupCount = NULL;
837  __kmp_GetThreadGroupAffinity = NULL;
838  __kmp_SetThreadGroupAffinity = NULL;
839 #endif // KMP_ARCH_X86_64
840 
841  __kmp_init_runtime = FALSE;
842 }
843 
/* Forcibly terminate the OS thread backing <gtid> (exit code -1) and
   release its handle. A TerminateThread failure is tolerated because the
   thread may already have exited. No-op for an unknown gtid. */
void __kmp_terminate_thread(int gtid) {
  kmp_info_t *th = __kmp_threads[gtid];

  if (!th)
    return;

  KA_TRACE(10, ("__kmp_terminate_thread: kill (%d)\n", gtid));

  if (TerminateThread(th->th.th_info.ds.ds_thread, (DWORD)-1) == FALSE) {
    /* It's OK, the thread may have exited already */
  }
  __kmp_free_handle(th->th.th_info.ds.ds_thread);
}
857 
858 void __kmp_clear_system_time(void) {
859  BOOL status;
860  LARGE_INTEGER time;
861  status = QueryPerformanceCounter(&time);
862  __kmp_win32_time = (kmp_int64)time.QuadPart;
863 }
864 
865 void __kmp_initialize_system_tick(void) {
866  {
867  BOOL status;
868  LARGE_INTEGER freq;
869 
870  status = QueryPerformanceFrequency(&freq);
871  if (!status) {
872  DWORD error = GetLastError();
873  __kmp_fatal(KMP_MSG(FunctionError, "QueryPerformanceFrequency()"),
874  KMP_ERR(error), __kmp_msg_null);
875 
876  } else {
877  __kmp_win32_tick = ((double)1.0) / (double)freq.QuadPart;
878  }
879  }
880 }
881 
882 /* Calculate the elapsed wall clock time for the user */
883 
884 void __kmp_elapsed(double *t) {
885  BOOL status;
886  LARGE_INTEGER now;
887  status = QueryPerformanceCounter(&now);
888  *t = ((double)now.QuadPart) * __kmp_win32_tick;
889 }
890 
891 /* Calculate the elapsed wall clock tick for the user */
892 
893 void __kmp_elapsed_tick(double *t) { *t = __kmp_win32_tick; }
894 
895 void __kmp_read_system_time(double *delta) {
896  if (delta != NULL) {
897  BOOL status;
898  LARGE_INTEGER now;
899 
900  status = QueryPerformanceCounter(&now);
901 
902  *delta = ((double)(((kmp_int64)now.QuadPart) - __kmp_win32_time)) *
903  __kmp_win32_tick;
904  }
905 }
906 
907 /* Return the current time stamp in nsec */
/* Return the current time stamp in nsec */
kmp_uint64 __kmp_now_nsec() {
  LARGE_INTEGER now;
  QueryPerformanceCounter(&now);
  // Computed in double (1e9 * seconds-per-tick * ticks) and implicitly
  // converted to kmp_uint64 on return.
  return 1e9 * __kmp_win32_tick * now.QuadPart;
}
913 
/* Thread entry point for OpenMP worker threads on Windows: publishes the
   gtid in TLS, applies the initial affinity mask and (on x86) FP control
   state, optionally offsets the stack, records liveness/stack bookkeeping,
   then runs __kmp_launch_thread() until the worker finishes. Returns that
   call's result as the thread exit value. */
extern "C"
void *__stdcall __kmp_launch_worker(void *arg) {
  volatile void *stack_data; // local whose address marks this thread's stack
  void *exit_val;
  void *padding = 0; // alloca result kept only for its stack-shift effect
  kmp_info_t *this_thr = (kmp_info_t *)arg;
  int gtid;

  gtid = this_thr->th.th_info.ds.ds_gtid;
  __kmp_gtid_set_specific(gtid);
#ifdef KMP_TDATA_GTID
#error "This define causes problems with LoadLibrary() + declspec(thread) " \
    "on Windows* OS. See CQ50564, tests kmp_load_library*.c and this MSDN " \
    "reference: http://support.microsoft.com/kb/118816"
//__kmp_gtid = gtid;
#endif

#if USE_ITT_BUILD
  __kmp_itt_thread_name(gtid);
#endif /* USE_ITT_BUILD */

  __kmp_affinity_set_init_mask(gtid, FALSE);

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  // Set FP control regs to be a copy of the parallel initialization thread's.
  __kmp_clear_x87_fpu_status_word();
  __kmp_load_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
  __kmp_load_mxcsr(&__kmp_init_mxcsr);
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  if (__kmp_stkoffset > 0 && gtid > 0) {
    // NOTE(review): presumably staggers worker stack bases by
    // gtid * __kmp_stkoffset — confirm intent against __kmp_stkoffset docs.
    padding = KMP_ALLOCA(gtid * __kmp_stkoffset);
  }

  KMP_FSYNC_RELEASING(&this_thr->th.th_info.ds.ds_alive);
  this_thr->th.th_info.ds.ds_thread_id = GetCurrentThreadId();
  TCW_4(this_thr->th.th_info.ds.ds_alive, TRUE);

  if (TCR_4(__kmp_gtid_mode) <
      2) { // check stack only if it is used to get gtid
    TCW_PTR(this_thr->th.th_info.ds.ds_stackbase, &stack_data);
    KMP_ASSERT(this_thr->th.th_info.ds.ds_stackgrow == FALSE);
    __kmp_check_stack_overlap(this_thr);
  }
  KMP_MB();
  exit_val = __kmp_launch_thread(this_thr);
  KMP_FSYNC_RELEASING(&this_thr->th.th_info.ds.ds_alive);
  TCW_4(this_thr->th.th_info.ds.ds_alive, FALSE);
  KMP_MB();
  return exit_val;
}
965 
966 #if KMP_USE_MONITOR
967 /* The monitor thread controls all of the threads in the complex */
968 
/* Thread start routine for the monitor thread (KMP_USE_MONITOR builds).
   The monitor wakes up every 1/__kmp_monitor_wakeups seconds, advances the
   global time stamp used by blocktime accounting, and on abnormal shutdown
   (g_abort set by a signal handler) terminates the worker threads and
   re-raises the caught signal. arg is the monitor's kmp_info_t. */
void *__stdcall __kmp_launch_monitor(void *arg) {
  DWORD wait_status;
  kmp_thread_t monitor;
  int status;
  int interval; // wakeup period in milliseconds
  kmp_info_t *this_thr = (kmp_info_t *)arg;

  KMP_DEBUG_ASSERT(__kmp_init_monitor);
  TCW_4(__kmp_init_monitor, 2); // AC: Signal library that monitor has started
  // TODO: hide "2" in enum (like {true,false,started})
  this_thr->th.th_info.ds.ds_thread_id = GetCurrentThreadId();
  TCW_4(this_thr->th.th_info.ds.ds_alive, TRUE);

  KMP_MB(); /* Flush all pending memory write invalidates. */
  KA_TRACE(10, ("__kmp_launch_monitor: launched\n"));

  monitor = GetCurrentThread();

  /* set thread priority: the monitor must preempt workers to keep time */
  status = SetThreadPriority(monitor, THREAD_PRIORITY_HIGHEST);
  if (!status) {
    DWORD error = GetLastError();
    __kmp_fatal(KMP_MSG(CantSetThreadPriority), KMP_ERR(error), __kmp_msg_null);
  }

  /* register us as monitor */
  __kmp_gtid_set_specific(KMP_GTID_MONITOR);
#ifdef KMP_TDATA_GTID
#error "This define causes problems with LoadLibrary() + declspec(thread) " \
  "on Windows* OS. See CQ50564, tests kmp_load_library*.c and this MSDN " \
  "reference: http://support.microsoft.com/kb/118816"
//__kmp_gtid = KMP_GTID_MONITOR;
#endif

#if USE_ITT_BUILD
  __kmp_itt_thread_ignore(); // Instruct Intel(R) Threading Tools to ignore
// monitor thread.
#endif /* USE_ITT_BUILD */

  KMP_MB(); /* Flush all pending memory write invalidates. */

  interval = (1000 / __kmp_monitor_wakeups); /* in milliseconds */

  // Main loop: sleep on the monitor event (set by __kmp_reap_monitor) or
  // time out; on timeout bump the global time value that blocktime reads.
  while (!TCR_4(__kmp_global.g.g_done)) {
    /* This thread monitors the state of the system */

    KA_TRACE(15, ("__kmp_launch_monitor: update\n"));

    wait_status = WaitForSingleObject(__kmp_monitor_ev, interval);

    if (wait_status == WAIT_TIMEOUT) {
      TCW_4(__kmp_global.g.g_time.dt.t_value,
            TCR_4(__kmp_global.g.g_time.dt.t_value) + 1);
    }

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }

  KA_TRACE(10, ("__kmp_launch_monitor: finished\n"));

  status = SetThreadPriority(monitor, THREAD_PRIORITY_NORMAL);
  if (!status) {
    DWORD error = GetLastError();
    __kmp_fatal(KMP_MSG(CantSetThreadPriority), KMP_ERR(error), __kmp_msg_null);
  }

  if (__kmp_global.g.g_abort != 0) {
    /* now we need to terminate the worker threads */
    /* the value of t_abort is the signal we caught */
    int gtid;

    KA_TRACE(10, ("__kmp_launch_monitor: terminate sig=%d\n",
                  (__kmp_global.g.g_abort)));

    /* terminate the OpenMP worker threads */
    /* TODO this is not valid for sibling threads!!
     * the uber master might not be 0 anymore.. */
    for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid)
      __kmp_terminate_thread(gtid);

    __kmp_cleanup();

    Sleep(0); // yield so terminated threads can be scheduled out

    KA_TRACE(10,
             ("__kmp_launch_monitor: raise sig=%d\n", __kmp_global.g.g_abort));

    // Re-raise the original signal with default disposition restored by
    // the cleanup above, so the process dies with the right signal.
    if (__kmp_global.g.g_abort > 0) {
      raise(__kmp_global.g.g_abort);
    }
  }

  TCW_4(this_thr->th.th_info.ds.ds_alive, FALSE);

  KMP_MB();
  return arg;
}
1066 #endif
1067 
/* Create the OS thread backing worker 'gtid', or register the current thread
   when gtid is an uber (root) thread. For roots, the current thread's
   pseudo-handle is duplicated into a real handle stored in ds_thread; for
   workers, a new thread running __kmp_launch_worker is spawned with the
   requested (gtid-staggered) stack reservation. */
void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size) {
  kmp_thread_t handle;
  DWORD idThread;

  KA_TRACE(10, ("__kmp_create_worker: try to create thread (%d)\n", gtid));

  th->th.th_info.ds.ds_gtid = gtid;

  if (KMP_UBER_GTID(gtid)) {
    int stack_data; // local whose address samples the current stack position

    /* TODO: GetCurrentThread() returns a pseudo-handle that is unsuitable for
       other threads to use. Is it appropriate to just use GetCurrentThread?
       When should we close this handle? When unregistering the root? */
    {
      BOOL rc;
      rc = DuplicateHandle(GetCurrentProcess(), GetCurrentThread(),
                           GetCurrentProcess(), &th->th.th_info.ds.ds_thread, 0,
                           FALSE, DUPLICATE_SAME_ACCESS);
      KMP_ASSERT(rc);
      KA_TRACE(10, (" __kmp_create_worker: ROOT Handle duplicated, th = %p, "
                    "handle = %" KMP_UINTPTR_SPEC "\n",
                    (LPVOID)th, th->th.th_info.ds.ds_thread));
      th->th.th_info.ds.ds_thread_id = GetCurrentThreadId();
    }
    if (TCR_4(__kmp_gtid_mode) < 2) { // check stack only if used to get gtid
      /* we will dynamically update the stack range if gtid_mode == 1 */
      TCW_PTR(th->th.th_info.ds.ds_stackbase, &stack_data);
      TCW_PTR(th->th.th_info.ds.ds_stacksize, 0);
      TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE);
      __kmp_check_stack_overlap(th);
    }
  } else {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* Set stack size for this thread now. */
    KA_TRACE(10,
             ("__kmp_create_worker: stack_size = %" KMP_SIZE_T_SPEC " bytes\n",
              stack_size));

    // Stagger each worker's stack by gtid * __kmp_stkoffset bytes.
    stack_size += gtid * __kmp_stkoffset;

    TCW_PTR(th->th.th_info.ds.ds_stacksize, stack_size);
    TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE);

    KA_TRACE(10,
             ("__kmp_create_worker: (before) stack_size = %" KMP_SIZE_T_SPEC
              " bytes, &__kmp_launch_worker = %p, th = %p, &idThread = %p\n",
              (SIZE_T)stack_size, (LPTHREAD_START_ROUTINE)&__kmp_launch_worker,
              (LPVOID)th, &idThread));

    // STACK_SIZE_PARAM_IS_A_RESERVATION makes stack_size the reserved size
    // rather than the initially committed size.
    handle = CreateThread(
        NULL, (SIZE_T)stack_size, (LPTHREAD_START_ROUTINE)__kmp_launch_worker,
        (LPVOID)th, STACK_SIZE_PARAM_IS_A_RESERVATION, &idThread);

    KA_TRACE(10,
             ("__kmp_create_worker: (after) stack_size = %" KMP_SIZE_T_SPEC
              " bytes, &__kmp_launch_worker = %p, th = %p, "
              "idThread = %u, handle = %" KMP_UINTPTR_SPEC "\n",
              (SIZE_T)stack_size, (LPTHREAD_START_ROUTINE)&__kmp_launch_worker,
              (LPVOID)th, idThread, handle));

    if (handle == 0) {
      DWORD error = GetLastError();
      __kmp_fatal(KMP_MSG(CantCreateThread), KMP_ERR(error), __kmp_msg_null);
    } else {
      th->th.th_info.ds.ds_thread = handle;
    }

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }

  KA_TRACE(10, ("__kmp_create_worker: done creating thread (%d)\n", gtid));
}
1142 
1143 int __kmp_still_running(kmp_info_t *th) {
1144  return (WAIT_TIMEOUT == WaitForSingleObject(th->th.th_info.ds.ds_thread, 0));
1145 }
1146 
1147 #if KMP_USE_MONITOR
1148 void __kmp_create_monitor(kmp_info_t *th) {
1149  kmp_thread_t handle;
1150  DWORD idThread;
1151  int ideal, new_ideal;
1152 
1153  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
1154  // We don't need monitor thread in case of MAX_BLOCKTIME
1155  KA_TRACE(10, ("__kmp_create_monitor: skipping monitor thread because of "
1156  "MAX blocktime\n"));
1157  th->th.th_info.ds.ds_tid = 0; // this makes reap_monitor no-op
1158  th->th.th_info.ds.ds_gtid = 0;
1159  TCW_4(__kmp_init_monitor, 2); // Signal to stop waiting for monitor creation
1160  return;
1161  }
1162  KA_TRACE(10, ("__kmp_create_monitor: try to create monitor\n"));
1163 
1164  KMP_MB(); /* Flush all pending memory write invalidates. */
1165 
1166  __kmp_monitor_ev = CreateEvent(NULL, TRUE, FALSE, NULL);
1167  if (__kmp_monitor_ev == NULL) {
1168  DWORD error = GetLastError();
1169  __kmp_fatal(KMP_MSG(CantCreateEvent), KMP_ERR(error), __kmp_msg_null);
1170  }
1171 #if USE_ITT_BUILD
1172  __kmp_itt_system_object_created(__kmp_monitor_ev, "Event");
1173 #endif /* USE_ITT_BUILD */
1174 
1175  th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR;
1176  th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR;
1177 
1178  // FIXME - on Windows* OS, if __kmp_monitor_stksize = 0, figure out how
1179  // to automatically expand stacksize based on CreateThread error code.
1180  if (__kmp_monitor_stksize == 0) {
1181  __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE;
1182  }
1183  if (__kmp_monitor_stksize < __kmp_sys_min_stksize) {
1184  __kmp_monitor_stksize = __kmp_sys_min_stksize;
1185  }
1186 
1187  KA_TRACE(10, ("__kmp_create_monitor: requested stacksize = %d bytes\n",
1188  (int)__kmp_monitor_stksize));
1189 
1190  TCW_4(__kmp_global.g.g_time.dt.t_value, 0);
1191 
1192  handle =
1193  CreateThread(NULL, (SIZE_T)__kmp_monitor_stksize,
1194  (LPTHREAD_START_ROUTINE)__kmp_launch_monitor, (LPVOID)th,
1195  STACK_SIZE_PARAM_IS_A_RESERVATION, &idThread);
1196  if (handle == 0) {
1197  DWORD error = GetLastError();
1198  __kmp_fatal(KMP_MSG(CantCreateThread), KMP_ERR(error), __kmp_msg_null);
1199  } else
1200  th->th.th_info.ds.ds_thread = handle;
1201 
1202  KMP_MB(); /* Flush all pending memory write invalidates. */
1203 
1204  KA_TRACE(10, ("__kmp_create_monitor: monitor created %p\n",
1205  (void *)th->th.th_info.ds.ds_thread));
1206 }
1207 #endif
1208 
1209 /* Check to see if thread is still alive.
1210  NOTE: The ExitProcess(code) system call causes all threads to Terminate
1211  with a exit_val = code. Because of this we can not rely on exit_val having
1212  any particular value. So this routine may return STILL_ALIVE in exit_val
1213  even after the thread is dead. */
1214 
1215 int __kmp_is_thread_alive(kmp_info_t *th, DWORD *exit_val) {
1216  DWORD rc;
1217  rc = GetExitCodeThread(th->th.th_info.ds.ds_thread, exit_val);
1218  if (rc == 0) {
1219  DWORD error = GetLastError();
1220  __kmp_fatal(KMP_MSG(FunctionError, "GetExitCodeThread()"), KMP_ERR(error),
1221  __kmp_msg_null);
1222  }
1223  return (*exit_val == STILL_ACTIVE);
1224 }
1225 
// Terminate the calling OS thread with the given exit status (thin wrapper
// over the Win32 ExitThread() call; does not return).
void __kmp_exit_thread(int exit_status) {
  ExitThread(exit_status);
} // __kmp_exit_thread
1229 
// This is a common part for both __kmp_reap_worker() and __kmp_reap_monitor():
// wait until the thread has really terminated, release its OS handle, and
// invalidate the descriptor fields so a stale reap cannot touch a live thread.
static void __kmp_reap_common(kmp_info_t *th) {
  DWORD exit_val;

  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(
      10, ("__kmp_reap_common: try to reap (%d)\n", th->th.th_info.ds.ds_gtid));

  /* 2006-10-19:
     There are two opposite situations:
     1. Windows* OS keep thread alive after it resets ds_alive flag and
     exits from thread function. (For example, see C70770/Q394281 "unloading of
     dll based on OMP is very slow".)
     2. Windows* OS may kill thread before it resets ds_alive flag.

     Right solution seems to be waiting for *either* thread termination *or*
     ds_alive resetting. */
  {
    // TODO: This code is very similar to KMP_WAIT_YIELD. Need to generalize
    // KMP_WAIT_YIELD to cover this usage also.
    void *obj = NULL;
    kmp_uint32 spins;
#if USE_ITT_BUILD
    KMP_FSYNC_SPIN_INIT(obj, (void *)&th->th.th_info.ds.ds_alive);
#endif /* USE_ITT_BUILD */
    KMP_INIT_YIELD(spins);
    // Spin with yielding until either the OS says the thread exited or the
    // thread itself cleared ds_alive on its way out (whichever comes first).
    do {
#if USE_ITT_BUILD
      KMP_FSYNC_SPIN_PREPARE(obj);
#endif /* USE_ITT_BUILD */
      __kmp_is_thread_alive(th, &exit_val);
      KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
      KMP_YIELD_SPIN(spins);
    } while (exit_val == STILL_ACTIVE && TCR_4(th->th.th_info.ds.ds_alive));
#if USE_ITT_BUILD
    if (exit_val == STILL_ACTIVE) {
      KMP_FSYNC_CANCEL(obj);
    } else {
      KMP_FSYNC_SPIN_ACQUIRED(obj);
    }
#endif /* USE_ITT_BUILD */
  }

  __kmp_free_handle(th->th.th_info.ds.ds_thread);

  /* NOTE: The ExitProcess(code) system call causes all threads to Terminate
     with a exit_val = code. Because of this we can not rely on exit_val having
     any particular value. */
  if (exit_val == STILL_ACTIVE) {
    KA_TRACE(1, ("__kmp_reap_common: thread still active.\n"));
  } else if ((void *)exit_val != (void *)th) {
    KA_TRACE(1, ("__kmp_reap_common: ExitProcess / TerminateThread used?\n"));
  }

  KA_TRACE(10,
           ("__kmp_reap_common: done reaping (%d), handle = %" KMP_UINTPTR_SPEC
            "\n",
            th->th.th_info.ds.ds_gtid, th->th.th_info.ds.ds_thread));

  // Invalidate the descriptor (KMP_GTID_DNE = "does not exist").
  th->th.th_info.ds.ds_thread = 0;
  th->th.th_info.ds.ds_tid = KMP_GTID_DNE;
  th->th.th_info.ds.ds_gtid = KMP_GTID_DNE;
  th->th.th_info.ds.ds_thread_id = 0;

  KMP_MB(); /* Flush all pending memory write invalidates. */
}
1297 
1298 #if KMP_USE_MONITOR
// Shut down and reap the monitor thread: wake it by setting __kmp_monitor_ev,
// wait for it to die via __kmp_reap_common(), then release the event handle.
// Safe to call when the monitor was never started (returns early).
void __kmp_reap_monitor(kmp_info_t *th) {
  int status;

  KA_TRACE(10, ("__kmp_reap_monitor: try to reap %p\n",
                (void *)th->th.th_info.ds.ds_thread));

  // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR.
  // If both tid and gtid are 0, it means the monitor did not ever start.
  // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down.
  KMP_DEBUG_ASSERT(th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid);
  if (th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR) {
    KA_TRACE(10, ("__kmp_reap_monitor: monitor did not start, returning\n"));
    return;
  }

  KMP_MB(); /* Flush all pending memory write invalidates. */

  // Wake the monitor out of WaitForSingleObject() so it notices g_done.
  status = SetEvent(__kmp_monitor_ev);
  if (status == FALSE) {
    DWORD error = GetLastError();
    __kmp_fatal(KMP_MSG(CantSetEvent), KMP_ERR(error), __kmp_msg_null);
  }
  KA_TRACE(10, ("__kmp_reap_monitor: reaping thread (%d)\n",
                th->th.th_info.ds.ds_gtid));
  __kmp_reap_common(th);

  __kmp_free_handle(__kmp_monitor_ev);

  KMP_MB(); /* Flush all pending memory write invalidates. */
}
1329 #endif
1330 
// Reap a worker thread: wait for it to terminate and release its handle
// (all of the work is done in __kmp_reap_common()).
void __kmp_reap_worker(kmp_info_t *th) {
  KA_TRACE(10, ("__kmp_reap_worker: reaping thread (%d)\n",
                th->th.th_info.ds.ds_gtid));
  __kmp_reap_common(th);
}
1336 
1337 #if KMP_HANDLE_SIGNALS
1338 
// Stage 1 signal handler shared by all intercepted signals: record the signal
// number in g_abort and raise g_done so workers and the monitor shut down
// (the monitor later re-raises the signal; see __kmp_launch_monitor).
static void __kmp_team_handler(int signo) {
  if (__kmp_global.g.g_abort == 0) {
    // Stage 1 signal handler, let's shut down all of the threads.
    if (__kmp_debug_buf) {
      __kmp_dump_debug_buffer();
    }
    KMP_MB(); // Flush all pending memory write invalidates.
    TCW_4(__kmp_global.g.g_abort, signo);
    KMP_MB(); // Flush all pending memory write invalidates.
    TCW_4(__kmp_global.g.g_done, TRUE);
    KMP_MB(); // Flush all pending memory write invalidates.
  }
} // __kmp_team_handler
1352 
1353 static sig_func_t __kmp_signal(int signum, sig_func_t handler) {
1354  sig_func_t old = signal(signum, handler);
1355  if (old == SIG_ERR) {
1356  int error = errno;
1357  __kmp_fatal(KMP_MSG(FunctionError, "signal"), KMP_ERR(error),
1358  __kmp_msg_null);
1359  }
1360  return old;
1361 }
1362 
// Install 'handler' for signal 'sig' unless the application already installed
// its own handler (detected by comparing the displaced handler against the
// system default recorded in __kmp_sighldrs). On Windows* OS only the
// parallel_init != 0 path is live (see the comment in the else branch).
static void __kmp_install_one_handler(int sig, sig_func_t handler,
                                      int parallel_init) {
  sig_func_t old;
  KMP_MB(); /* Flush all pending memory write invalidates. */
  KB_TRACE(60, ("__kmp_install_one_handler: called: sig=%d\n", sig));
  if (parallel_init) {
    old = __kmp_signal(sig, handler);
    // SIG_DFL on Windows* OS in NULL or 0.
    if (old == __kmp_sighldrs[sig]) {
      __kmp_siginstalled[sig] = 1; // remember so we can remove it later
    } else { // Restore/keep user's handler if one previously installed.
      old = __kmp_signal(sig, old);
    }
  } else {
    // Save initial/system signal handlers to see if user handlers installed.
    // 2009-09-23: It is a dead code. On Windows* OS __kmp_install_signals
    // called once with parallel_init == TRUE.
    old = __kmp_signal(sig, SIG_DFL);
    __kmp_sighldrs[sig] = old;
    __kmp_signal(sig, old);
  }
  KMP_MB(); /* Flush all pending memory write invalidates. */
} // __kmp_install_one_handler
1386 
// Restore the original handler for 'sig' if we installed ours. If the handler
// we displace is no longer __kmp_team_handler, the application replaced it in
// the meantime, so put the application's handler back instead.
static void __kmp_remove_one_handler(int sig) {
  if (__kmp_siginstalled[sig]) {
    sig_func_t old;
    KMP_MB(); // Flush all pending memory write invalidates.
    KB_TRACE(60, ("__kmp_remove_one_handler: called: sig=%d\n", sig));
    old = __kmp_signal(sig, __kmp_sighldrs[sig]);
    if (old != __kmp_team_handler) {
      KB_TRACE(10, ("__kmp_remove_one_handler: oops, not our handler, "
                    "restoring: sig=%d\n",
                    sig));
      old = __kmp_signal(sig, old);
    }
    __kmp_sighldrs[sig] = NULL;
    __kmp_siginstalled[sig] = 0;
    KMP_MB(); // Flush all pending memory write invalidates.
  }
} // __kmp_remove_one_handler
1404 
1405 void __kmp_install_signals(int parallel_init) {
1406  KB_TRACE(10, ("__kmp_install_signals: called\n"));
1407  if (!__kmp_handle_signals) {
1408  KB_TRACE(10, ("__kmp_install_signals: KMP_HANDLE_SIGNALS is false - "
1409  "handlers not installed\n"));
1410  return;
1411  }
1412  __kmp_install_one_handler(SIGINT, __kmp_team_handler, parallel_init);
1413  __kmp_install_one_handler(SIGILL, __kmp_team_handler, parallel_init);
1414  __kmp_install_one_handler(SIGABRT, __kmp_team_handler, parallel_init);
1415  __kmp_install_one_handler(SIGFPE, __kmp_team_handler, parallel_init);
1416  __kmp_install_one_handler(SIGSEGV, __kmp_team_handler, parallel_init);
1417  __kmp_install_one_handler(SIGTERM, __kmp_team_handler, parallel_init);
1418 } // __kmp_install_signals
1419 
1420 void __kmp_remove_signals(void) {
1421  int sig;
1422  KB_TRACE(10, ("__kmp_remove_signals: called\n"));
1423  for (sig = 1; sig < NSIG; ++sig) {
1424  __kmp_remove_one_handler(sig);
1425  }
1426 } // __kmp_remove_signals
1427 
1428 #endif // KMP_HANDLE_SIGNALS
1429 
1430 /* Put the thread to sleep for a time period */
1431 void __kmp_thread_sleep(int millis) {
1432  DWORD status;
1433 
1434  status = SleepEx((DWORD)millis, FALSE);
1435  if (status) {
1436  DWORD error = GetLastError();
1437  __kmp_fatal(KMP_MSG(FunctionError, "SleepEx()"), KMP_ERR(error),
1438  __kmp_msg_null);
1439  }
1440 }
1441 
1442 // Determine whether the given address is mapped into the current address space.
1443 int __kmp_is_address_mapped(void *addr) {
1444  DWORD status;
1445  MEMORY_BASIC_INFORMATION lpBuffer;
1446  SIZE_T dwLength;
1447 
1448  dwLength = sizeof(MEMORY_BASIC_INFORMATION);
1449 
1450  status = VirtualQuery(addr, &lpBuffer, dwLength);
1451 
1452  return !(((lpBuffer.State == MEM_RESERVE) || (lpBuffer.State == MEM_FREE)) ||
1453  ((lpBuffer.Protect == PAGE_NOACCESS) ||
1454  (lpBuffer.Protect == PAGE_EXECUTE)));
1455 }
1456 
1457 kmp_uint64 __kmp_hardware_timestamp(void) {
1458  kmp_uint64 r = 0;
1459 
1460  QueryPerformanceCounter((LARGE_INTEGER *)&r);
1461  return r;
1462 }
1463 
1464 /* Free handle and check the error code */
1465 void __kmp_free_handle(kmp_thread_t tHandle) {
1466  /* called with parameter type HANDLE also, thus suppose kmp_thread_t defined
1467  * as HANDLE */
1468  BOOL rc;
1469  rc = CloseHandle(tHandle);
1470  if (!rc) {
1471  DWORD error = GetLastError();
1472  __kmp_fatal(KMP_MSG(CantCloseHandle), KMP_ERR(error), __kmp_msg_null);
1473  }
1474 }
1475 
1476 int __kmp_get_load_balance(int max) {
1477  static ULONG glb_buff_size = 100 * 1024;
1478 
1479  // Saved count of the running threads for the thread balance algortihm
1480  static int glb_running_threads = 0;
1481  static double glb_call_time = 0; /* Thread balance algorithm call time */
1482 
1483  int running_threads = 0; // Number of running threads in the system.
1484  NTSTATUS status = 0;
1485  ULONG buff_size = 0;
1486  ULONG info_size = 0;
1487  void *buffer = NULL;
1488  PSYSTEM_PROCESS_INFORMATION spi = NULL;
1489  int first_time = 1;
1490 
1491  double call_time = 0.0; // start, finish;
1492 
1493  __kmp_elapsed(&call_time);
1494 
1495  if (glb_call_time &&
1496  (call_time - glb_call_time < __kmp_load_balance_interval)) {
1497  running_threads = glb_running_threads;
1498  goto finish;
1499  }
1500  glb_call_time = call_time;
1501 
1502  // Do not spend time on running algorithm if we have a permanent error.
1503  if (NtQuerySystemInformation == NULL) {
1504  running_threads = -1;
1505  goto finish;
1506  }
1507 
1508  if (max <= 0) {
1509  max = INT_MAX;
1510  }
1511 
1512  do {
1513 
1514  if (first_time) {
1515  buff_size = glb_buff_size;
1516  } else {
1517  buff_size = 2 * buff_size;
1518  }
1519 
1520  buffer = KMP_INTERNAL_REALLOC(buffer, buff_size);
1521  if (buffer == NULL) {
1522  running_threads = -1;
1523  goto finish;
1524  }
1525  status = NtQuerySystemInformation(SystemProcessInformation, buffer,
1526  buff_size, &info_size);
1527  first_time = 0;
1528 
1529  } while (status == STATUS_INFO_LENGTH_MISMATCH);
1530  glb_buff_size = buff_size;
1531 
1532 #define CHECK(cond) \
1533  { \
1534  KMP_DEBUG_ASSERT(cond); \
1535  if (!(cond)) { \
1536  running_threads = -1; \
1537  goto finish; \
1538  } \
1539  }
1540 
1541  CHECK(buff_size >= info_size);
1542  spi = PSYSTEM_PROCESS_INFORMATION(buffer);
1543  for (;;) {
1544  ptrdiff_t offset = uintptr_t(spi) - uintptr_t(buffer);
1545  CHECK(0 <= offset &&
1546  offset + sizeof(SYSTEM_PROCESS_INFORMATION) < info_size);
1547  HANDLE pid = spi->ProcessId;
1548  ULONG num = spi->NumberOfThreads;
1549  CHECK(num >= 1);
1550  size_t spi_size =
1551  sizeof(SYSTEM_PROCESS_INFORMATION) + sizeof(SYSTEM_THREAD) * (num - 1);
1552  CHECK(offset + spi_size <
1553  info_size); // Make sure process info record fits the buffer.
1554  if (spi->NextEntryOffset != 0) {
1555  CHECK(spi_size <=
1556  spi->NextEntryOffset); // And do not overlap with the next record.
1557  }
1558  // pid == 0 corresponds to the System Idle Process. It always has running
1559  // threads on all cores. So, we don't consider the running threads of this
1560  // process.
1561  if (pid != 0) {
1562  for (int i = 0; i < num; ++i) {
1563  THREAD_STATE state = spi->Threads[i].State;
1564  // Count threads that have Ready or Running state.
1565  // !!! TODO: Why comment does not match the code???
1566  if (state == StateRunning) {
1567  ++running_threads;
1568  // Stop counting running threads if the number is already greater than
1569  // the number of available cores
1570  if (running_threads >= max) {
1571  goto finish;
1572  }
1573  }
1574  }
1575  }
1576  if (spi->NextEntryOffset == 0) {
1577  break;
1578  }
1579  spi = PSYSTEM_PROCESS_INFORMATION(uintptr_t(spi) + spi->NextEntryOffset);
1580  }
1581 
1582 #undef CHECK
1583 
1584 finish: // Clean up and exit.
1585 
1586  if (buffer != NULL) {
1587  KMP_INTERNAL_FREE(buffer);
1588  }
1589 
1590  glb_running_threads = running_threads;
1591 
1592  return running_threads;
1593 } //__kmp_get_load_balance()