#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_settings.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#include "kmp_dispatch_hier.h"

#include "ompt-specific.h"
#include "ompd-specific.h"

#if OMP_PROFILING_SUPPORT
#include "llvm/Support/TimeProfiler.h"
static char *ProfileTraceFile = nullptr;
#endif

#define KMP_USE_PRCTL 0
#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] =
    KMP_VERSION_PREFIX "API version: 5.0 (201611)";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

kmp_info_t __kmp_monitor;
void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads);
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
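/* Calculate the identifier of the current thread.  When the gtid mode permits
   it, the id comes from thread-local data or keyed TLS; otherwise the runtime
   scans the registered threads' stack ranges for the one that contains the
   address of a local variable. */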
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d  all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */
    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;
      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated stack size is if
           we are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();
  if (i < 0)
    return i;

  /* dynamically updated stack window for uber threads to avoid get_specific
     call */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}
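/* Verify that the stack assigned to a thread does not overlap the stack of
   any other registered thread; report a fatal StackOverlap error when an
   overlap is detected. */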
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking ubermaster threads since they use refinement and
     cannot overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
void __kmp_infinite_loop(void) {
  static int done = FALSE;
  while (!done) {
    KMP_YIELD(TRUE);
  }
}

#define MAX_MESSAGE 512
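/* Print storage-map information for the address range [p1,p2) under the
   bootstrap stdio lock; with KMP_PRINT_DATA_PLACEMENT it also reports which
   memory node each page of the range resides on. */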
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0) /* doesn't work, so don't try this next time */
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
          do {
            last = p1;
            lastNode = node;
            /* This loop collates adjacent pages with the same host node. */
            do {
              (char *)p1 += page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                                 lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          if (p1 < p2) {
            __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                 (char *)p2 + (page_size - 1),
                                 __kmp_get_host_node(p2));
          }
#endif
        }
      }
    } else
      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
}
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
void __kmp_abort_process() {
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);
  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }
  if (KMP_OS_WINDOWS) {
    // Let other threads know of the abnormal termination.
    __kmp_global.g.g_abort = SIGABRT;
    raise(SIGABRT);
    _exit(3); // In case the signal is ignored, exit anyway.
  } else {
    __kmp_unregister_library();
    abort();
  }
  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);
}
void __kmp_abort_thread(void) {
  __kmp_infinite_loop();
}
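/* Print the storage map for a thread descriptor: the kmp_info_t object itself
   plus its th_info, th_local and per-barrier sub-structures. */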
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}
/* Print the storage map for the major kmp_team_t structures that are
   allocated together. */
static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
}
static void __kmp_init_allocator() {
  __kmp_init_memkind();
  __kmp_init_target_mem();
}
static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    // lpReserved == NULL when FreeLibrary() was called; only shut the library
    // down in that case.
    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());
    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}
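/* __kmp_parallel_deo / __kmp_parallel_dxo implement entry to and exit from an
   ordered section inside a parallel region: each thread waits for, and then
   passes on, the team's ordered ticket in thread-id order. */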
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* use the tid of the next thread in this team */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  kmp_info_t *th;
  kmp_team_t *team;
  int status;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means the thread got the
       single block */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      // Only report metadata by primary thread of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
  if (status) {
    __kmp_itt_single_start(gtid);
  }
  return status;
}
void __kmp_exit_single(int gtid) {
  __kmp_itt_single_end(gtid);
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
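/* Determine how many threads can actually be reserved for a new team: start
   from the requested number, then apply dynamic adjustment (load balance,
   thread limit, random), the device and contention-group thread limits, and
   finally the capacity of the __kmp_threads array. */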
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  }

  // Respect KMP_DEVICE_THREAD_LIMIT.
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT (the contention-group limit).
  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  // If it is not for initializing the hidden helper team, take
  // __kmp_hidden_helper_threads_num out of the capacity because it is
  // included in __kmp_threads_capacity.
  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }
  return new_nthreads;
}
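/* Allocate (or reuse from the hot team) the worker threads for a newly forked
   team, install the primary thread in slot 0, propagate barrier state to the
   workers, and apply affinity partitioning when enabled. */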
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid,
                                    int fork_teams_workers) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());

  /* first, let's setup the primary thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

  /* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if max level is 0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
                 // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
                 // team_of_workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the primary thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }

#if KMP_AFFINITY_SUPPORTED
    // Do not partition the places list for teams construct workers that
    // haven't actually joined the team yet.
    if (!fork_teams_workers) {
      __kmp_partition_places(team);
    }
#endif
  }

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
        break;
      }
    }
  }
}

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
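/* Propagate the primary thread's floating-point control settings (x87 control
   word and MXCSR) into the team on fork, and restore the hardware registers
   from the team on exit, so workers inherit the primary thread's FP
   environment. */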
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get the primary thread's values of the FPU control flags.
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    // Only write the team fields if the values actually changed, to avoid
    // dirtying the cache line unnecessarily.
    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the FP control regs if they have been changed in the team.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc);
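/* __kmp_serialized_parallel: execute a parallel region serially on the
   encountering thread, reusing (or allocating) the thread's serial team and
   pushing a new dispatch buffer and ICV context for the nested level. */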
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));
1108 if (!TCR_4(__kmp_init_parallel))
1109 __kmp_parallel_initialize();
1110 __kmp_resume_if_soft_paused();
1112 this_thr = __kmp_threads[global_tid];
1113 serial_team = this_thr->th.th_serial_team;
1116 KMP_DEBUG_ASSERT(serial_team);
1119 if (__kmp_tasking_mode != tskm_immediate_exec) {
1121 this_thr->th.th_task_team ==
1122 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
1123 KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
1128 this_thr->th.th_task_team = NULL;
1131 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1132 if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1133 proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
1137 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1140 this_thr->th.th_set_proc_bind = proc_bind_default;
1143 ompt_data_t ompt_parallel_data = ompt_data_none;
1144 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1145 if (ompt_enabled.enabled &&
1146 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1148 ompt_task_info_t *parent_task_info;
1149 parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
1151 parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1152 if (ompt_enabled.ompt_callback_parallel_begin) {
1155 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1156 &(parent_task_info->task_data), &(parent_task_info->frame),
1157 &ompt_parallel_data, team_size,
1158 ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
1163 if (this_thr->th.th_team != serial_team) {
1165 int level = this_thr->th.th_team->t.t_level;
1167 if (serial_team->t.t_serialized) {
1170 kmp_team_t *new_team;
1172 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
1175 __kmp_allocate_team(this_thr->th.th_root, 1, 1,
1179 proc_bind, &this_thr->th.th_current_task->td_icvs,
1180 0 USE_NESTED_HOT_ARG(NULL));
1181 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1182 KMP_ASSERT(new_team);
1185 new_team->t.t_threads[0] = this_thr;
1186 new_team->t.t_parent = this_thr->th.th_team;
1187 serial_team = new_team;
1188 this_thr->th.th_serial_team = serial_team;
1192 (
"__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
1193 global_tid, serial_team));
1201 (
"__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
1202 global_tid, serial_team));
1206 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1207 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1208 KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
1209 serial_team->t.t_ident = loc;
1210 serial_team->t.t_serialized = 1;
1211 serial_team->t.t_nproc = 1;
1212 serial_team->t.t_parent = this_thr->th.th_team;
1213 serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
1214 this_thr->th.th_team = serial_team;
1215 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
1219 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
1220 this_thr->th.th_current_task->td_flags.executing = 0;
1222 __kmp_push_current_task_to_thread(this_thr, serial_team, 0);
1227 copy_icvs(&this_thr->th.th_current_task->td_icvs,
1228 &this_thr->th.th_current_task->td_parent->td_icvs);
1232 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1233 this_thr->th.th_current_task->td_icvs.nproc =
1234 __kmp_nested_nth.nth[level + 1];
1237 if (__kmp_nested_proc_bind.used &&
1238 (level + 1 < __kmp_nested_proc_bind.used)) {
1239 this_thr->th.th_current_task->td_icvs.proc_bind =
1240 __kmp_nested_proc_bind.bind_types[level + 1];
1244 serial_team->t.t_pkfn = (microtask_t)(~0);
1246 this_thr->th.th_info.ds.ds_tid = 0;
1249 this_thr->th.th_team_nproc = 1;
1250 this_thr->th.th_team_master = this_thr;
1251 this_thr->th.th_team_serialized = 1;
1253 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1254 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
1255 serial_team->t.t_def_allocator = this_thr->th.th_def_allocator;
1257 propagateFPControl(serial_team);
1260 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1261 if (!serial_team->t.t_dispatch->th_disp_buffer) {
1262 serial_team->t.t_dispatch->th_disp_buffer =
1263 (dispatch_private_info_t *)__kmp_allocate(
1264 sizeof(dispatch_private_info_t));
1266 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1273 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
1274 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1275 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1276 ++serial_team->t.t_serialized;
1277 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
1280 int level = this_thr->th.th_team->t.t_level;
1283 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1284 this_thr->th.th_current_task->td_icvs.nproc =
1285 __kmp_nested_nth.nth[level + 1];
1287 serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));
1293 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1295 dispatch_private_info_t *disp_buffer =
1296 (dispatch_private_info_t *)__kmp_allocate(
1297 sizeof(dispatch_private_info_t));
1298 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1299 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1301 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
1305 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
1309 if (__kmp_display_affinity) {
1310 if (this_thr->th.th_prev_level != serial_team->t.t_level ||
1311 this_thr->th.th_prev_num_threads != 1) {
1313 __kmp_aux_display_affinity(global_tid, NULL);
1314 this_thr->th.th_prev_level = serial_team->t.t_level;
1315 this_thr->th.th_prev_num_threads = 1;
1319 if (__kmp_env_consistency_check)
1320 __kmp_push_parallel(global_tid, NULL);
1322 serial_team->t.ompt_team_info.master_return_address = codeptr;
1323 if (ompt_enabled.enabled &&
1324 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1325 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1326 OMPT_GET_FRAME_ADDRESS(0);
1328 ompt_lw_taskteam_t lw_taskteam;
1329 __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
1330 &ompt_parallel_data, codeptr);
1332 __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
1336 if (ompt_enabled.ompt_callback_implicit_task) {
1337 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1338 ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
1339 OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
1340 ompt_task_implicit);
1341 OMPT_CUR_TASK_INFO(this_thr)->thread_num =
1342 __kmp_tid_from_gtid(global_tid);
1346 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
1347 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1348 OMPT_GET_FRAME_ADDRESS(0);
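/* __kmp_fork_call: the main entry point for starting a parallel region.  It
   decides how many threads to use, serializes the region when only one thread
   is available, otherwise allocates or reuses a team, propagates ICVs and FP
   control, and releases the workers through the fork barrier. */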
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context, // Intel, GNU, ...
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
                    kmp_va_list ap) {
1362 int master_this_cons;
1364 kmp_team_t *parent_team;
1365 kmp_info_t *master_th;
1369 int master_set_numthreads;
1373#if KMP_NESTED_HOT_TEAMS
1374 kmp_hot_team_ptr_t **p_hot_teams;
1377 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
1381 if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
1384 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
1386 if (__kmp_stkpadding > KMP_MAX_STKPADDING)
1387 __kmp_stkpadding += (short)((kmp_int64)dummy);
1393 if (!TCR_4(__kmp_init_parallel))
1394 __kmp_parallel_initialize();
1395 __kmp_resume_if_soft_paused();
1398 master_th = __kmp_threads[gtid];
1400 parent_team = master_th->th.th_team;
1401 master_tid = master_th->th.th_info.ds.ds_tid;
1402 master_this_cons = master_th->th.th_local.this_construct;
1403 root = master_th->th.th_root;
1404 master_active = root->r.r_active;
1405 master_set_numthreads = master_th->th.th_set_nproc;
1408 ompt_data_t ompt_parallel_data = ompt_data_none;
1409 ompt_data_t *parent_task_data;
1410 ompt_frame_t *ompt_frame;
1411 ompt_data_t *implicit_task_data;
1412 void *return_address = NULL;
1414 if (ompt_enabled.enabled) {
1415 __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
1417 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
1422 __kmp_assign_root_init_mask();
1425 level = parent_team->t.t_level;
1427 active_level = parent_team->t.t_active_level;
1429 teams_level = master_th->th.th_teams_level;
1430#if KMP_NESTED_HOT_TEAMS
1431 p_hot_teams = &master_th->th.th_hot_teams;
1432 if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
1433 *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
1434 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1435 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
1437 (*p_hot_teams)[0].hot_team_nth = 1;
1442 if (ompt_enabled.enabled) {
1443 if (ompt_enabled.ompt_callback_parallel_begin) {
1444 int team_size = master_set_numthreads
1445 ? master_set_numthreads
1446 : get__nproc_2(parent_team, master_tid);
1447 int flags = OMPT_INVOKER(call_context) |
1448 ((microtask == (microtask_t)__kmp_teams_master)
1449 ? ompt_parallel_league
1450 : ompt_parallel_team);
1451 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1452 parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
1455 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1459 master_th->th.th_ident = loc;
1461 if (master_th->th.th_teams_microtask && ap &&
1462 microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
1466 parent_team->t.t_ident = loc;
1467 __kmp_alloc_argv_entries(argc, parent_team, TRUE);
1468 parent_team->t.t_argc = argc;
    argv = (void **)parent_team->t.t_argv;
    for (i = argc - 1; i >= 0; --i)
      *argv++ = va_arg(kmp_va_deref(ap), void *);
1473 if (parent_team == master_th->th.th_serial_team) {
1476 KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
1478 if (call_context == fork_context_gnu) {
1481 parent_team->t.t_serialized--;
1486 parent_team->t.t_pkfn = microtask;
1491 void **exit_frame_p;
1493 ompt_lw_taskteam_t lw_taskteam;
1495 if (ompt_enabled.enabled) {
1496 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1497 &ompt_parallel_data, return_address);
1498 exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
1500 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1504 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1505 if (ompt_enabled.ompt_callback_implicit_task) {
1506 OMPT_CUR_TASK_INFO(master_th)->thread_num =
1507 __kmp_tid_from_gtid(gtid);
1508 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1509 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1510 implicit_task_data, 1,
1511 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1515 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1517 exit_frame_p = &dummy;
1522 parent_team->t.t_serialized--;
1525 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1526 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1527 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
1536 if (ompt_enabled.enabled) {
1537 *exit_frame_p = NULL;
1538 OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
1539 if (ompt_enabled.ompt_callback_implicit_task) {
1540 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1541 ompt_scope_end, NULL, implicit_task_data, 1,
1542 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1544 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1545 __ompt_lw_taskteam_unlink(master_th);
1546 if (ompt_enabled.ompt_callback_parallel_end) {
1547 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1548 &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
1549 OMPT_INVOKER(call_context) | ompt_parallel_team,
1552 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1558 parent_team->t.t_pkfn = microtask;
1559 parent_team->t.t_invoke = invoker;
1560 KMP_ATOMIC_INC(&root->r.r_in_parallel);
1561 parent_team->t.t_active_level++;
1562 parent_team->t.t_level++;
1563 parent_team->t.t_def_allocator = master_th->th.th_def_allocator;
1566 if (ompt_enabled.enabled) {
1567 ompt_lw_taskteam_t lw_taskteam;
1568 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1569 &ompt_parallel_data, return_address);
      __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
1575 if (master_set_numthreads) {
1576 if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
1578 kmp_info_t **other_threads = parent_team->t.t_threads;
1581 int old_proc = master_th->th.th_teams_size.nth;
1582 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
1584 __kmp_resize_dist_barrier(parent_team, old_proc,
1585 master_set_numthreads);
1586 __kmp_add_threads_to_team(parent_team, master_set_numthreads);
1588 parent_team->t.t_nproc = master_set_numthreads;
1589 for (i = 0; i < master_set_numthreads; ++i) {
1590 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1594 master_th->th.th_set_nproc = 0;
1598 if (__kmp_debugging) {
1599 int nth = __kmp_omp_num_threads(loc);
1601 master_set_numthreads = nth;
1607 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
1609 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
1610 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1611 proc_bind = proc_bind_false;
1614 if (proc_bind == proc_bind_default) {
1615 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
1622 if ((level + 1 < __kmp_nested_proc_bind.used) &&
1623 (__kmp_nested_proc_bind.bind_types[level + 1] !=
1624 master_th->th.th_current_task->td_icvs.proc_bind)) {
1625 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
1628 KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
1630 if (proc_bind_icv != proc_bind_default &&
1631 master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
1632 kmp_info_t **other_threads = parent_team->t.t_threads;
1633 for (i = 0; i < master_th->th.th_team_nproc; ++i) {
1634 other_threads[i]->th.th_current_task->td_icvs.proc_bind =
1639 master_th->th.th_set_proc_bind = proc_bind_default;
1641#if USE_ITT_BUILD && USE_ITT_NOTIFY
1642 if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
1644 __kmp_forkjoin_frames_mode == 3 &&
1645 parent_team->t.t_active_level == 1
1646 && master_th->th.th_teams_size.nteams == 1) {
1647 kmp_uint64 tmp_time = __itt_get_timestamp();
1648 master_th->th.th_frame_time = tmp_time;
1649 parent_team->t.t_region_time = tmp_time;
1651 if (__itt_stack_caller_create_ptr) {
1652 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
1654 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
1657#if KMP_AFFINITY_SUPPORTED
1658 __kmp_partition_places(parent_team);
    KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));
    __kmp_internal_fork(loc, gtid, parent_team);
    KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));
1669 if (call_context == fork_context_gnu)
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    if (!parent_team->t.t_invoke(gtid)) {
      KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
    }
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
1689 if (__kmp_tasking_mode != tskm_immediate_exec) {
1690 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
1691 parent_team->t.t_task_team[master_th->th.th_task_state]);
1698 int enter_teams = 0;
1699 if (parent_team->t.t_active_level >=
1700 master_th->th.th_current_task->td_icvs.max_active_levels) {
1703 enter_teams = ((ap == NULL && active_level == 0) ||
1704 (ap && teams_level > 0 && teams_level == level));
1705 nthreads = master_set_numthreads
1706 ? master_set_numthreads
1708 : get__nproc_2(parent_team, master_tid);
1713 if ((get__max_active_levels(master_th) == 1 &&
1714 (root->r.r_in_parallel && !enter_teams)) ||
1715 (__kmp_library == library_serial)) {
    KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
                  " threads\n",
                  gtid, nthreads));
1724 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
1729 nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
1730 nthreads, enter_teams);
1731 if (nthreads == 1) {
1735 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1739 KMP_DEBUG_ASSERT(nthreads > 0);
1742 master_th->th.th_set_nproc = 0;
1745 if (nthreads == 1) {
1747#if KMP_OS_LINUX && \
1748 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));

    KA_TRACE(20,
             ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));
1761 master_th->th.th_serial_team->t.t_pkfn = microtask;
1764 if (call_context == fork_context_intel) {
1766 master_th->th.th_serial_team->t.t_ident = loc;
1769 master_th->th.th_serial_team->t.t_level--;
1774 void **exit_frame_p;
1775 ompt_task_info_t *task_info;
1777 ompt_lw_taskteam_t lw_taskteam;
1779 if (ompt_enabled.enabled) {
1780 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1781 &ompt_parallel_data, return_address);
1783 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1786 task_info = OMPT_CUR_TASK_INFO(master_th);
1787 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1788 if (ompt_enabled.ompt_callback_implicit_task) {
1789 OMPT_CUR_TASK_INFO(master_th)->thread_num =
1790 __kmp_tid_from_gtid(gtid);
1791 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1792 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1793 &(task_info->task_data), 1,
1794 OMPT_CUR_TASK_INFO(master_th)->thread_num,
1795 ompt_task_implicit);
1799 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1801 exit_frame_p = &dummy;
1806 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1807 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1808 __kmp_invoke_microtask(microtask, gtid, 0, argc,
1809 parent_team->t.t_argv
1818 if (ompt_enabled.enabled) {
1819 *exit_frame_p = NULL;
1820 if (ompt_enabled.ompt_callback_implicit_task) {
1821 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1822 ompt_scope_end, NULL, &(task_info->task_data), 1,
1823 OMPT_CUR_TASK_INFO(master_th)->thread_num,
1824 ompt_task_implicit);
1826 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1827 __ompt_lw_taskteam_unlink(master_th);
1828 if (ompt_enabled.ompt_callback_parallel_end) {
1829 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1830 &ompt_parallel_data, parent_task_data,
1831 OMPT_INVOKER(call_context) | ompt_parallel_team,
1834 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1837 }
else if (microtask == (microtask_t)__kmp_teams_master) {
1838 KMP_DEBUG_ASSERT(master_th->th.th_team ==
1839 master_th->th.th_serial_team);
1840 team = master_th->th.th_team;
1842 team->t.t_invoke = invoker;
1843 __kmp_alloc_argv_entries(argc, team, TRUE);
1844 team->t.t_argc = argc;
      argv = (void **)team->t.t_argv;
      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);
1850 for (i = 0; i < argc; ++i)
1852 argv[i] = parent_team->t.t_argv[i];
1860 if (ompt_enabled.enabled) {
1861 ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
1862 if (ompt_enabled.ompt_callback_implicit_task) {
1863 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1864 ompt_scope_end, NULL, &(task_info->task_data), 0,
1865 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
1867 if (ompt_enabled.ompt_callback_parallel_end) {
1868 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1869 &ompt_parallel_data, parent_task_data,
1870 OMPT_INVOKER(call_context) | ompt_parallel_league,
1873 master_th->th.ompt_thread_info.state = ompt_state_overhead;
        for (i = argc - 1; i >= 0; --i)
          *argv++ = va_arg(kmp_va_deref(ap), void *);
1884 void **exit_frame_p;
1885 ompt_task_info_t *task_info;
1887 ompt_lw_taskteam_t lw_taskteam;
1889 if (ompt_enabled.enabled) {
1890 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1891 &ompt_parallel_data, return_address);
1892 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1894 task_info = OMPT_CUR_TASK_INFO(master_th);
1895 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1898 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1899 if (ompt_enabled.ompt_callback_implicit_task) {
1900 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1901 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1902 implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
1903 ompt_task_implicit);
1904 OMPT_CUR_TASK_INFO(master_th)->thread_num =
1905 __kmp_tid_from_gtid(gtid);
1909 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1911 exit_frame_p = &dummy;
1916 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1917 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1918 __kmp_invoke_microtask(microtask, gtid, 0, argc, args
1927 if (ompt_enabled.enabled) {
1928 *exit_frame_p = NULL;
1929 if (ompt_enabled.ompt_callback_implicit_task) {
1930 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1931 ompt_scope_end, NULL, &(task_info->task_data), 1,
1932 OMPT_CUR_TASK_INFO(master_th)->thread_num,
1933 ompt_task_implicit);
1936 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1937 __ompt_lw_taskteam_unlink(master_th);
1938 if (ompt_enabled.ompt_callback_parallel_end) {
1939 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1940 &ompt_parallel_data, parent_task_data,
1941 OMPT_INVOKER(call_context) | ompt_parallel_team,
1944 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1948 }
else if (call_context == fork_context_gnu) {
1950 ompt_lw_taskteam_t lwt;
1951 __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
1954 lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
1955 __ompt_lw_taskteam_link(&lwt, master_th, 1);
      // We were called from GNU native code.
      KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
      return FALSE;
    } else {
      KMP_ASSERT2(call_context < fork_context_last,
                  "__kmp_fork_call: unknown fork_context parameter");
    }

    KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));
1982 master_th->th.th_current_task->td_flags.executing = 0;
1984 if (!master_th->th.th_teams_microtask || level > teams_level) {
1986 KMP_ATOMIC_INC(&root->r.r_in_parallel);
1990 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
1991 if ((level + 1 < __kmp_nested_nth.used) &&
1992 (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
1993 nthreads_icv = __kmp_nested_nth.nth[level + 1];
1999 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
2001 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
2002 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
2003 proc_bind = proc_bind_false;
2007 if (proc_bind == proc_bind_default) {
2008 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
2011 if (master_th->th.th_teams_microtask &&
2012 microtask == (microtask_t)__kmp_teams_master) {
2013 proc_bind = __kmp_teams_proc_bind;
2019 if ((level + 1 < __kmp_nested_proc_bind.used) &&
2020 (__kmp_nested_proc_bind.bind_types[level + 1] !=
2021 master_th->th.th_current_task->td_icvs.proc_bind)) {
2024 if (!master_th->th.th_teams_microtask ||
2025 !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
2026 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
2031 master_th->th.th_set_proc_bind = proc_bind_default;
2033 if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
2034 kmp_internal_control_t new_icvs;
2035 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
2036 new_icvs.next = NULL;
2037 if (nthreads_icv > 0) {
2038 new_icvs.nproc = nthreads_icv;
2040 if (proc_bind_icv != proc_bind_default) {
2041 new_icvs.proc_bind = proc_bind_icv;
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                               ompt_parallel_data,
#endif
                               proc_bind, &new_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
  } else {
    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                               ompt_parallel_data,
#endif
                               proc_bind,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
                &master_th->th.th_current_task->td_icvs);
  }
  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
2072 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2073 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2074 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2075 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2076 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
2078 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
2081 KMP_CHECK_UPDATE(team->t.t_invoke, invoker);
2083 if (!master_th->th.th_teams_microtask || level > teams_level) {
2084 int new_level = parent_team->t.t_level + 1;
2085 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2086 new_level = parent_team->t.t_active_level + 1;
2087 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2090 int new_level = parent_team->t.t_level;
2091 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2092 new_level = parent_team->t.t_active_level;
2093 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2095 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
2097 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
2099 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
2100 KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);
2103 propagateFPControl(team);
2105 if (ompd_state & OMPD_ENABLE_BP)
2106 ompd_bp_parallel_begin();
2109 if (__kmp_tasking_mode != tskm_immediate_exec) {
2112 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2113 parent_team->t.t_task_team[master_th->th.th_task_state]);
    KA_TRACE(20, ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
                  "%p, new task_team %p / team %p\n",
                  __kmp_gtid_from_thread(master_th),
                  master_th->th.th_task_team, parent_team,
                  team->t.t_task_team[master_th->th.th_task_state], team));
2120 if (active_level || master_th->th.th_task_team) {
2122 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2123 if (master_th->th.th_task_state_top >=
2124 master_th->th.th_task_state_stack_sz) {
2125 kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
2126 kmp_uint8 *old_stack, *new_stack;
2128 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
2129 for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
2130 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2132 for (i = master_th->th.th_task_state_stack_sz; i < new_size;
2136 old_stack = master_th->th.th_task_state_memo_stack;
2137 master_th->th.th_task_state_memo_stack = new_stack;
2138 master_th->th.th_task_state_stack_sz = new_size;
2139 __kmp_free(old_stack);
2143 .th_task_state_memo_stack[master_th->th.th_task_state_top] =
2144 master_th->th.th_task_state;
2145 master_th->th.th_task_state_top++;
2146#if KMP_NESTED_HOT_TEAMS
2147 if (master_th->th.th_hot_teams &&
2148 active_level < __kmp_hot_teams_max_level &&
2149 team == master_th->th.th_hot_teams[active_level].hot_team) {
2151 master_th->th.th_task_state =
2153 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2156 master_th->th.th_task_state = 0;
2157#if KMP_NESTED_HOT_TEAMS
2161#if !KMP_NESTED_HOT_TEAMS
2162 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
2163 (team == root->r.r_hot_team));
2169 (
"__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2170 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
2172 KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
2173 (team->t.t_master_tid == 0 &&
2174 (team->t.t_parent == root->r.r_root_team ||
2175 team->t.t_parent->t.t_serialized)));
  /* now, setup the arguments */
  argv = (void **)team->t.t_argv;
  if (ap) {
    for (i = argc - 1; i >= 0; --i) {
      void *new_argv = va_arg(kmp_va_deref(ap), void *);
      KMP_CHECK_UPDATE(*argv, new_argv);
      argv++;
    }
  } else {
    for (i = 0; i < argc; ++i) {
      // Get args from parent team for the teams construct
      KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
    }
  }
2194 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
2195 if (!root->r.r_active)
2196 root->r.r_active = TRUE;
2198 __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
2199 __kmp_setup_icv_copy(team, nthreads,
2200 &master_th->th.th_current_task->td_icvs, loc);
2203 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
2206 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2209 if (team->t.t_active_level == 1
2210 && !master_th->th.th_teams_microtask) {
2212 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2213 (__kmp_forkjoin_frames_mode == 3 ||
2214 __kmp_forkjoin_frames_mode == 1)) {
2215 kmp_uint64 tmp_time = 0;
2216 if (__itt_get_timestamp_ptr)
2217 tmp_time = __itt_get_timestamp();
2219 master_th->th.th_frame_time = tmp_time;
2220 if (__kmp_forkjoin_frames_mode == 3)
2221 team->t.t_region_time = tmp_time;
2225 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
2226 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
2228 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
2234 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));
2241 if (__itt_stack_caller_create_ptr) {
2244 KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
2245 team->t.t_stack_id = __kmp_itt_stack_caller_create();
  } else if (parent_team->t.t_serialized) {
2251 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
2252 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
2260 __kmp_internal_fork(loc, gtid, team);
  KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, team, master_th, gtid));
2266 if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
    return TRUE;
  }

  /* Invoke microtask for PRIMARY thread */
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));
2276#if KMP_STATS_ENABLED
2280 KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
  if (!team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }
2288#if KMP_STATS_ENABLED
2291 KMP_SET_THREAD_STATE(previous_state);
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
2301 if (ompt_enabled.enabled) {
2302 master_th->th.ompt_thread_info.state = ompt_state_overhead;
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // Restore state outside the region.
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);
}
static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {
2321 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2322 if (ompt_enabled.ompt_callback_parallel_end) {
2323 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
2324 parallel_data, &(task_info->task_data), flags, codeptr);
2327 task_info->frame.enter_frame = ompt_data_none;
2328 __kmp_join_restore_state(thread, team);
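/* __kmp_join_call: the corresponding join point for a parallel region.  The
   primary thread waits for the team in __kmp_internal_join, performs OMPT and
   ITT bookkeeping, and restores its state in the parent team. */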
void __kmp_join_call(ident_t *loc, int gtid
#if OMPT_SUPPORT
                     ,
                     enum fork_context_e fork_context
#endif
                     ,
                     int exit_teams) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int master_active;

  KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));
2349 master_th = __kmp_threads[gtid];
2350 root = master_th->th.th_root;
2351 team = master_th->th.th_team;
2352 parent_team = team->t.t_parent;
2354 master_th->th.th_ident = loc;
  void *team_microtask = (void *)team->t.t_pkfn;
2361 if (ompt_enabled.enabled &&
2362 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2363 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2368 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
2374 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2375 team->t.t_task_team[master_th->th.th_task_state]);
2379 if (team->t.t_serialized) {
2380 if (master_th->th.th_teams_microtask) {
2382 int level = team->t.t_level;
2383 int tlevel = master_th->th.th_teams_level;
2384 if (level == tlevel) {
2388 }
else if (level == tlevel + 1) {
2392 team->t.t_serialized++;
2398 if (ompt_enabled.enabled) {
2399 __kmp_join_restore_state(master_th, parent_team);
2406 master_active = team->t.t_master_active;
2411 __kmp_internal_join(loc, gtid, team);
2413 if (__itt_stack_caller_create_ptr) {
2414 KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
2416 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2417 team->t.t_stack_id = NULL;
2421 master_th->th.th_task_state =
2424 if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
2425 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
2429 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2430 parent_team->t.t_stack_id = NULL;
2434 if (team->t.t_nproc > 1 &&
2435 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2436 team->t.b->update_num_threads(team->t.t_nproc);
2437 __kmp_add_threads_to_team(team, team->t.t_nproc);
2444 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2445 void *codeptr = team->t.ompt_team_info.master_return_address;
2450 if (team->t.t_active_level == 1 &&
2451 (!master_th->th.th_teams_microtask ||
2452 master_th->th.th_teams_size.nteams == 1)) {
2453 master_th->th.th_ident = loc;
2456 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2457 __kmp_forkjoin_frames_mode == 3)
2458 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2459 master_th->th.th_frame_time, 0, loc,
2460 master_th->th.th_team_nproc, 1);
2461 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2462 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2463 __kmp_itt_region_joined(gtid);
2467#if KMP_AFFINITY_SUPPORTED
2470 master_th->th.th_first_place = team->t.t_first_place;
2471 master_th->th.th_last_place = team->t.t_last_place;
2475 if (master_th->th.th_teams_microtask && !exit_teams &&
2476 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2477 team->t.t_level == master_th->th.th_teams_level + 1) {
2482 ompt_data_t ompt_parallel_data = ompt_data_none;
2483 if (ompt_enabled.enabled) {
2484 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2485 if (ompt_enabled.ompt_callback_implicit_task) {
2486 int ompt_team_size = team->t.t_nproc;
2487 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2488 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2489 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2491 task_info->frame.exit_frame = ompt_data_none;
2492 task_info->task_data = ompt_data_none;
2493 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2494 __ompt_lw_taskteam_unlink(master_th);
2499 team->t.t_active_level--;
2500 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2506 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2507 int old_num = master_th->th.th_team_nproc;
2508 int new_num = master_th->th.th_teams_size.nth;
2509 kmp_info_t **other_threads = team->t.t_threads;
      team->t.t_nproc = new_num;
      for (int i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      // Adjust states of the threads newly added to the team.
      for (int i = old_num; i < new_num; ++i) {
        // Re-initialize the thread's barrier data.
        KMP_DEBUG_ASSERT(other_threads[i]);
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
        }
        if (__kmp_tasking_mode != tskm_immediate_exec) {
          // Synchronize the thread's task state.
          other_threads[i]->th.th_task_state = master_th->th.th_task_state;
        }
      }
    }
2534 if (ompt_enabled.enabled) {
2535 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2536 OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
2544 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2545 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2547 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2552 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2554 if (!master_th->th.th_teams_microtask ||
2555 team->t.t_level > master_th->th.th_teams_level) {
2557 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2559 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2562 if (ompt_enabled.enabled) {
2563 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2564 if (ompt_enabled.ompt_callback_implicit_task) {
      int flags = (team_microtask == (void *)__kmp_teams_master)
                      ? ompt_task_initial
                      : ompt_task_implicit;
      int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
          OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
    }
    task_info->frame.exit_frame = ompt_data_none;
    task_info->task_data = ompt_data_none;
  }
  KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
                master_th, team));
  __kmp_pop_current_task_from_thread(master_th);
2582 master_th->th.th_def_allocator = team->t.t_def_allocator;
2585 if (ompd_state & OMPD_ENABLE_BP)
2586 ompd_bp_parallel_end();
2588 updateHWFPControl(team);
2590 if (root->r.r_active != master_active)
2591 root->r.r_active = master_active;
2593 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2601 master_th->th.th_team = parent_team;
2602 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2603 master_th->th.th_team_master = parent_team->t.t_threads[0];
2604 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2607 if (parent_team->t.t_serialized &&
2608 parent_team != master_th->th.th_serial_team &&
2609 parent_team != root->r.r_root_team) {
2610 __kmp_free_team(root,
2611 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2612 master_th->th.th_serial_team = parent_team;
2615 if (__kmp_tasking_mode != tskm_immediate_exec) {
2616 if (master_th->th.th_task_state_top >
2618 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2620 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2621 master_th->th.th_task_state;
2622 --master_th->th.th_task_state_top;
2624 master_th->th.th_task_state =
2626 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2629 master_th->th.th_task_team =
2630 parent_team->t.t_task_team[master_th->th.th_task_state];
    KA_TRACE(20,
             ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
              __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
              team));
2640 master_th->th.th_current_task->td_flags.executing = 1;
2642 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
#if KMP_AFFINITY_SUPPORTED
  if (master_th->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
    __kmp_reset_root_init_mask(gtid);
  }
#endif

  int flags =
      OMPT_INVOKER(fork_context) |
      ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
                                                      : ompt_parallel_team);
  if (ompt_enabled.enabled) {
    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
                    codeptr);
  }

  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}
void __kmp_save_internal_controls(kmp_info_t *thread) {
  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;
    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else {
      if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
          thread->th.th_team->t.t_serialized) {
        push = 1;
      }
    }
    if (push) { /* push a record on the serial team's control stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));
      copy_icvs(control, &thread->th.th_current_task->td_icvs);
      control->serial_nesting_level = thread->th.th_team->t.t_serialized;
      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}
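// Illustrative sketch (user-level view, an assumption about a typical trigger):
// the control stack above is what lets ICV changes made inside nested
// *serialized* regions be restored on exit, e.g.
//
//   #pragma omp parallel if (0)        // serialized: t_serialized == 1
//   {
//     #pragma omp parallel if (0)      // nested serialized: t_serialized == 2
//     {
//       omp_set_num_threads(4);        // pushes a record for nesting level 2
//     }                                // outer level's ICVs remain intact
//   }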
/* Changes set_nproc */
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  thread = __kmp_threads[gtid];
  if (thread->th.th_current_task->td_icvs.nproc == new_nth)
    return; // nothing to do

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this omp_set_num_threads() call will cause the hot team size to be
  // reduced (in the absence of a num_threads clause), then reduce it now,
  // rather than waiting for the next parallel region.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
  ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
    }
    // Release the extra threads we no longer need.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // When decreasing the team size, threads no longer in the team should
        // unref the task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      hot_team->t.b->update_num_threads(new_nth);
      __kmp_add_threads_to_team(hot_team, new_nth);
    }

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case of an omp_set_num_threads() call.
    hot_team->t.t_size_changed = -1;
  }
}
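// Illustrative sketch (assumption about the public entry point): the OpenMP
// API routine reaches the function above roughly as
//
//   void omp_set_num_threads(int n) {
//     __kmp_set_num_threads(n, __kmp_entry_gtid());
//   }
//
// so shrinking the hot team here is what makes a smaller value take effect
// before the next parallel region is forked.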
/* Changes max_active_levels */
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Validate max_active_levels.
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // We ignore this call if the user has specified a negative value.
    // The current setting is not changed; the last valid setting is kept.
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // OK, the value is within the valid range [0; KMP_MAX_ACTIVE_LEVELS_LIMIT].
  } else {
    // Values larger than KMP_MAX_ACTIVE_LEVELS_LIMIT are clamped to the limit.
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}
/* Gets max_active_levels */
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}
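// Illustrative sketch (user-level view): the two routines above back
// omp_set_max_active_levels() / omp_get_max_active_levels(), e.g.
//
//   omp_set_max_active_levels(2);            // allow two active nesting levels
//   int lvls = omp_get_max_active_levels();  // reads back the ICV set above
//
// Values outside [0, KMP_MAX_ACTIVE_LEVELS_LIMIT] are rejected or clamped by
// the validation in __kmp_set_max_active_levels().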
// nteams-var per-device ICV
void __kmp_set_num_teams(int num_teams) {
  if (num_teams > 0)
    __kmp_nteams = num_teams;
}
int __kmp_get_max_teams(void) { return __kmp_nteams; }

// teams-thread-limit-var per-device ICV
void __kmp_set_teams_thread_limit(int limit) {
  if (limit > 0)
    __kmp_teams_thread_limit = limit;
}
int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }

KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;
  kmp_sched_t orig_kind;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Check whether the kind parameter is valid; correct it if needed.
  orig_kind = kind;
  kind = __kmp_sched_without_mods(kind);

  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
    chunk = 0; // ignore the chunk value in case of a bad kind
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // Differ static chunked vs. unchunked: an invalid chunk indicates the
      // unchunked schedule (which is the default).
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  __kmp_sched_apply_mods_intkind(
      orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
  if (kind == kmp_sched_auto || chunk < 1) {
    // Ignore the chunk parameter for schedule(auto).
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}
/* Gets def_sched_var ICV values */
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
  switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    __kmp_sched_apply_mods_stdkind(kind, th_type);
    *chunk = 0; // chunk was not set, show this fact via a zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  __kmp_sched_apply_mods_stdkind(kind, th_type);
  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
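// Illustrative sketch (user-level view, an assumption about typical usage):
// __kmp_set_schedule()/__kmp_get_schedule() above implement the run-sched-var
// ICV behind omp_set_schedule()/omp_get_schedule(). Note the chunk handling:
// an unchunked static schedule is reported back with chunk == 0.
//
//   omp_set_schedule(omp_sched_static, 0);   // unchunked static
//   omp_sched_t k; int c;
//   omp_get_schedule(&k, &c);                // k == omp_sched_static, c == 0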
2956int __kmp_get_ancestor_thread_num(
int gtid,
int level) {
2962 KF_TRACE(10, (
"__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
2963 KMP_DEBUG_ASSERT(__kmp_init_serial);
2970 thr = __kmp_threads[gtid];
2971 team = thr->th.th_team;
2972 ii = team->t.t_level;
2976 if (thr->th.th_teams_microtask) {
2978 int tlevel = thr->th.th_teams_level;
2981 KMP_DEBUG_ASSERT(ii >= tlevel);
2993 return __kmp_tid_from_gtid(gtid);
2995 dd = team->t.t_serialized;
2997 while (ii > level) {
2998 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3000 if ((team->t.t_serialized) && (!dd)) {
3001 team = team->t.t_parent;
3005 team = team->t.t_parent;
3006 dd = team->t.t_serialized;
3011 return (dd > 1) ? (0) : (team->t.t_master_tid);
3014int __kmp_get_team_size(
int gtid,
int level) {
3020 KF_TRACE(10, (
"__kmp_get_team_size: thread %d %d\n", gtid, level));
3021 KMP_DEBUG_ASSERT(__kmp_init_serial);
3028 thr = __kmp_threads[gtid];
3029 team = thr->th.th_team;
3030 ii = team->t.t_level;
3034 if (thr->th.th_teams_microtask) {
3036 int tlevel = thr->th.th_teams_level;
3039 KMP_DEBUG_ASSERT(ii >= tlevel);
3050 while (ii > level) {
3051 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3053 if (team->t.t_serialized && (!dd)) {
3054 team = team->t.t_parent;
3058 team = team->t.t_parent;
3063 return team->t.t_nproc;
3066kmp_r_sched_t __kmp_get_schedule_global() {
3071 kmp_r_sched_t r_sched;
3077 enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
3078 enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
3081 r_sched.r_sched_type = __kmp_static;
3084 r_sched.r_sched_type = __kmp_guided;
3086 r_sched.r_sched_type = __kmp_sched;
3088 SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
3090 if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
3092 r_sched.chunk = KMP_DEFAULT_CHUNK;
3094 r_sched.chunk = __kmp_chunk;
3102static void __kmp_alloc_argv_entries(
int argc, kmp_team_t *team,
int realloc) {
3104 KMP_DEBUG_ASSERT(team);
3105 if (!realloc || argc > team->t.t_max_argc) {
3107 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: needed entries=%d, "
3108 "current entries=%d\n",
3109 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
3111 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
3112 __kmp_free((
void *)team->t.t_argv);
3114 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3116 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3117 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: inline allocate %d "
3119 team->t.t_id, team->t.t_max_argc));
3120 team->t.t_argv = &team->t.t_inline_argv[0];
3121 if (__kmp_storage_map) {
3122 __kmp_print_storage_map_gtid(
3123 -1, &team->t.t_inline_argv[0],
3124 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3125 (
sizeof(
void *) * KMP_INLINE_ARGV_ENTRIES),
"team_%d.t_inline_argv",
3130 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3131 ? KMP_MIN_MALLOC_ARGV_ENTRIES
3133 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3135 team->t.t_id, team->t.t_max_argc));
3137 (
void **)__kmp_page_allocate(
sizeof(
void *) * team->t.t_max_argc);
3138 if (__kmp_storage_map) {
3139 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3140 &team->t.t_argv[team->t.t_max_argc],
3141 sizeof(
void *) * team->t.t_max_argc,
3142 "team_%d.t_argv", team->t.t_id);
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
  }
}
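// Note (descriptive, based on the code above): a team of size 1 never has more
// than one worksharing loop in flight, so two dispatch buffers suffice; larger
// teams get __kmp_dispatch_num_buffers shared buffers so that consecutive
// dynamically scheduled (nowait) loops can be in flight without the whole team
// first draining at a barrier.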
static void __kmp_free_team_arrays(kmp_team_t *team) {
  /* free the dispatch buffers of threads that used them */
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_free_hierarchies(team);
#endif
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}

static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}
3204static kmp_internal_control_t __kmp_get_global_icvs(
void) {
3206 kmp_r_sched_t r_sched =
3207 __kmp_get_schedule_global();
3209 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
3211 kmp_internal_control_t g_icvs = {
3213 (kmp_int8)__kmp_global.g.g_dynamic,
3215 (kmp_int8)__kmp_env_blocktime,
3217 __kmp_dflt_blocktime,
3222 __kmp_dflt_team_nth,
3226 __kmp_dflt_max_active_levels,
3230 __kmp_nested_proc_bind.bind_types[0],
3231 __kmp_default_device,
3238static kmp_internal_control_t __kmp_get_x_global_icvs(
const kmp_team_t *team) {
3240 kmp_internal_control_t gx_icvs;
3241 gx_icvs.serial_nesting_level =
3243 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3244 gx_icvs.next = NULL;
3249static void __kmp_initialize_root(kmp_root_t *root) {
3251 kmp_team_t *root_team;
3252 kmp_team_t *hot_team;
3253 int hot_team_max_nth;
3254 kmp_r_sched_t r_sched =
3255 __kmp_get_schedule_global();
3256 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3257 KMP_DEBUG_ASSERT(root);
3258 KMP_ASSERT(!root->r.r_begin);
3261 __kmp_init_lock(&root->r.r_begin_lock);
3262 root->r.r_begin = FALSE;
3263 root->r.r_active = FALSE;
3264 root->r.r_in_parallel = 0;
3265 root->r.r_blocktime = __kmp_dflt_blocktime;
3266#if KMP_AFFINITY_SUPPORTED
3267 root->r.r_affinity_assigned = FALSE;
3272 KF_TRACE(10, (
"__kmp_initialize_root: before root_team\n"));
3275 __kmp_allocate_team(root,
3281 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3283 USE_NESTED_HOT_ARG(NULL)
3288 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3291 KF_TRACE(10, (
"__kmp_initialize_root: after root_team = %p\n", root_team));
3293 root->r.r_root_team = root_team;
3294 root_team->t.t_control_stack_top = NULL;
3297 root_team->t.t_threads[0] = NULL;
3298 root_team->t.t_nproc = 1;
3299 root_team->t.t_serialized = 1;
3301 root_team->t.t_sched.sched = r_sched.sched;
3304 (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3305 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3309 KF_TRACE(10, (
"__kmp_initialize_root: before hot_team\n"));
3312 __kmp_allocate_team(root,
3314 __kmp_dflt_team_nth_ub * 2,
3318 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3320 USE_NESTED_HOT_ARG(NULL)
3322 KF_TRACE(10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team));
3324 root->r.r_hot_team = hot_team;
3325 root_team->t.t_control_stack_top = NULL;
3328 hot_team->t.t_parent = root_team;
3331 hot_team_max_nth = hot_team->t.t_max_nproc;
3332 for (f = 0; f < hot_team_max_nth; ++f) {
3333 hot_team->t.t_threads[f] = NULL;
3335 hot_team->t.t_nproc = 1;
3337 hot_team->t.t_sched.sched = r_sched.sched;
3338 hot_team->t.t_size_changed = 0;
3343typedef struct kmp_team_list_item {
3344 kmp_team_p
const *entry;
3345 struct kmp_team_list_item *next;
3346} kmp_team_list_item_t;
3347typedef kmp_team_list_item_t *kmp_team_list_t;
3349static void __kmp_print_structure_team_accum(
3350 kmp_team_list_t list,
3351 kmp_team_p
const *team
3361 KMP_DEBUG_ASSERT(list != NULL);
3366 __kmp_print_structure_team_accum(list, team->t.t_parent);
3367 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3371 while (l->next != NULL && l->entry != team) {
3374 if (l->next != NULL) {
3380 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3386 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3387 sizeof(kmp_team_list_item_t));
3394static void __kmp_print_structure_team(
char const *title, kmp_team_p
const *team
3397 __kmp_printf(
"%s", title);
3399 __kmp_printf(
"%2x %p\n", team->t.t_id, team);
3401 __kmp_printf(
" - (nil)\n");
3405static void __kmp_print_structure_thread(
char const *title,
3406 kmp_info_p
const *thread) {
3407 __kmp_printf(
"%s", title);
3408 if (thread != NULL) {
3409 __kmp_printf(
"%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3411 __kmp_printf(
" - (nil)\n");
3415void __kmp_print_structure(
void) {
3417 kmp_team_list_t list;
3421 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof(kmp_team_list_item_t));
3425 __kmp_printf(
"\n------------------------------\nGlobal Thread "
3426 "Table\n------------------------------\n");
3429 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3430 __kmp_printf(
"%2d", gtid);
3431 if (__kmp_threads != NULL) {
3432 __kmp_printf(
" %p", __kmp_threads[gtid]);
3434 if (__kmp_root != NULL) {
3435 __kmp_printf(
" %p", __kmp_root[gtid]);
3442 __kmp_printf(
"\n------------------------------\nThreads\n--------------------"
3444 if (__kmp_threads != NULL) {
3446 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3447 kmp_info_t
const *thread = __kmp_threads[gtid];
3448 if (thread != NULL) {
3449 __kmp_printf(
"GTID %2d %p:\n", gtid, thread);
3450 __kmp_printf(
" Our Root: %p\n", thread->th.th_root);
3451 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3452 __kmp_print_structure_team(
" Serial Team: ",
3453 thread->th.th_serial_team);
3454 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3455 __kmp_print_structure_thread(
" Primary: ",
3456 thread->th.th_team_master);
3457 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3458 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3459 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3460 __kmp_print_structure_thread(
" Next in pool: ",
3461 thread->th.th_next_pool);
3463 __kmp_print_structure_team_accum(list, thread->th.th_team);
3464 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3468 __kmp_printf(
"Threads array is not allocated.\n");
3472 __kmp_printf(
"\n------------------------------\nUbers\n----------------------"
3474 if (__kmp_root != NULL) {
3476 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3477 kmp_root_t
const *root = __kmp_root[gtid];
3479 __kmp_printf(
"GTID %2d %p:\n", gtid, root);
3480 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3481 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3482 __kmp_print_structure_thread(
" Uber Thread: ",
3483 root->r.r_uber_thread);
3484 __kmp_printf(
" Active?: %2d\n", root->r.r_active);
3485 __kmp_printf(
" In Parallel: %2d\n",
3486 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3488 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3489 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3493 __kmp_printf(
"Ubers array is not allocated.\n");
3496 __kmp_printf(
"\n------------------------------\nTeams\n----------------------"
3498 while (list->next != NULL) {
3499 kmp_team_p
const *team = list->entry;
3501 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team);
3502 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent);
3503 __kmp_printf(
" Primary TID: %2d\n", team->t.t_master_tid);
3504 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc);
3505 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized);
3506 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc);
3507 for (i = 0; i < team->t.t_nproc; ++i) {
3508 __kmp_printf(
" Thread %2d: ", i);
3509 __kmp_print_structure_thread(
"", team->t.t_threads[i]);
3511 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool);
3517 __kmp_printf(
"\n------------------------------\nPools\n----------------------"
3519 __kmp_print_structure_thread(
"Thread pool: ",
3520 CCAST(kmp_info_t *, __kmp_thread_pool));
3521 __kmp_print_structure_team(
"Team pool: ",
3522 CCAST(kmp_team_t *, __kmp_team_pool));
3526 while (list != NULL) {
3527 kmp_team_list_item_t *item = list;
3529 KMP_INTERNAL_FREE(item);
3538static const unsigned __kmp_primes[] = {
3539 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3540 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3541 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3542 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3543 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3544 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3545 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3546 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3547 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3548 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3549 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
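// Note (descriptive): __kmp_primes above is a table of 32-bit multipliers;
// __kmp_init_random() below picks one per thread (indexed by the thread id) so
// that each thread's generator follows a different sequence.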
/* Get a random number from a thread-private linear congruential generator. */
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = (unsigned short)(x >> 16);

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}

/* Seed the per-thread random number generator. */
void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
3579static int __kmp_reclaim_dead_roots(
void) {
3582 for (i = 0; i < __kmp_threads_capacity; ++i) {
3583 if (KMP_UBER_GTID(i) &&
3584 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3587 r += __kmp_unregister_root_other_thread(i);
3612static int __kmp_expand_threads(
int nNeed) {
3614 int minimumRequiredCapacity;
3616 kmp_info_t **newThreads;
3617 kmp_root_t **newRoot;
3623#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
3626 added = __kmp_reclaim_dead_roots();
3655 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
3658 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3662 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));
  // Put the old __kmp_threads array on a list; ongoing references to the old
  // array stay valid and the list is freed at library shutdown.
  kmp_old_threads_list_t *node =
      (kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t));
  node->threads = __kmp_threads;
  node->next = __kmp_old_threads_list;
  __kmp_old_threads_list = node;

  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;

  if (newCapacity > __kmp_tp_capacity) {
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      __kmp_threadprivate_resize_cache(newCapacity);
    } else { // increase __kmp_tp_capacity to match the new threads capacity
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
    }
    __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
  }

  return added;
}
3706int __kmp_register_root(
int initial_thread) {
3707 kmp_info_t *root_thread;
3711 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3712 KA_TRACE(20, (
"__kmp_register_root: entered\n"));
3729 capacity = __kmp_threads_capacity;
3730 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3737 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3738 capacity -= __kmp_hidden_helper_threads_num;
3742 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3743 if (__kmp_tp_cached) {
3744 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3745 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3746 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3748 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3758 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3761 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3762 gtid <= __kmp_hidden_helper_threads_num;
3765 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3766 KA_TRACE(1, (
"__kmp_register_root: found slot in threads array for "
3767 "hidden helper thread: T#%d\n",
3773 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3776 for (gtid = __kmp_hidden_helper_threads_num + 1;
3777 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3781 1, (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3782 KMP_ASSERT(gtid < __kmp_threads_capacity);
3787 TCW_4(__kmp_nth, __kmp_nth + 1);
3791 if (__kmp_adjust_gtid_mode) {
3792 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3793 if (TCR_4(__kmp_gtid_mode) != 2) {
3794 TCW_4(__kmp_gtid_mode, 2);
3797 if (TCR_4(__kmp_gtid_mode) != 1) {
3798 TCW_4(__kmp_gtid_mode, 1);
3803#ifdef KMP_ADJUST_BLOCKTIME
3806 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3807 if (__kmp_nth > __kmp_avail_proc) {
3808 __kmp_zero_bt = TRUE;
3814 if (!(root = __kmp_root[gtid])) {
3815 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(
sizeof(kmp_root_t));
3816 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3819#if KMP_STATS_ENABLED
3821 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3822 __kmp_stats_thread_ptr->startLife();
3823 KMP_SET_THREAD_STATE(SERIAL_REGION);
3826 __kmp_initialize_root(root);
3829 if (root->r.r_uber_thread) {
3830 root_thread = root->r.r_uber_thread;
3832 root_thread = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
3833 if (__kmp_storage_map) {
3834 __kmp_print_thread_storage_map(root_thread, gtid);
3836 root_thread->th.th_info.ds.ds_gtid = gtid;
3838 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3840 root_thread->th.th_root = root;
3841 if (__kmp_env_consistency_check) {
3842 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3845 __kmp_initialize_fast_memory(root_thread);
3849 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3850 __kmp_initialize_bget(root_thread);
3852 __kmp_init_random(root_thread);
3856 if (!root_thread->th.th_serial_team) {
3857 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3858 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3859 root_thread->th.th_serial_team = __kmp_allocate_team(
3864 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3866 KMP_ASSERT(root_thread->th.th_serial_team);
3867 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3868 root_thread->th.th_serial_team));
3871 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3873 root->r.r_root_team->t.t_threads[0] = root_thread;
3874 root->r.r_hot_team->t.t_threads[0] = root_thread;
3875 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3877 root_thread->th.th_serial_team->t.t_serialized = 0;
3878 root->r.r_uber_thread = root_thread;
3881 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3882 TCW_4(__kmp_init_gtid, TRUE);
3885 __kmp_gtid_set_specific(gtid);
3888 __kmp_itt_thread_name(gtid);
3891#ifdef KMP_TDATA_GTID
3894 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3895 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3897 KA_TRACE(20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3899 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3900 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3901 KMP_INIT_BARRIER_STATE));
3904 for (b = 0; b < bs_last_barrier; ++b) {
3905 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3907 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3911 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3912 KMP_INIT_BARRIER_STATE);
3914#if KMP_AFFINITY_SUPPORTED
3915 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3916 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3917 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3918 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3920 root_thread->th.th_def_allocator = __kmp_def_allocator;
3921 root_thread->th.th_prev_level = 0;
3922 root_thread->th.th_prev_num_threads = 1;
3924 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(
sizeof(kmp_cg_root_t));
3925 tmp->cg_root = root_thread;
3926 tmp->cg_thread_limit = __kmp_cg_max_nth;
3927 tmp->cg_nthreads = 1;
3928 KA_TRACE(100, (
"__kmp_register_root: Thread %p created node %p with"
3929 " cg_nthreads init to 1\n",
3932 root_thread->th.th_cg_roots = tmp;
3934 __kmp_root_counter++;
3937 if (!initial_thread && ompt_enabled.enabled) {
3939 kmp_info_t *root_thread = ompt_get_thread();
3941 ompt_set_thread_state(root_thread, ompt_state_overhead);
3943 if (ompt_enabled.ompt_callback_thread_begin) {
3944 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3945 ompt_thread_initial, __ompt_get_thread_data_internal());
3947 ompt_data_t *task_data;
3948 ompt_data_t *parallel_data;
3949 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
3951 if (ompt_enabled.ompt_callback_implicit_task) {
3952 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
3953 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
3956 ompt_set_thread_state(root_thread, ompt_state_work_serial);
3960 if (ompd_state & OMPD_ENABLE_BP)
3961 ompd_bp_thread_begin();
3965 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
3970#if KMP_NESTED_HOT_TEAMS
3971static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr,
int level,
3972 const int max_level) {
3974 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3975 if (!hot_teams || !hot_teams[level].hot_team) {
3978 KMP_DEBUG_ASSERT(level < max_level);
3979 kmp_team_t *team = hot_teams[level].hot_team;
3980 nth = hot_teams[level].hot_team_nth;
3982 if (level < max_level - 1) {
3983 for (i = 0; i < nth; ++i) {
3984 kmp_info_t *th = team->t.t_threads[i];
3985 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
3986 if (i > 0 && th->th.th_hot_teams) {
3987 __kmp_free(th->th.th_hot_teams);
3988 th->th.th_hot_teams = NULL;
3992 __kmp_free_team(root, team, NULL);
3999static int __kmp_reset_root(
int gtid, kmp_root_t *root) {
4000 kmp_team_t *root_team = root->r.r_root_team;
4001 kmp_team_t *hot_team = root->r.r_hot_team;
4002 int n = hot_team->t.t_nproc;
4005 KMP_DEBUG_ASSERT(!root->r.r_active);
4007 root->r.r_root_team = NULL;
4008 root->r.r_hot_team = NULL;
4011 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
4012#if KMP_NESTED_HOT_TEAMS
4013 if (__kmp_hot_teams_max_level >
4015 for (i = 0; i < hot_team->t.t_nproc; ++i) {
4016 kmp_info_t *th = hot_team->t.t_threads[i];
4017 if (__kmp_hot_teams_max_level > 1) {
4018 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4020 if (th->th.th_hot_teams) {
4021 __kmp_free(th->th.th_hot_teams);
4022 th->th.th_hot_teams = NULL;
4027 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
4032 if (__kmp_tasking_mode != tskm_immediate_exec) {
4033 __kmp_wait_to_unref_task_teams();
4039 10, (
"__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
4041 (LPVOID) & (root->r.r_uber_thread->th),
4042 root->r.r_uber_thread->th.th_info.ds.ds_thread));
4043 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4047 if (ompd_state & OMPD_ENABLE_BP)
4048 ompd_bp_thread_end();
4052 ompt_data_t *task_data;
4053 ompt_data_t *parallel_data;
4054 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4056 if (ompt_enabled.ompt_callback_implicit_task) {
4057 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4058 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4060 if (ompt_enabled.ompt_callback_thread_end) {
4061 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
4062 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4068 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4069 KA_TRACE(100, (
"__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
4071 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4072 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4075 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4076 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4077 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4078 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4079 root->r.r_uber_thread->th.th_cg_roots = NULL;
4081 __kmp_reap_thread(root->r.r_uber_thread, 1);
4085 root->r.r_uber_thread = NULL;
4087 root->r.r_begin = FALSE;
4092void __kmp_unregister_root_current_thread(
int gtid) {
4093 KA_TRACE(1, (
"__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
4097 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
4098 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
4099 KC_TRACE(10, (
"__kmp_unregister_root_current_thread: already finished, "
4102 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4105 kmp_root_t *root = __kmp_root[gtid];
4107 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4108 KMP_ASSERT(KMP_UBER_GTID(gtid));
4109 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4110 KMP_ASSERT(root->r.r_active == FALSE);
4114 kmp_info_t *thread = __kmp_threads[gtid];
4115 kmp_team_t *team = thread->th.th_team;
4116 kmp_task_team_t *task_team = thread->th.th_task_team;
4119 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
4120 task_team->tt.tt_hidden_helper_task_encountered)) {
4123 thread->th.ompt_thread_info.state = ompt_state_undefined;
4125 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4128 __kmp_reset_root(gtid, root);
4132 (
"__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4134 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4141static int __kmp_unregister_root_other_thread(
int gtid) {
4142 kmp_root_t *root = __kmp_root[gtid];
4145 KA_TRACE(1, (
"__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4146 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4147 KMP_ASSERT(KMP_UBER_GTID(gtid));
4148 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4149 KMP_ASSERT(root->r.r_active == FALSE);
4151 r = __kmp_reset_root(gtid, root);
4153 (
"__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
4159void __kmp_task_info() {
4161 kmp_int32 gtid = __kmp_entry_gtid();
4162 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4163 kmp_info_t *this_thr = __kmp_threads[gtid];
4164 kmp_team_t *steam = this_thr->th.th_serial_team;
4165 kmp_team_t *team = this_thr->th.th_team;
4168 "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
4170 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4171 team->t.t_implicit_task_taskdata[tid].td_parent);
4178static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4179 int tid,
int gtid) {
4183 KMP_DEBUG_ASSERT(this_thr != NULL);
4184 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4185 KMP_DEBUG_ASSERT(team);
4186 KMP_DEBUG_ASSERT(team->t.t_threads);
4187 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4188 kmp_info_t *master = team->t.t_threads[0];
4189 KMP_DEBUG_ASSERT(master);
4190 KMP_DEBUG_ASSERT(master->th.th_root);
4194 TCW_SYNC_PTR(this_thr->th.th_team, team);
4196 this_thr->th.th_info.ds.ds_tid = tid;
4197 this_thr->th.th_set_nproc = 0;
4198 if (__kmp_tasking_mode != tskm_immediate_exec)
4201 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4203 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4204 this_thr->th.th_set_proc_bind = proc_bind_default;
4205#if KMP_AFFINITY_SUPPORTED
4206 this_thr->th.th_new_place = this_thr->th.th_current_place;
4208 this_thr->th.th_root = master->th.th_root;
4211 this_thr->th.th_team_nproc = team->t.t_nproc;
4212 this_thr->th.th_team_master = master;
4213 this_thr->th.th_team_serialized = team->t.t_serialized;
4215 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4217 KF_TRACE(10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4218 tid, gtid, this_thr, this_thr->th.th_current_task));
4220 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4223 KF_TRACE(10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4224 tid, gtid, this_thr, this_thr->th.th_current_task));
4229 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4231 this_thr->th.th_local.this_construct = 0;
4233 if (!this_thr->th.th_pri_common) {
4234 this_thr->th.th_pri_common =
4235 (
struct common_table *)__kmp_allocate(
sizeof(
struct common_table));
4236 if (__kmp_storage_map) {
4237 __kmp_print_storage_map_gtid(
4238 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4239 sizeof(
struct common_table),
"th_%d.th_pri_common\n", gtid);
4241 this_thr->th.th_pri_head = NULL;
4244 if (this_thr != master &&
4245 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4247 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4248 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4251 int i = tmp->cg_nthreads--;
4252 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p decrement cg_nthreads"
4253 " on node %p of thread %p to %d\n",
4254 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4259 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4261 this_thr->th.th_cg_roots->cg_nthreads++;
4262 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p increment cg_nthreads on"
4263 " node %p of thread %p to %d\n",
4264 this_thr, this_thr->th.th_cg_roots,
4265 this_thr->th.th_cg_roots->cg_root,
4266 this_thr->th.th_cg_roots->cg_nthreads));
4267 this_thr->th.th_current_task->td_icvs.thread_limit =
4268 this_thr->th.th_cg_roots->cg_thread_limit;
4273 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4276 sizeof(dispatch_private_info_t) *
4277 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4278 KD_TRACE(10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4279 team->t.t_max_nproc));
4280 KMP_ASSERT(dispatch);
4281 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4282 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4284 dispatch->th_disp_index = 0;
4285 dispatch->th_doacross_buf_idx = 0;
4286 if (!dispatch->th_disp_buffer) {
4287 dispatch->th_disp_buffer =
4288 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4290 if (__kmp_storage_map) {
4291 __kmp_print_storage_map_gtid(
4292 gtid, &dispatch->th_disp_buffer[0],
4293 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4295 : __kmp_dispatch_num_buffers],
4297 "th_%d.th_dispatch.th_disp_buffer "
4298 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4299 gtid, team->t.t_id, gtid);
4302 memset(&dispatch->th_disp_buffer[0],
'\0', disp_size);
4305 dispatch->th_dispatch_pr_current = 0;
4306 dispatch->th_dispatch_sh_current = 0;
4308 dispatch->th_deo_fcn = 0;
4309 dispatch->th_dxo_fcn = 0;
4312 this_thr->th.th_next_pool = NULL;
4314 if (!this_thr->th.th_task_state_memo_stack) {
4316 this_thr->th.th_task_state_memo_stack =
4317 (kmp_uint8 *)__kmp_allocate(4 *
sizeof(kmp_uint8));
4318 this_thr->th.th_task_state_top = 0;
4319 this_thr->th.th_task_state_stack_sz = 4;
4320 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4322 this_thr->th.th_task_state_memo_stack[i] = 0;
4325 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4326 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4336kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4338 kmp_team_t *serial_team;
4339 kmp_info_t *new_thr;
4342 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4343 KMP_DEBUG_ASSERT(root && team);
4344#if !KMP_NESTED_HOT_TEAMS
4345 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4350 if (__kmp_thread_pool) {
4351 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4352 __kmp_thread_pool = (
volatile kmp_info_t *)new_thr->th.th_next_pool;
4353 if (new_thr == __kmp_thread_pool_insert_pt) {
4354 __kmp_thread_pool_insert_pt = NULL;
4356 TCW_4(new_thr->th.th_in_pool, FALSE);
4357 __kmp_suspend_initialize_thread(new_thr);
4358 __kmp_lock_suspend_mx(new_thr);
4359 if (new_thr->th.th_active_in_pool == TRUE) {
4360 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4361 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4362 new_thr->th.th_active_in_pool = FALSE;
4364 __kmp_unlock_suspend_mx(new_thr);
4366 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4367 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4368 KMP_ASSERT(!new_thr->th.th_team);
4369 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4372 __kmp_initialize_info(new_thr, team, new_tid,
4373 new_thr->th.th_info.ds.ds_gtid);
4374 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4376 TCW_4(__kmp_nth, __kmp_nth + 1);
4378 new_thr->th.th_task_state = 0;
4379 new_thr->th.th_task_state_top = 0;
4380 new_thr->th.th_task_state_stack_sz = 4;
4382 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4384 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4388#ifdef KMP_ADJUST_BLOCKTIME
4391 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4392 if (__kmp_nth > __kmp_avail_proc) {
4393 __kmp_zero_bt = TRUE;
4402 kmp_balign_t *balign = new_thr->th.th_bar;
4403 for (b = 0; b < bs_last_barrier; ++b)
4404 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4407 KF_TRACE(10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4408 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4415 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4416 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4421 if (!TCR_4(__kmp_init_monitor)) {
4422 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4423 if (!TCR_4(__kmp_init_monitor)) {
4424 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4425 TCW_4(__kmp_init_monitor, 1);
4426 __kmp_create_monitor(&__kmp_monitor);
4427 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4438 while (TCR_4(__kmp_init_monitor) < 2) {
4441 KF_TRACE(10, (
"after monitor thread has started\n"));
4444 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4451 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
4453 : __kmp_hidden_helper_threads_num + 1;
4455 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4457 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4460 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4461 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
4466 new_thr = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
4468 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4470#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4473 __itt_suppress_mark_range(
4474 __itt_suppress_range, __itt_suppress_threading_errors,
4475 &new_thr->th.th_sleep_loc,
sizeof(new_thr->th.th_sleep_loc));
4476 __itt_suppress_mark_range(
4477 __itt_suppress_range, __itt_suppress_threading_errors,
4478 &new_thr->th.th_reap_state,
sizeof(new_thr->th.th_reap_state));
4480 __itt_suppress_mark_range(
4481 __itt_suppress_range, __itt_suppress_threading_errors,
4482 &new_thr->th.th_suspend_init,
sizeof(new_thr->th.th_suspend_init));
4484 __itt_suppress_mark_range(__itt_suppress_range,
4485 __itt_suppress_threading_errors,
4486 &new_thr->th.th_suspend_init_count,
4487 sizeof(new_thr->th.th_suspend_init_count));
4490 __itt_suppress_mark_range(__itt_suppress_range,
4491 __itt_suppress_threading_errors,
4492 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4493 sizeof(new_thr->th.th_bar[0].bb.b_go));
4494 __itt_suppress_mark_range(__itt_suppress_range,
4495 __itt_suppress_threading_errors,
4496 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4497 sizeof(new_thr->th.th_bar[1].bb.b_go));
4498 __itt_suppress_mark_range(__itt_suppress_range,
4499 __itt_suppress_threading_errors,
4500 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4501 sizeof(new_thr->th.th_bar[2].bb.b_go));
4503 if (__kmp_storage_map) {
4504 __kmp_print_thread_storage_map(new_thr, new_gtid);
4509 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4510 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4511 new_thr->th.th_serial_team = serial_team =
4512 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4516 proc_bind_default, &r_icvs,
4517 0 USE_NESTED_HOT_ARG(NULL));
4519 KMP_ASSERT(serial_team);
4520 serial_team->t.t_serialized = 0;
4522 serial_team->t.t_threads[0] = new_thr;
4524 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4528 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4531 __kmp_initialize_fast_memory(new_thr);
4535 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4536 __kmp_initialize_bget(new_thr);
4539 __kmp_init_random(new_thr);
4543 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4544 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4547 kmp_balign_t *balign = new_thr->th.th_bar;
4548 for (b = 0; b < bs_last_barrier; ++b) {
4549 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4550 balign[b].bb.team = NULL;
4551 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4552 balign[b].bb.use_oncore_barrier = 0;
4555 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4556 new_thr->th.th_sleep_loc_type = flag_unset;
4558 new_thr->th.th_spin_here = FALSE;
4559 new_thr->th.th_next_waiting = 0;
4561 new_thr->th.th_blocking =
false;
4564#if KMP_AFFINITY_SUPPORTED
4565 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4566 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4567 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4568 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4570 new_thr->th.th_def_allocator = __kmp_def_allocator;
4571 new_thr->th.th_prev_level = 0;
4572 new_thr->th.th_prev_num_threads = 1;
4574 TCW_4(new_thr->th.th_in_pool, FALSE);
4575 new_thr->th.th_active_in_pool = FALSE;
4576 TCW_4(new_thr->th.th_active, TRUE);
4584 if (__kmp_adjust_gtid_mode) {
4585 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4586 if (TCR_4(__kmp_gtid_mode) != 2) {
4587 TCW_4(__kmp_gtid_mode, 2);
4590 if (TCR_4(__kmp_gtid_mode) != 1) {
4591 TCW_4(__kmp_gtid_mode, 1);
4596#ifdef KMP_ADJUST_BLOCKTIME
4599 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4600 if (__kmp_nth > __kmp_avail_proc) {
4601 __kmp_zero_bt = TRUE;
4608 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4609 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4611 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4613 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
  KMP_DEBUG_ASSERT(team && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);

  KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
  // Copy the ICVs to the primary thread's implicit taskdata.
  __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
  copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);

  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
}
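// Note (descriptive): __kmp_reinitialize_team() only refreshes the team's
// ident, id and the primary thread's implicit-task ICVs; full (re)setup of the
// team state is done by __kmp_initialize_team() below, which calls this helper
// as its final step.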
4645static void __kmp_initialize_team(kmp_team_t *team,
int new_nproc,
4646 kmp_internal_control_t *new_icvs,
4648 KF_TRACE(10, (
"__kmp_initialize_team: enter: team=%p\n", team));
4651 KMP_DEBUG_ASSERT(team);
4652 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4653 KMP_DEBUG_ASSERT(team->t.t_threads);
4656 team->t.t_master_tid = 0;
4658 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4659 team->t.t_nproc = new_nproc;
4662 team->t.t_next_pool = NULL;
4666 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4667 team->t.t_invoke = NULL;
4670 team->t.t_sched.sched = new_icvs->sched.sched;
4672#if KMP_ARCH_X86 || KMP_ARCH_X86_64
4673 team->t.t_fp_control_saved = FALSE;
4674 team->t.t_x87_fpu_control_word = 0;
4675 team->t.t_mxcsr = 0;
4678 team->t.t_construct = 0;
4680 team->t.t_ordered.dt.t_value = 0;
4681 team->t.t_master_active = FALSE;
4684 team->t.t_copypriv_data = NULL;
4687 team->t.t_copyin_counter = 0;
4690 team->t.t_control_stack_top = NULL;
4692 __kmp_reinitialize_team(team, new_icvs, loc);
4695 KF_TRACE(10, (
"__kmp_initialize_team: exit: team=%p\n", team));
4698#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
4701__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
4702 if (KMP_AFFINITY_CAPABLE()) {
4704 if (old_mask != NULL) {
4705 status = __kmp_get_system_affinity(old_mask, TRUE);
4708 __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
4712 __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
4717#if KMP_AFFINITY_SUPPORTED
4723static void __kmp_partition_places(kmp_team_t *team,
int update_master_only) {
4725 if (KMP_HIDDEN_HELPER_TEAM(team))
4728 kmp_info_t *master_th = team->t.t_threads[0];
4729 KMP_DEBUG_ASSERT(master_th != NULL);
4730 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4731 int first_place = master_th->th.th_first_place;
4732 int last_place = master_th->th.th_last_place;
4733 int masters_place = master_th->th.th_current_place;
4734 team->t.t_first_place = first_place;
4735 team->t.t_last_place = last_place;
4737 KA_TRACE(20, (
"__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4738 "bound to place %d partition = [%d,%d]\n",
4739 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4740 team->t.t_id, masters_place, first_place, last_place));
  switch (proc_bind) {

  case proc_bind_default:
    // Serial teams might have proc_bind_default set; nothing to do, since the
    // primary thread is never rebound.
    KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
    break;

  case proc_bind_primary: {
    int f;
    int n_th = team->t.t_nproc;
    for (f = 1; f < n_th; f++) {
      kmp_info_t *th = team->t.t_threads[f];
      KMP_DEBUG_ASSERT(th != NULL);
      th->th.th_first_place = first_place;
      th->th.th_last_place = last_place;
      th->th.th_new_place = masters_place;
      if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
          team->t.t_display_affinity != 1) {
        team->t.t_display_affinity = 1;
      }

      KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
                     "partition = [%d,%d]\n",
                     __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                     f, masters_place, first_place, last_place));
    }
  } break;
  case proc_bind_close: {
    int f;
    int n_th = team->t.t_nproc;
    int n_places;
    if (first_place <= last_place) {
      n_places = last_place - first_place + 1;
    } else {
      n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
    }
    if (n_th <= n_places) {
      int place = masters_place;
      for (f = 1; f < n_th; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        // Advance to the next place, wrapping around the place partition.
        if (place == last_place) {
          place = first_place;
        } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
          place = 0;
        } else {
          place++;
        }
4793 th->th.th_first_place = first_place;
4794 th->th.th_last_place = last_place;
4795 th->th.th_new_place = place;
4796 if (__kmp_display_affinity && place != th->th.th_current_place &&
4797 team->t.t_display_affinity != 1) {
4798 team->t.t_display_affinity = 1;
        }

        KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, place, first_place, last_place));
      }
    } else { // if (n_th > n_places): pack S or S+1 threads into each place
      int S, rem, gap, s_count;
      S = n_th / n_places;
      s_count = 0;
      rem = n_th - (S * n_places);
      gap = rem > 0 ? n_places / rem : n_places;
      int place = masters_place;
      int gap_ct = gap;
      for (f = 0; f < n_th; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        th->th.th_first_place = first_place;
        th->th.th_last_place = last_place;
        th->th.th_new_place = place;
        if (__kmp_display_affinity && place != th->th.th_current_place &&
            team->t.t_display_affinity != 1) {
          team->t.t_display_affinity = 1;
        }
        s_count++;

        if ((s_count == S) && rem && (gap_ct == gap)) {
          // do nothing: leave this place open for one extra thread
        } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
          // placed an extra thread here; move on to the next place
          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          s_count = 0;
          gap_ct = 1;
          rem--;
        } else if (s_count == S) { // place is full; move on to the next place
          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          gap_ct++;
          s_count = 0;
        }

        KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, th->th.th_new_place, first_place,
                       last_place));
      }
      KMP_DEBUG_ASSERT(place == masters_place);
    }
  } break;
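  // Worked example for the close-case packing above (illustration only): with
  // n_th = 10 threads and n_places = 4 places, S = 10 / 4 = 2,
  // rem = 10 - 2 * 4 = 2 and gap = 4 / 2 = 2, so one extra thread is handed
  // out every second place starting at the primary thread's place. The
  // resulting per-place thread counts are 3, 2, 3, 2 (10 threads total).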
  case proc_bind_spread: {
    int f;
    int n_th = team->t.t_nproc;
    int n_places;
    int thidx;
    if (first_place <= last_place) {
      n_places = last_place - first_place + 1;
    } else {
      n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
    }
    if (n_th <= n_places) {
      int place = -1;

      if (n_places != static_cast<int>(__kmp_affinity_num_masks)) {
        // Give each thread a sub-partition of S = n_places / n_th places.
        int S = n_places / n_th;
        int s_count, rem, gap, gap_ct;

        place = masters_place;
        rem = n_places - n_th * S;
        gap = rem ? n_th / rem : 1;
        gap_ct = gap;
        thidx = n_th;
        if (update_master_only == 1)
          thidx = 1;
        for (f = 0; f < thidx; f++) {
4888 kmp_info_t *th = team->t.t_threads[f];
4889 KMP_DEBUG_ASSERT(th != NULL);
4891 th->th.th_first_place = place;
4892 th->th.th_new_place = place;
          if (__kmp_display_affinity && place != th->th.th_current_place &&
              team->t.t_display_affinity != 1) {
            team->t.t_display_affinity = 1;
          }
          s_count = 1;
          while (s_count < S) { // extend the range by the next S-1 places
            if (place == last_place) {
              place = first_place;
            } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
              place = 0;
            } else {
              place++;
            }
            s_count++;
          }
          if (rem && (gap_ct == gap)) { // add an extra place to this range
            if (place == last_place) {
              place = first_place;
            } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
              place = 0;
            } else {
              place++;
            }
            rem--;
            gap_ct = 0;
          }
          th->th.th_last_place = place;
          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }

          KA_TRACE(100,
                   ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                    "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
                    __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                    f, th->th.th_new_place, th->th.th_first_place,
                    th->th.th_last_place, __kmp_affinity_num_masks));
        }
      } else { // n_places == __kmp_affinity_num_masks: use fractional spacing
        double current = static_cast<double>(masters_place);
        double spacing =
            (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
        int first, last;
        kmp_info_t *th;

        thidx = n_th + 1;
        if (update_master_only == 1)
          thidx = 1;
        for (f = 0; f < thidx; f++) {
          first = static_cast<int>(current);
          last = static_cast<int>(current + spacing) - 1;
          KMP_DEBUG_ASSERT(last >= first);
4954 if (first >= n_places) {
4955 if (masters_place) {
4958 if (first == (masters_place + 1)) {
4959 KMP_DEBUG_ASSERT(f == n_th);
4962 if (last == masters_place) {
4963 KMP_DEBUG_ASSERT(f == (n_th - 1));
4967 KMP_DEBUG_ASSERT(f == n_th);
4972 if (last >= n_places) {
4973 last = (n_places - 1);
4978 KMP_DEBUG_ASSERT(0 <= first);
4979 KMP_DEBUG_ASSERT(n_places > first);
4980 KMP_DEBUG_ASSERT(0 <= last);
4981 KMP_DEBUG_ASSERT(n_places > last);
4982 KMP_DEBUG_ASSERT(last_place >= first_place);
4983 th = team->t.t_threads[f];
4984 KMP_DEBUG_ASSERT(th);
4985 th->th.th_first_place = first;
4986 th->th.th_new_place = place;
4987 th->th.th_last_place = last;
4988 if (__kmp_display_affinity && place != th->th.th_current_place &&
4989 team->t.t_display_affinity != 1) {
            team->t.t_display_affinity = 1;
          }

          KA_TRACE(100,
                   ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                    "partition = [%d,%d], spacing = %.4f\n",
                    __kmp_gtid_from_thread(team->t.t_threads[f]),
                    team->t.t_id, f, th->th.th_new_place,
                    th->th.th_first_place, th->th.th_last_place, spacing));
5001 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
    } else { // if (n_th > n_places)
      int S, rem, gap, s_count;
      S = n_th / n_places;
      s_count = 0;
      rem = n_th - (S * n_places);
      gap = rem > 0 ? n_places / rem : n_places;
      int place = masters_place;
      int gap_ct = gap;
      thidx = n_th;
      if (update_master_only == 1)
        thidx = 1;
      for (f = 0; f < thidx; f++) {
5014 kmp_info_t *th = team->t.t_threads[f];
5015 KMP_DEBUG_ASSERT(th != NULL);
5017 th->th.th_first_place = place;
5018 th->th.th_last_place = place;
5019 th->th.th_new_place = place;
        if (__kmp_display_affinity && place != th->th.th_current_place &&
            team->t.t_display_affinity != 1) {
          team->t.t_display_affinity = 1;
        }
        s_count++;

        if ((s_count == S) && rem && (gap_ct == gap)) {
          // do nothing: leave this place open for one extra thread
        } else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
          // placed an extra thread here; move on to the next place
          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          s_count = 0;
          gap_ct = 1;
          rem--;
        } else if (s_count == S) { // place is full; move on to the next place
          if (place == last_place) {
            place = first_place;
          } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
            place = 0;
          } else {
            place++;
          }
          gap_ct++;
          s_count = 0;
        }

        KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                       "partition = [%d,%d]\n",
                       __kmp_gtid_from_thread(team->t.t_threads[f]),
                       team->t.t_id, f, th->th.th_new_place,
                       th->th.th_first_place, th->th.th_last_place));
      }
      KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
    }
  } break;

  default:
    break;
  }

  KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));
}

#endif // KMP_AFFINITY_SUPPORTED
// Allocate a team of the requested size, or reuse/resize the hot team.
kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
                    ompt_data_t ompt_parallel_data,
#endif
                    kmp_proc_bind_t new_proc_bind,
                    kmp_internal_control_t *new_icvs,
                    int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
  int f;
  kmp_team_t *team;
  int use_hot_team = !root->r.r_active;
  int level = 0;
  int do_place_partition = 1;

  KA_TRACE(20, ("__kmp_allocate_team: called\n"));
  KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
  KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t *hot_teams;
  if (master) {
    team = master->th.th_team;
    level = team->t.t_active_level;
    if (master->th.th_teams_microtask) { // inside the teams construct?
      if (master->th.th_teams_size.nteams > 1 &&
          ( // #teams > 1
              team->t.t_pkfn ==
                  (microtask_t)__kmp_teams_master || // inner fork of the teams
              master->th.th_teams_level <
                  team->t.t_level)) { // or nested parallel inside the teams
        ++level; // do not increment for #teams == 1 or the outer fork
      }
      // Skip the place partition for the inner fork of the teams construct;
      // wait until a nested parallel region is encountered inside teams.
      if ((master->th.th_teams_size.nteams == 1 &&
           master->th.th_teams_level >= team->t.t_level) ||
          (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
        do_place_partition = 0;
    }
    hot_teams = master->th.th_hot_teams;
    if (level < __kmp_hot_teams_max_level && hot_teams &&
        hot_teams[level].hot_team) {
      // a hot team has already been allocated for this level
      use_hot_team = 1;
    } else {
      use_hot_team = 0;
    }
  } else {
    KMP_DEBUG_ASSERT(new_nproc == 1);
  }
#endif
  // Optimization to reuse the "hot" team for the top-level parallel region.
  if (use_hot_team && new_nproc > 1) {
    KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
#if KMP_NESTED_HOT_TEAMS
    team = hot_teams[level].hot_team;
#else
    team = root->r.r_hot_team;
#endif
#if KMP_DEBUG
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
                    "task_team[1] = %p before reinit\n",
                    team->t.t_task_team[0], team->t.t_task_team[1]));
    }
#endif
5144 if (team->t.t_nproc != new_nproc &&
5145 __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5147 int old_nthr = team->t.t_nproc;
5148 __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
5153 if (do_place_partition == 0)
5154 team->t.t_proc_bind = proc_bind_default;
    // Has the number of threads changed?
    if (team->t.t_nproc == new_nproc) { // no change in the number of threads
      KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
      // If omp_set_num_threads() already reduced the hot team size, the
      // special flag is set; reset it here.
      if (team->t.t_size_changed == -1) {
        team->t.t_size_changed = 1;
      } else {
        KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
      }

      // Set the primary thread's schedule as the new run-time schedule.
      kmp_r_sched_t new_sched = new_icvs->sched;
      KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

      __kmp_reinitialize_team(team, new_icvs,
                              root->r.r_uber_thread->th.th_ident);

      KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
                    team->t.t_threads[0], team));
5178 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5180#if KMP_AFFINITY_SUPPORTED
      if ((team->t.t_size_changed == 0) &&
          (team->t.t_proc_bind == new_proc_bind)) {
        if (new_proc_bind == proc_bind_spread) {
          if (do_place_partition) {
            __kmp_partition_places(team, 1); // only update the master's place
          }
        }
        KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
                       "proc_bind = %d, partition = [%d,%d]\n",
                       team->t.t_id, new_proc_bind, team->t.t_first_place,
                       team->t.t_last_place));
      } else {
        if (do_place_partition) {
          KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
          __kmp_partition_places(team);
        }
      }
#else
      KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#endif /* KMP_AFFINITY_SUPPORTED */
    } else if (team->t.t_nproc > new_nproc) {
      KA_TRACE(20,
               ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
                new_nproc));
      team->t.t_size_changed = 1;
5208 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5211 __kmp_add_threads_to_team(team, new_nproc);
5213#if KMP_NESTED_HOT_TEAMS
5214 if (__kmp_hot_teams_mode == 0) {
5217 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5218 hot_teams[level].hot_team_nth = new_nproc;
5221 for (f = new_nproc; f < team->t.t_nproc; f++) {
5222 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5223 if (__kmp_tasking_mode != tskm_immediate_exec) {
5226 team->t.t_threads[f]->th.th_task_team = NULL;
5228 __kmp_free_thread(team->t.t_threads[f]);
5229 team->t.t_threads[f] = NULL;
5231#if KMP_NESTED_HOT_TEAMS
5236 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5237 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5238 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
5240 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5241 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5243 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5248 team->t.t_nproc = new_nproc;
5250 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5251 __kmp_reinitialize_team(team, new_icvs,
5252 root->r.r_uber_thread->th.th_ident);
5255 for (f = 0; f < new_nproc; ++f) {
5256 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
      KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
                    team->t.t_threads[0], team));
5264 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5267 for (f = 0; f < team->t.t_nproc; f++) {
5268 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5269 team->t.t_threads[f]->th.th_team_nproc ==
5274 if (do_place_partition) {
5275 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5276#if KMP_AFFINITY_SUPPORTED
5277 __kmp_partition_places(team);
5281#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5282 kmp_affin_mask_t *old_mask;
      if (KMP_AFFINITY_CAPABLE()) {
        KMP_CPU_ALLOC(old_mask);
      }
#endif
      KA_TRACE(20,
               ("__kmp_allocate_team: increasing hot team thread count to %d\n",
                new_nproc));
      int old_nproc = team->t.t_nproc; // save old value, used below
5292 team->t.t_size_changed = 1;
5294#if KMP_NESTED_HOT_TEAMS
5295 int avail_threads = hot_teams[level].hot_team_nth;
5296 if (new_nproc < avail_threads)
5297 avail_threads = new_nproc;
5298 kmp_info_t **other_threads = team->t.t_threads;
5299 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5303 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5304 for (b = 0; b < bs_last_barrier; ++b) {
5305 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5306 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5308 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5312 if (hot_teams[level].hot_team_nth >= new_nproc) {
5315 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5316 team->t.t_nproc = new_nproc;
5320 team->t.t_nproc = hot_teams[level].hot_team_nth;
5321 hot_teams[level].hot_team_nth = new_nproc;
5323 if (team->t.t_max_nproc < new_nproc) {
5325 __kmp_reallocate_team_arrays(team, new_nproc);
5326 __kmp_reinitialize_team(team, new_icvs, NULL);
5329#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5335 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5339 for (f = team->t.t_nproc; f < new_nproc; f++) {
5340 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5341 KMP_DEBUG_ASSERT(new_worker);
5342 team->t.t_threads[f] = new_worker;
        KA_TRACE(20,
                 ("__kmp_allocate_team: team %d init T#%d arrived: "
                  "join=%llu, plain=%llu\n",
                  team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
                  team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                  team->t.t_bar[bs_plain_barrier].b_arrived));
5353 kmp_balign_t *balign = new_worker->th.th_bar;
5354 for (b = 0; b < bs_last_barrier; ++b) {
5355 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5356 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5357 KMP_BARRIER_PARENT_FLAG);
5359 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5365#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5366 if (KMP_AFFINITY_CAPABLE()) {
5368 __kmp_set_system_affinity(old_mask, TRUE);
5369 KMP_CPU_FREE(old_mask);
5372#if KMP_NESTED_HOT_TEAMS
5375 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5378 __kmp_add_threads_to_team(team, new_nproc);
5382 __kmp_initialize_team(team, new_nproc, new_icvs,
5383 root->r.r_uber_thread->th.th_ident);
5386 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5387 for (f = 0; f < team->t.t_nproc; ++f)
5388 __kmp_initialize_info(team->t.t_threads[f], team, f,
5389 __kmp_gtid_from_tid(f, team));
5397 for (f = old_nproc; f < team->t.t_nproc; ++f)
5398 team->t.t_threads[f]->th.th_task_state =
5399 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5402 kmp_uint8 old_state = team->t.t_threads[0]->th.th_task_state;
5403 for (f = old_nproc; f < team->t.t_nproc; ++f)
5404 team->t.t_threads[f]->th.th_task_state = old_state;
5408 for (f = 0; f < team->t.t_nproc; ++f) {
5409 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5410 team->t.t_threads[f]->th.th_team_nproc ==
5415 if (do_place_partition) {
5416 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5417#if KMP_AFFINITY_SUPPORTED
5418 __kmp_partition_places(team);
5423 kmp_info_t *master = team->t.t_threads[0];
5424 if (master->th.th_teams_microtask) {
5425 for (f = 1; f < new_nproc; ++f) {
5427 kmp_info_t *thr = team->t.t_threads[f];
5428 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5429 thr->th.th_teams_level = master->th.th_teams_level;
5430 thr->th.th_teams_size = master->th.th_teams_size;
5433#if KMP_NESTED_HOT_TEAMS
5437 for (f = 1; f < new_nproc; ++f) {
5438 kmp_info_t *thr = team->t.t_threads[f];
5440 kmp_balign_t *balign = thr->th.th_bar;
5441 for (b = 0; b < bs_last_barrier; ++b) {
5442 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5443 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5445 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5453 __kmp_alloc_argv_entries(argc, team, TRUE);
5454 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5458 KF_TRACE(10, (
" hot_team = %p\n", team));
5461 if (__kmp_tasking_mode != tskm_immediate_exec) {
5462 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5463 "task_team[1] = %p after reinit\n",
5464 team->t.t_task_team[0], team->t.t_task_team[1]));
5469 __ompt_team_assign_id(team, ompt_parallel_data);
5479 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5482 if (team->t.t_max_nproc >= max_nproc) {
5484 __kmp_team_pool = team->t.t_next_pool;
5486 if (max_nproc > 1 &&
5487 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5489 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5494 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5496 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and "
5497 "task_team[1] %p to NULL\n",
5498 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5499 team->t.t_task_team[0] = NULL;
5500 team->t.t_task_team[1] = NULL;
5503 __kmp_alloc_argv_entries(argc, team, TRUE);
5504 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5507 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5508 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5511 for (b = 0; b < bs_last_barrier; ++b) {
5512 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5514 team->t.t_bar[b].b_master_arrived = 0;
5515 team->t.t_bar[b].b_team_arrived = 0;
5520 team->t.t_proc_bind = new_proc_bind;
      KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",
                    team->t.t_id));
      __ompt_team_assign_id(team, ompt_parallel_data);
5538 team = __kmp_reap_team(team);
5539 __kmp_team_pool = team;
  team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));
5547 team->t.t_max_nproc = max_nproc;
5548 if (max_nproc > 1 &&
5549 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5551 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5556 __kmp_allocate_team_arrays(team, max_nproc);
5558 KA_TRACE(20, (
"__kmp_allocate_team: making a new team\n"));
5559 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
  KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
                "%p to NULL\n",
                &team->t.t_task_team[0], &team->t.t_task_team[1]));
5564 team->t.t_task_team[0] = NULL;
5566 team->t.t_task_team[1] = NULL;
5569 if (__kmp_storage_map) {
5570 __kmp_print_team_storage_map(
"team", team, team->t.t_id, new_nproc);
5574 __kmp_alloc_argv_entries(argc, team, FALSE);
5575 team->t.t_argc = argc;
  KA_TRACE(20,
           ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
            team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5582 for (b = 0; b < bs_last_barrier; ++b) {
5583 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5585 team->t.t_bar[b].b_master_arrived = 0;
5586 team->t.t_bar[b].b_team_arrived = 0;
5591 team->t.t_proc_bind = new_proc_bind;
5594 __ompt_team_assign_id(team, ompt_parallel_data);
5595 team->t.ompt_serialized_team_info = NULL;
  KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
                team->t.t_id));

  return team;
}
/* Free the team: return it to the team pool, releasing its threads. */
void __kmp_free_team(kmp_root_t *root,
                     kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  int f;
  KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
                team->t.t_id));

  /* verify state */
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);
5623 int use_hot_team = team == root->r.r_hot_team;
5624#if KMP_NESTED_HOT_TEAMS
5627 level = team->t.t_active_level - 1;
5628 if (master->th.th_teams_microtask) {
5629 if (master->th.th_teams_size.nteams > 1) {
5633 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5634 master->th.th_teams_level == team->t.t_level) {
5640 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5642 if (level < __kmp_hot_teams_max_level) {
5643 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
  TCW_SYNC_PTR(team->t.t_pkfn, NULL); // mark the team as not in use anymore
  team->t.t_copyin_counter = 0; // init counter for possible reuse
5658 if (!use_hot_team) {
5659 if (__kmp_tasking_mode != tskm_immediate_exec) {
5661 for (f = 1; f < team->t.t_nproc; ++f) {
5662 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5663 kmp_info_t *th = team->t.t_threads[f];
5664 volatile kmp_uint32 *state = &th->th.th_reap_state;
        while (*state != KMP_SAFE_TO_REAP) {
#if KMP_OS_WINDOWS
          // On Windows a thread can be killed at any time; check for that.
          DWORD ecode;
          if (!__kmp_is_thread_alive(th, &ecode)) {
            *state = KMP_SAFE_TO_REAP; // reset the flag for a dead thread
            break;
          }
#endif
          // If the worker is sleeping, wake it so it can reach a safe state.
          kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
          if (fl.is_sleeping())
            fl.resume(__kmp_gtid_from_thread(th));
        }
5684 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5685 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5686 if (task_team != NULL) {
5687 for (f = 0; f < team->t.t_nproc; ++f) {
5688 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
          team->t.t_threads[f]->th.th_task_team = NULL;
        }
        KA_TRACE(
            20,
            ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
             __kmp_get_gtid(), task_team, team->t.t_id));
#if KMP_NESTED_HOT_TEAMS
        __kmp_free_task_team(master, task_team);
#endif
        team->t.t_task_team[tt_idx] = NULL;
5704 team->t.t_parent = NULL;
5705 team->t.t_level = 0;
5706 team->t.t_active_level = 0;
5709 for (f = 1; f < team->t.t_nproc; ++f) {
5710 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5711 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5712 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
5715 __kmp_free_thread(team->t.t_threads[f]);
5718 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5721 team->t.b->go_release();
5722 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5723 for (f = 1; f < team->t.t_nproc; ++f) {
5724 if (team->t.b->sleep[f].sleep) {
5725 __kmp_atomic_resume_64(
5726 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5727 (kmp_atomic_flag_64<> *)NULL);
    for (int f = 1; f < team->t.t_nproc; ++f) {
      while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
        KMP_CPU_PAUSE();
    }
5739 for (f = 1; f < team->t.t_nproc; ++f) {
5740 team->t.t_threads[f] = NULL;
5743 if (team->t.t_max_nproc > 1 &&
5744 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5745 distributedBarrier::deallocate(team->t.b);
5750 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5751 __kmp_team_pool = (
volatile kmp_team_t *)team;
5754 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5755 team->t.t_threads[1]->th.th_cg_roots);
5756 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5758 for (f = 1; f < team->t.t_nproc; ++f) {
5759 kmp_info_t *thr = team->t.t_threads[f];
5760 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5761 thr->th.th_cg_roots->cg_root == thr);
5763 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5764 thr->th.th_cg_roots = tmp->up;
5765 KA_TRACE(100, (
"__kmp_free_team: Thread %p popping node %p and moving"
5766 " up to node %p. cg_nthreads was %d\n",
5767 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
        int i = tmp->cg_nthreads--;
        if (i == 1) {
          __kmp_free(tmp); // free the CG structure if this was the last thread
        }
        // Restore the current task's thread_limit from the CG root.
        if (thr->th.th_cg_roots)
          thr->th.th_current_task->td_icvs.thread_limit =
              thr->th.th_cg_roots->cg_thread_limit;
5784kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5785 kmp_team_t *next_pool = team->t.t_next_pool;
5787 KMP_DEBUG_ASSERT(team);
5788 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5789 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5790 KMP_DEBUG_ASSERT(team->t.t_threads);
5791 KMP_DEBUG_ASSERT(team->t.t_argv);
5796 __kmp_free_team_arrays(team);
  if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
  __kmp_free(team);

  return next_pool;
}
// Free the thread. Don't reap it; just place it on the pool of available
// threads.
void __kmp_free_thread(kmp_info_t *this_th) {
  int gtid;
  kmp_info_t **scan;

  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5837 KMP_DEBUG_ASSERT(this_th);
5842 kmp_balign_t *balign = this_th->th.th_bar;
5843 for (b = 0; b < bs_last_barrier; ++b) {
5844 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5845 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5846 balign[b].bb.team = NULL;
5847 balign[b].bb.leaf_kids = 0;
5849 this_th->th.th_task_state = 0;
5850 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5853 TCW_PTR(this_th->th.th_team, NULL);
5854 TCW_PTR(this_th->th.th_root, NULL);
5855 TCW_PTR(this_th->th.th_dispatch, NULL);
5857 while (this_th->th.th_cg_roots) {
5858 this_th->th.th_cg_roots->cg_nthreads--;
    KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"
                   " %p of thread %p to %d\n",
                   this_th, this_th->th.th_cg_roots,
                   this_th->th.th_cg_roots->cg_root,
                   this_th->th.th_cg_roots->cg_nthreads));
5864 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5865 if (tmp->cg_root == this_th) {
      KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
      KA_TRACE(
          5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
      this_th->th.th_cg_roots = tmp->up;
      __kmp_free(tmp);
    } else { // worker thread
      if (tmp->cg_nthreads == 0) { // last thread leaves the contention group
        __kmp_free(tmp);
      }
      this_th->th.th_cg_roots = NULL;
      break;
    }
5885 __kmp_free_implicit_task(this_th);
5886 this_th->th.th_current_task = NULL;
5890 gtid = this_th->th.th_info.ds.ds_gtid;
5891 if (__kmp_thread_pool_insert_pt != NULL) {
5892 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5893 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5894 __kmp_thread_pool_insert_pt = NULL;
5903 if (__kmp_thread_pool_insert_pt != NULL) {
5904 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5906 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5908 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5909 scan = &((*scan)->th.th_next_pool))
5914 TCW_PTR(this_th->th.th_next_pool, *scan);
5915 __kmp_thread_pool_insert_pt = *scan = this_th;
5916 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5917 (this_th->th.th_info.ds.ds_gtid <
5918 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
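  // Worked example (illustration only): the thread pool is a singly linked
  // list kept sorted by gtid, and __kmp_thread_pool_insert_pt remembers the
  // last insertion point so a later insert with a larger gtid can skip the
  // scan prefix. If the pool holds gtids {2, 5, 9} and T#7 is freed, the scan
  // starts at the hint (the node for 5), stops at 9, and splices 7 in between,
  // leaving {2, 5, 7, 9} with the hint now pointing at 7.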
5919 TCW_4(this_th->th.th_in_pool, TRUE);
5920 __kmp_suspend_initialize_thread(this_th);
5921 __kmp_lock_suspend_mx(this_th);
5922 if (this_th->th.th_active == TRUE) {
5923 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5924 this_th->th.th_active_in_pool = TRUE;
5928 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5931 __kmp_unlock_suspend_mx(this_th);
5933 TCW_4(__kmp_nth, __kmp_nth - 1);
5935#ifdef KMP_ADJUST_BLOCKTIME
5938 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5939 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5940 if (__kmp_nth <= __kmp_avail_proc) {
5941 __kmp_zero_bt = FALSE;
5951void *__kmp_launch_thread(kmp_info_t *this_thr) {
5952#if OMP_PROFILING_SUPPORT
  ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
  if (ProfileTraceFile)
    llvm::timeTraceProfilerInitialize(500, "libomptarget");
  int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t **volatile pteam;

  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
5966 if (__kmp_env_consistency_check) {
5967 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
5971 if (ompd_state & OMPD_ENABLE_BP)
5972 ompd_bp_thread_begin();
  ompt_data_t *thread_data = nullptr;
5977 if (ompt_enabled.enabled) {
5978 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
5979 *thread_data = ompt_data_none;
5981 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5982 this_thr->th.ompt_thread_info.wait_id = 0;
5983 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
5984 this_thr->th.ompt_thread_info.parallel_flags = 0;
5985 if (ompt_enabled.ompt_callback_thread_begin) {
5986 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
5987 ompt_thread_worker, thread_data);
5989 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5994 while (!TCR_4(__kmp_global.g.g_done)) {
5995 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
5999 KA_TRACE(20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid));
6002 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
6005 if (ompt_enabled.enabled) {
6006 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6010 pteam = &this_thr->th.th_team;
6013 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
6015 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
        KA_TRACE(20,
                 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
                  gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                  (*pteam)->t.t_pkfn));
6022 updateHWFPControl(*pteam);
6025 if (ompt_enabled.enabled) {
6026 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6030 rc = (*pteam)->t.t_invoke(gtid);
6034 KA_TRACE(20, (
"__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
6035 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6036 (*pteam)->t.t_pkfn));
6039 if (ompt_enabled.enabled) {
6041 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
6043 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6047 __kmp_join_barrier(gtid);
6050 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
6053 if (ompd_state & OMPD_ENABLE_BP)
6054 ompd_bp_thread_end();
6058 if (ompt_enabled.ompt_callback_thread_end) {
6059 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
6063 this_thr->th.th_task_team = NULL;
6065 __kmp_common_destroy_gtid(gtid);
  KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));

#if OMP_PROFILING_SUPPORT
  llvm::timeTraceProfilerFinishThread();
#endif
  return this_thr;
}
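// Illustrative sketch (not part of the runtime): the overall shape of the
// worker loop above -- park until a team publishes work, run the microtask,
// then park again -- expressed with standard C++ primitives instead of the
// runtime's fork/join barriers. Every name in this block is hypothetical.
#include <condition_variable>
#include <mutex>

namespace kmp_example {
struct worker_slot {
  std::mutex m;
  std::condition_variable cv;
  void (*task)(int tid) = nullptr; // the published "microtask"
  bool shutdown = false;           // analogous to __kmp_global.g.g_done

  // Analogous to __kmp_launch_thread's main loop.
  void run(int tid) {
    std::unique_lock<std::mutex> lk(m);
    while (!shutdown) {
      // "fork barrier": sleep until work is published or we are told to exit
      cv.wait(lk, [&] { return shutdown || task != nullptr; });
      if (task) {
        void (*work)(int) = task;
        task = nullptr;
        lk.unlock();
        work(tid); // invoke the parallel region body
        lk.lock(); // "join barrier": go back to waiting for the next region
      }
    }
  }
};
} // namespace kmp_example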
/* Destructor for the thread-specific gtid key; runs when a thread exits. */
void __kmp_internal_end_dest(void *specific_gtid) {
  int gtid;
  // The gtid is stored as gtid+1 in thread-specific storage, so that the
  // value 0 can mean "nothing stored"; undo that offset here.
  __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
  __kmp_internal_end_thread(gtid);
}
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}
#endif

void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
6125 __kmp_internal_end_library(-1);
6127 __kmp_close_console();
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
  // It is assumed that __kmp_forkjoin_lock is held by the caller.
  int gtid;

  KMP_DEBUG_ASSERT(thread != NULL);
  gtid = thread->th.th_info.ds.ds_gtid;

  if (!is_root) {
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
      /* Assume the threads are at the fork barrier here. */
      KA_TRACE(
          20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
               gtid));
      if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        while (
            !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
          KMP_CPU_PAUSE();
        __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
      } else {
        kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                           thread);
        __kmp_release_64(&flag);
      }
    }
    __kmp_reap_worker(thread); // terminate the OS thread
6173 if (thread->th.th_active_in_pool) {
6174 thread->th.th_active_in_pool = FALSE;
6175 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
6176 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
6180 __kmp_free_implicit_task(thread);
6184 __kmp_free_fast_memory(thread);
6187 __kmp_suspend_uninitialize_thread(thread);
6189 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
6190 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6195#ifdef KMP_ADJUST_BLOCKTIME
6198 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6199 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6200 if (__kmp_nth <= __kmp_avail_proc) {
6201 __kmp_zero_bt = FALSE;
6207 if (__kmp_env_consistency_check) {
6208 if (thread->th.th_cons) {
6209 __kmp_free_cons_stack(thread->th.th_cons);
6210 thread->th.th_cons = NULL;
6214 if (thread->th.th_pri_common != NULL) {
6215 __kmp_free(thread->th.th_pri_common);
6216 thread->th.th_pri_common = NULL;
6219 if (thread->th.th_task_state_memo_stack != NULL) {
6220 __kmp_free(thread->th.th_task_state_memo_stack);
6221 thread->th.th_task_state_memo_stack = NULL;
6225 if (thread->th.th_local.bget_data != NULL) {
6226 __kmp_finalize_bget(thread);
6230#if KMP_AFFINITY_SUPPORTED
6231 if (thread->th.th_affin_mask != NULL) {
6232 KMP_CPU_FREE(thread->th.th_affin_mask);
6233 thread->th.th_affin_mask = NULL;
6237#if KMP_USE_HIER_SCHED
6238 if (thread->th.th_hier_bar_data != NULL) {
6239 __kmp_free(thread->th.th_hier_bar_data);
6240 thread->th.th_hier_bar_data = NULL;
6244 __kmp_reap_team(thread->th.th_serial_team);
6245 thread->th.th_serial_team = NULL;
// Clean up the per-thread ITT hash tables (region and barrier frame domains).
static void __kmp_itthash_clean(kmp_info_t *th) {
  if (__kmp_itt_region_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
      kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
      while (bucket) {
        kmp_itthash_entry_t *next = bucket->next_in_bucket;
        __kmp_thread_free(th, bucket);
        bucket = next;
      }
    }
  }
  if (__kmp_itt_barrier_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
      kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
      while (bucket) {
        kmp_itthash_entry_t *next = bucket->next_in_bucket;
        __kmp_thread_free(th, bucket);
        bucket = next;
      }
    }
  }
}
static void __kmp_internal_end(void) {
  int i;
6281 __kmp_unregister_library();
6288 __kmp_reclaim_dead_roots();
6292 for (i = 0; i < __kmp_threads_capacity; i++)
6294 if (__kmp_root[i]->r.r_active)
6297 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6299 if (i < __kmp_threads_capacity) {
6311 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6312 if (TCR_4(__kmp_init_monitor)) {
6313 __kmp_reap_monitor(&__kmp_monitor);
6314 TCW_4(__kmp_init_monitor, 0);
6316 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6317 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6323 for (i = 0; i < __kmp_threads_capacity; i++) {
6324 if (__kmp_root[i]) {
6327 KMP_ASSERT(!__kmp_root[i]->r.r_active);
6336 while (__kmp_thread_pool != NULL) {
6338 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
6339 __kmp_thread_pool = thread->th.th_next_pool;
6341 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6342 thread->th.th_next_pool = NULL;
6343 thread->th.th_in_pool = FALSE;
6344 __kmp_reap_thread(thread, 0);
6346 __kmp_thread_pool_insert_pt = NULL;
6349 while (__kmp_team_pool != NULL) {
6351 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
6352 __kmp_team_pool = team->t.t_next_pool;
6354 team->t.t_next_pool = NULL;
6355 __kmp_reap_team(team);
6358 __kmp_reap_task_teams();
6365 for (i = 0; i < __kmp_threads_capacity; i++) {
6366 kmp_info_t *thr = __kmp_threads[i];
6367 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6372 for (i = 0; i < __kmp_threads_capacity; ++i) {
6379 TCW_SYNC_4(__kmp_init_common, FALSE);
6381 KA_TRACE(10, (
"__kmp_internal_end: all workers reaped\n"));
6389 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6390 if (TCR_4(__kmp_init_monitor)) {
6391 __kmp_reap_monitor(&__kmp_monitor);
6392 TCW_4(__kmp_init_monitor, 0);
6394 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6395 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6398 TCW_4(__kmp_init_gtid, FALSE);
void __kmp_internal_end_library(int gtid_req) {
6414 if (__kmp_global.g.g_abort) {
6415 KA_TRACE(11, (
"__kmp_internal_end_library: abort, exiting\n"));
6419 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6420 KA_TRACE(10, (
"__kmp_internal_end_library: already finished\n"));
6425 if (TCR_4(__kmp_init_hidden_helper) &&
6426 !TCR_4(__kmp_hidden_helper_team_done)) {
6427 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6429 __kmp_hidden_helper_main_thread_release();
6431 __kmp_hidden_helper_threads_deinitz_wait();
6437 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6439 10, (
"__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6440 if (gtid == KMP_GTID_SHUTDOWN) {
6441 KA_TRACE(10, (
"__kmp_internal_end_library: !__kmp_init_runtime, system "
6442 "already shutdown\n"));
6444 }
else if (gtid == KMP_GTID_MONITOR) {
6445 KA_TRACE(10, (
"__kmp_internal_end_library: monitor thread, gtid not "
6446 "registered, or system shutdown\n"));
6448 }
else if (gtid == KMP_GTID_DNE) {
6449 KA_TRACE(10, (
"__kmp_internal_end_library: gtid not registered or system "
6452 }
else if (KMP_UBER_GTID(gtid)) {
6454 if (__kmp_root[gtid]->r.r_active) {
6455 __kmp_global.g.g_abort = -1;
6456 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6457 __kmp_unregister_library();
6459 (
"__kmp_internal_end_library: root still active, abort T#%d\n",
6463 __kmp_itthash_clean(__kmp_threads[gtid]);
6466 (
"__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6467 __kmp_unregister_root_current_thread(gtid);
6474#ifdef DUMP_DEBUG_ON_EXIT
6475 if (__kmp_debug_buf)
6476 __kmp_dump_debug_buffer();
6481 __kmp_unregister_library();
6486 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6489 if (__kmp_global.g.g_abort) {
6490 KA_TRACE(10, (
"__kmp_internal_end_library: abort, exiting\n"));
6492 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6495 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6496 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6505 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6508 __kmp_internal_end();
6510 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6511 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6513 KA_TRACE(10, (
"__kmp_internal_end_library: exit\n"));
6515#ifdef DUMP_DEBUG_ON_EXIT
6516 if (__kmp_debug_buf)
6517 __kmp_dump_debug_buffer();
6521 __kmp_close_console();
6524 __kmp_fini_allocator();
void __kmp_internal_end_thread(int gtid_req) {
6537 if (__kmp_global.g.g_abort) {
6538 KA_TRACE(11, (
"__kmp_internal_end_thread: abort, exiting\n"));
6542 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6543 KA_TRACE(10, (
"__kmp_internal_end_thread: already finished\n"));
6548 if (TCR_4(__kmp_init_hidden_helper) &&
6549 !TCR_4(__kmp_hidden_helper_team_done)) {
6550 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6552 __kmp_hidden_helper_main_thread_release();
6554 __kmp_hidden_helper_threads_deinitz_wait();
6561 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6563 (
"__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6564 if (gtid == KMP_GTID_SHUTDOWN) {
6565 KA_TRACE(10, (
"__kmp_internal_end_thread: !__kmp_init_runtime, system "
6566 "already shutdown\n"));
6568 }
else if (gtid == KMP_GTID_MONITOR) {
6569 KA_TRACE(10, (
"__kmp_internal_end_thread: monitor thread, gtid not "
6570 "registered, or system shutdown\n"));
6572 }
else if (gtid == KMP_GTID_DNE) {
6573 KA_TRACE(10, (
"__kmp_internal_end_thread: gtid not registered or system "
6577 }
else if (KMP_UBER_GTID(gtid)) {
6579 if (__kmp_root[gtid]->r.r_active) {
6580 __kmp_global.g.g_abort = -1;
6581 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6583 (
"__kmp_internal_end_thread: root still active, abort T#%d\n",
6587 KA_TRACE(10, (
"__kmp_internal_end_thread: unregistering sibling T#%d\n",
6589 __kmp_unregister_root_current_thread(gtid);
6593 KA_TRACE(10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6596 __kmp_threads[gtid]->th.th_task_team = NULL;
6600 (
"__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6606 if (__kmp_pause_status != kmp_hard_paused)
6610 KA_TRACE(10, (
"__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6615 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6618 if (__kmp_global.g.g_abort) {
6619 KA_TRACE(10, (
"__kmp_internal_end_thread: abort, exiting\n"));
6621 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6624 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6625 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6636 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6638 for (i = 0; i < __kmp_threads_capacity; ++i) {
6639 if (KMP_UBER_GTID(i)) {
6642 (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6643 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6644 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6651 __kmp_internal_end();
6653 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6654 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6656 KA_TRACE(10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6658#ifdef DUMP_DEBUG_ON_EXIT
6659 if (__kmp_debug_buf)
6660 __kmp_dump_debug_buffer();
static long __kmp_registration_flag = 0;
// Random value used to indicate library initialization.
static char *__kmp_registration_str = NULL;
// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.

static inline char *__kmp_reg_status_name() {
#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
                          (int)getuid());
#else
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
#endif
}
void __kmp_register_library_startup(void) {

  char *name = __kmp_reg_status_name(); // name of the environment variable
  union {
    double dtime;
    long ltime;
  } time;
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  __kmp_initialize_system_tick();
#endif
  __kmp_read_system_time(&time.dtime);
  __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
  __kmp_registration_str =
      __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,
                       __kmp_registration_flag, KMP_LIBRARY_FILE);

  KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,
                __kmp_registration_str));
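  // Worked example (illustration only): with a registration flag of, say,
  // 0xcafe1234 and the library loaded from /usr/lib/libomp.so (an assumed
  // path), the value stored under the __KMP_REGISTERED_LIB_<pid> name looks
  // like "0x7f12345678-cafe1234-/usr/lib/libomp.so": the address of
  // __kmp_registration_flag, the flag value, and the library file name,
  // separated by '-' so a second runtime instance can parse and verify them.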
6710#if defined(KMP_USE_SHM)
6711 char *shm_name = __kmp_str_format(
"/%s", name);
6712 int shm_preexist = 0;
6714 int fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666);
6715 if ((fd1 == -1) && (errno == EEXIST)) {
6718 fd1 = shm_open(shm_name, O_RDWR, 0666);
6721 __kmp_fatal(KMP_MSG(FunctionError,
"Can't open SHM"), KMP_ERR(0),
6727 }
else if (fd1 == -1) {
6730 __kmp_fatal(KMP_MSG(FunctionError,
"Can't open SHM2"), KMP_ERR(errno),
6733 if (shm_preexist == 0) {
6735 if (ftruncate(fd1, SHM_SIZE) == -1) {
6737 __kmp_fatal(KMP_MSG(FunctionError,
"Can't set size of SHM"),
6738 KMP_ERR(errno), __kmp_msg_null);
6742 (
char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd1, 0);
6743 if (data1 == MAP_FAILED) {
6745 __kmp_fatal(KMP_MSG(FunctionError,
"Can't map SHM"), KMP_ERR(errno),
6748 if (shm_preexist == 0) {
6749 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6752 value = __kmp_str_format(
"%s", data1);
6753 munmap(data1, SHM_SIZE);
6757 __kmp_env_set(name, __kmp_registration_str, 0);
6759 value = __kmp_env_get(name);
6762 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6769 char *flag_addr_str = NULL;
6770 char *flag_val_str = NULL;
6771 char const *file_name = NULL;
6772 __kmp_str_split(tail,
'-', &flag_addr_str, &tail);
6773 __kmp_str_split(tail,
'-', &flag_val_str, &tail);
6776 unsigned long *flag_addr = 0;
6777 unsigned long flag_val = 0;
6778 KMP_SSCANF(flag_addr_str,
"%p", RCAST(
void **, &flag_addr));
6779 KMP_SSCANF(flag_val_str,
"%lx", &flag_val);
6780 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name,
"") != 0) {
6784 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6798 file_name =
"unknown library";
6803 char *duplicate_ok = __kmp_env_get(
"KMP_DUPLICATE_LIB_OK");
6804 if (!__kmp_str_match_true(duplicate_ok)) {
6806 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6807 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6809 KMP_INTERNAL_FREE(duplicate_ok);
6810 __kmp_duplicate_library_ok = 1;
6815#if defined(KMP_USE_SHM)
6817 shm_unlink(shm_name);
6820 __kmp_env_unset(name);
6824 KMP_DEBUG_ASSERT(0);
  KMP_INTERNAL_FREE((void *)value);
#if defined(KMP_USE_SHM)
  KMP_INTERNAL_FREE((void *)shm_name);
#endif
  KMP_INTERNAL_FREE((void *)name);
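  // Worked example (illustration only) of the duplicate-detection above: if a
  // previously stored value such as "0x7f12345678-cafe1234-/old/libomp.so" is
  // found, it is split on '-' into a flag address, a flag value and a file
  // name. The other instance is considered alive only if that address is still
  // mapped in this process and the word it points to still holds the recorded
  // value; otherwise the entry is treated as stale (e.g. left behind by a
  // crashed process) and is replaced on the next registration attempt.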
void __kmp_unregister_library(void) {

  char *name = __kmp_reg_status_name();
  char *value = NULL;

#if defined(KMP_USE_SHM)
  char *shm_name = __kmp_str_format("/%s", name);
  int fd1 = shm_open(shm_name, O_RDONLY, 0666);
6849 char *data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6850 if (data1 != MAP_FAILED) {
6851 value = __kmp_str_format(
"%s", data1);
6852 munmap(data1, SHM_SIZE);
6856 value = __kmp_env_get(name);
6859 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6860 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6861 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6863#if defined(KMP_USE_SHM)
6864 shm_unlink(shm_name);
6866 __kmp_env_unset(name);
6870#if defined(KMP_USE_SHM)
6871 KMP_INTERNAL_FREE(shm_name);
6874 KMP_INTERNAL_FREE(__kmp_registration_str);
6875 KMP_INTERNAL_FREE(value);
6876 KMP_INTERNAL_FREE(name);
6878 __kmp_registration_flag = 0;
6879 __kmp_registration_str = NULL;
6886#if KMP_MIC_SUPPORTED
static void __kmp_check_mic_type() {
  kmp_cpuid_t cpuid_state = {0};
  kmp_cpuid_t *cs_p = &cpuid_state;
  __kmp_x86_cpuid(1, 0, cs_p);
  if ((cs_p->eax & 0xff0) == 0xB10) {
    __kmp_mic_type = mic2;
  } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
    __kmp_mic_type = mic3;
  } else {
    __kmp_mic_type = non_mic;
  }
}
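// Worked example (illustration only): CPUID leaf 1 returns the processor
// signature in EAX. Masking with 0xff0 keeps the model and family nibbles, so
// 0xB10 (family 0xB) identifies Knights Corner (mic2); the wider 0xf0ff0 mask
// also keeps the extended-model nibble, and 0x50670 (family 6, extended model
// 5, model 7) identifies Knights Landing (mic3). Anything else is non-MIC.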
static void __kmp_user_level_mwait_init() {
  struct kmp_cpuid buf;
  __kmp_x86_cpuid(7, 0, &buf);
  __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
  __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
  __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
                __kmp_umwait_enabled));
}
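// Worked example (illustration only): CPUID leaf 7 (sub-leaf 0) reports the
// WAITPKG feature in bit 5 of ECX, so (buf.ecx >> 5) & 1 is 1 exactly when the
// umwait/tpause instructions are available. User-level mwait is then enabled
// only if both the hardware bit and the corresponding KMP_* setting are on.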
6915#ifndef AT_INTELPHIUSERMWAIT
6918#define AT_INTELPHIUSERMWAIT 10000
unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
unsigned long getauxval(unsigned long) { return 0; }
6926static void __kmp_user_level_mwait_init() {
6931 if (__kmp_mic_type == mic3) {
6932 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
6933 if ((res & 0x1) || __kmp_user_level_mwait) {
6934 __kmp_mwait_enabled = TRUE;
6935 if (__kmp_user_level_mwait) {
6936 KMP_INFORM(EnvMwaitWarn);
6939 __kmp_mwait_enabled = FALSE;
6942 KF_TRACE(30, (
"__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
6943 "__kmp_mwait_enabled = %d\n",
6944 __kmp_mic_type, __kmp_mwait_enabled));
static void __kmp_do_serial_initialize(void) {
  int i, gtid;
  size_t size;

  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));

  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
6968 __kmp_validate_locks();
6971 __kmp_init_allocator();
6977 if (__kmp_need_register_serial)
6978 __kmp_register_library_startup();
6981 if (TCR_4(__kmp_global.g.g_done)) {
6982 KA_TRACE(10, (
"__kmp_do_serial_initialize: reinitialization of library\n"));
6985 __kmp_global.g.g_abort = 0;
6986 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6989#if KMP_USE_ADAPTIVE_LOCKS
6990#if KMP_DEBUG_ADAPTIVE_LOCKS
6991 __kmp_init_speculative_stats();
6994#if KMP_STATS_ENABLED
6997 __kmp_init_lock(&__kmp_global_lock);
6998 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
6999 __kmp_init_lock(&__kmp_debug_lock);
7000 __kmp_init_atomic_lock(&__kmp_atomic_lock);
7001 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
7002 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
7003 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
7004 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
7005 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
7006 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
7007 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
7008 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
7009 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
7010 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
7011 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
7012 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
7013 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
7014 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
7016 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7018 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
7022 __kmp_runtime_initialize();
7024#if KMP_MIC_SUPPORTED
7025 __kmp_check_mic_type();
7032 __kmp_abort_delay = 0;
7036 __kmp_dflt_team_nth_ub = __kmp_xproc;
7037 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
7038 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
7040 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
7041 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7043 __kmp_max_nth = __kmp_sys_max_nth;
7044 __kmp_cg_max_nth = __kmp_sys_max_nth;
7045 __kmp_teams_max_nth = __kmp_xproc;
7046 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
7047 __kmp_teams_max_nth = __kmp_sys_max_nth;
7052 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
7054 __kmp_monitor_wakeups =
7055 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7056 __kmp_bt_intervals =
7057 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
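  // Illustration (values assumed, not taken from this build): with the default
  // blocktime of 200 ms and a monitor thread that wakes a fixed number of
  // times per second, KMP_WAKEUPS_FROM_BLOCKTIME derives how many monitor
  // wakeups fit into one blocktime window, and KMP_INTERVALS_FROM_BLOCKTIME
  // converts the same 200 ms into the number of monitor intervals a spinning
  // worker may burn before it is put to sleep.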
7060 __kmp_library = library_throughput;
7062 __kmp_static = kmp_sch_static_balanced;
7069#if KMP_FAST_REDUCTION_BARRIER
7070#define kmp_reduction_barrier_gather_bb ((int)1)
7071#define kmp_reduction_barrier_release_bb ((int)1)
7072#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7073#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7075 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7076 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
7077 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
7078 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
7079 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
7080#if KMP_FAST_REDUCTION_BARRIER
7081 if (i == bs_reduction_barrier) {
7083 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7084 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7085 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7086 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7090#if KMP_FAST_REDUCTION_BARRIER
7091#undef kmp_reduction_barrier_release_pat
7092#undef kmp_reduction_barrier_gather_pat
7093#undef kmp_reduction_barrier_release_bb
7094#undef kmp_reduction_barrier_gather_bb
7096#if KMP_MIC_SUPPORTED
7097 if (__kmp_mic_type == mic2) {
7099 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
7100 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
7102 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7103 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7105#if KMP_FAST_REDUCTION_BARRIER
7106 if (__kmp_mic_type == mic2) {
7107 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7108 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7115 __kmp_env_checks = TRUE;
7117 __kmp_env_checks = FALSE;
7121 __kmp_foreign_tp = TRUE;
7123 __kmp_global.g.g_dynamic = FALSE;
7124 __kmp_global.g.g_dynamic_mode = dynamic_default;
7126 __kmp_init_nesting_mode();
7128 __kmp_env_initialize(NULL);
7130#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
7131 __kmp_user_level_mwait_init();
7135 char const *val = __kmp_env_get(
"KMP_DUMP_CATALOG");
7136 if (__kmp_str_match_true(val)) {
7137 kmp_str_buf_t buffer;
7138 __kmp_str_buf_init(&buffer);
7139 __kmp_i18n_dump_catalog(&buffer);
7140 __kmp_printf(
"%s", buffer.str);
7141 __kmp_str_buf_free(&buffer);
7143 __kmp_env_free(&val);
7146 __kmp_threads_capacity =
7147 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
7149 __kmp_tp_capacity = __kmp_default_tp_capacity(
7150 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7155 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
7156 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
7157 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
7158 __kmp_thread_pool = NULL;
7159 __kmp_thread_pool_insert_pt = NULL;
7160 __kmp_team_pool = NULL;
  size =
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
      CACHE_LINE;
  __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
  __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
                               sizeof(kmp_info_t *) * __kmp_threads_capacity);
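  // Worked example (illustration only): __kmp_threads and __kmp_root share a
  // single allocation. With a capacity of, say, 1024 entries and 8-byte
  // pointers, the block holds 1024 kmp_info_t* slots followed immediately by
  // 1024 kmp_root_t* slots (plus cache-line padding), and __kmp_root is simply
  // __kmp_threads viewed 1024 pointer slots further in, so growing the
  // capacity reallocates both tables together.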
7174 KMP_DEBUG_ASSERT(__kmp_all_nth ==
7176 KMP_DEBUG_ASSERT(__kmp_nth == 0);
7181 gtid = __kmp_register_root(TRUE);
7182 KA_TRACE(10, (
"__kmp_do_serial_initialize T#%d\n", gtid));
7183 KMP_ASSERT(KMP_UBER_GTID(gtid));
7184 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
7188 __kmp_common_initialize();
7192 __kmp_register_atfork();
7199 int rc = atexit(__kmp_internal_end_atexit);
    __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
                __kmp_msg_null);
7207#if KMP_HANDLE_SIGNALS
7213 __kmp_install_signals(FALSE);
7216 __kmp_install_signals(TRUE);
7221 __kmp_init_counter++;
7223 __kmp_init_serial = TRUE;
7225 if (__kmp_settings) {
7229 if (__kmp_display_env || __kmp_display_env_verbose) {
7230 __kmp_env_print_2();
7239 KA_TRACE(10, (
"__kmp_do_serial_initialize: exit\n"));
void __kmp_serial_initialize(void) {
7243 if (__kmp_init_serial) {
7246 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7247 if (__kmp_init_serial) {
7248 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7251 __kmp_do_serial_initialize();
7252 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
static void __kmp_do_middle_initialize(void) {
7257 int prev_dflt_team_nth;
7259 if (!__kmp_init_serial) {
7260 __kmp_do_serial_initialize();
7263 KA_TRACE(10, (
"__kmp_middle_initialize: enter\n"));
7265 if (UNLIKELY(!__kmp_need_register_serial)) {
7268 __kmp_register_library_startup();
7273 prev_dflt_team_nth = __kmp_dflt_team_nth;
7275#if KMP_AFFINITY_SUPPORTED
7278 __kmp_affinity_initialize();
7282 KMP_ASSERT(__kmp_xproc > 0);
7283 if (__kmp_avail_proc == 0) {
7284 __kmp_avail_proc = __kmp_xproc;
7290 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7291 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
7296 if (__kmp_dflt_team_nth == 0) {
7297#ifdef KMP_DFLT_NTH_CORES
7299 __kmp_dflt_team_nth = __kmp_ncores;
7300 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7301 "__kmp_ncores (%d)\n",
7302 __kmp_dflt_team_nth));
7305 __kmp_dflt_team_nth = __kmp_avail_proc;
7306 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7307 "__kmp_avail_proc(%d)\n",
7308 __kmp_dflt_team_nth));
7312 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
7313 __kmp_dflt_team_nth = KMP_MIN_NTH;
7315 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
7316 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7319 if (__kmp_nesting_mode > 0)
7320 __kmp_set_nesting_mode_threads();
7324 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
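  // Worked example (illustration only): on a machine with 16 available
  // processors and no explicit OMP_NUM_THREADS, __kmp_dflt_team_nth becomes 16
  // (or the core count when KMP_DFLT_NTH_CORES is in effect) and is then
  // clamped into [KMP_MIN_NTH, __kmp_sys_max_nth], so later parallel regions
  // default to 16 threads unless an ICV overrides it.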
7326 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7331 for (i = 0; i < __kmp_threads_capacity; i++) {
7332 kmp_info_t *thread = __kmp_threads[i];
7335 if (thread->th.th_current_task->td_icvs.nproc != 0)
7338 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
7343 (
"__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
7344 __kmp_dflt_team_nth));
7346#ifdef KMP_ADJUST_BLOCKTIME
7348 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7349 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
7350 if (__kmp_nth > __kmp_avail_proc) {
7351 __kmp_zero_bt = TRUE;
7357 TCW_SYNC_4(__kmp_init_middle, TRUE);
7359 KA_TRACE(10, (
"__kmp_do_middle_initialize: exit\n"));
void __kmp_middle_initialize(void) {
7363 if (__kmp_init_middle) {
7366 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7367 if (__kmp_init_middle) {
7368 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7371 __kmp_do_middle_initialize();
7372 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
void __kmp_parallel_initialize(void) {
7376 int gtid = __kmp_entry_gtid();
7379 if (TCR_4(__kmp_init_parallel))
7381 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7382 if (TCR_4(__kmp_init_parallel)) {
7383 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7388 if (TCR_4(__kmp_global.g.g_done)) {
7391 (
"__kmp_parallel_initialize: attempt to init while shutting down\n"));
7392 __kmp_infinite_loop();
7398 if (!__kmp_init_middle) {
7399 __kmp_do_middle_initialize();
7401 __kmp_assign_root_init_mask();
7402 __kmp_resume_if_hard_paused();
7405 KA_TRACE(10, (
"__kmp_parallel_initialize: enter\n"));
7406 KMP_ASSERT(KMP_UBER_GTID(gtid));
7408#if KMP_ARCH_X86 || KMP_ARCH_X86_64
7411 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7412 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7413 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7417#if KMP_HANDLE_SIGNALS
7419 __kmp_install_signals(TRUE);
7423 __kmp_suspend_initialize();
7425#if defined(USE_LOAD_BALANCE)
7426 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7427 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7430 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7431 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7435 if (__kmp_version) {
7436 __kmp_print_version_2();
7440 TCW_SYNC_4(__kmp_init_parallel, TRUE);
7443 KA_TRACE(10, (
"__kmp_parallel_initialize: exit\n"));
7445 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7448void __kmp_hidden_helper_initialize() {
7449 if (TCR_4(__kmp_init_hidden_helper))
7453 if (!TCR_4(__kmp_init_parallel))
7454 __kmp_parallel_initialize();
7458 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7459 if (TCR_4(__kmp_init_hidden_helper)) {
7460 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7465 KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);
7469 TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);
7472 __kmp_do_initialize_hidden_helper_threads();
7475 __kmp_hidden_helper_threads_initz_wait();
7478 TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);
7480 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;
7492 this_thr->th.th_local.this_construct = 0;
7494 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
7496 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
7497 KMP_DEBUG_ASSERT(dispatch);
7498 KMP_DEBUG_ASSERT(team->t.t_dispatch);
7502 dispatch->th_disp_index = 0;
7503 dispatch->th_doacross_buf_idx = 0;
7504 if (__kmp_env_consistency_check)
7505 __kmp_push_parallel(gtid, team->t.t_ident);
void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
7512 if (__kmp_env_consistency_check)
7513 __kmp_pop_parallel(gtid, team->t.t_ident);
7515 __kmp_finish_implicit_task(this_thr);
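// Runs the team's outlined microtask (t_pkfn) on the calling thread,
// surrounding the call with ITT stack markers, OMPT implicit-task callbacks,
// and statistics timers when those features are compiled in and enabled;
// returns the value from __kmp_invoke_microtask.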
int __kmp_invoke_task_func(int gtid) {
7520 int tid = __kmp_tid_from_gtid(gtid);
7521 kmp_info_t *this_thr = __kmp_threads[gtid];
7522 kmp_team_t *team = this_thr->th.th_team;
7524 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
7526 if (__itt_stack_caller_create_ptr) {
7528 if (team->t.t_stack_id != NULL) {
7529 __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
7531 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7532 __kmp_itt_stack_callee_enter(
7533 (__itt_caller)team->t.t_parent->t.t_stack_id);
7537#if INCLUDE_SSC_MARKS
7538 SSC_MARK_INVOKING();
7543 void **exit_frame_p;
7544 ompt_data_t *my_task_data;
7545 ompt_data_t *my_parallel_data;
7548 if (ompt_enabled.enabled) {
7549 exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
7550 .ompt_task_info.frame.exit_frame.ptr);
7552 exit_frame_p = &dummy;
7556 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7557 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
7558 if (ompt_enabled.ompt_callback_implicit_task) {
7559 ompt_team_size = team->t.t_nproc;
7560 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7561 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
7562 __kmp_tid_from_gtid(gtid), ompt_task_implicit);
7563 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
7567#if KMP_STATS_ENABLED
7569 if (previous_state == stats_state_e::TEAMS_REGION) {
7570 KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
7572 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
7574 KMP_SET_THREAD_STATE(IMPLICIT_TASK);
  rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
                              tid, (int)team->t.t_argc, (void **)team->t.t_argv
7585 *exit_frame_p = NULL;
7586 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
7589#if KMP_STATS_ENABLED
7590 if (previous_state == stats_state_e::TEAMS_REGION) {
7591 KMP_SET_THREAD_STATE(previous_state);
7593 KMP_POP_PARTITIONED_TIMER();
7597 if (__itt_stack_caller_create_ptr) {
7599 if (team->t.t_stack_id != NULL) {
7600 __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
7602 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7603 __kmp_itt_stack_callee_leave(
7604 (__itt_caller)team->t.t_parent->t.t_stack_id);
7608 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
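// Executed by the primary thread of each team in a teams construct: installs
// a fresh contention-group root carrying the current thread_limit, then forks
// (and joins) the nested parallel region that runs the teams microtask.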
void __kmp_teams_master(int gtid) {
7615 kmp_info_t *thr = __kmp_threads[gtid];
7616 kmp_team_t *team = thr->th.th_team;
7617 ident_t *loc = team->t.t_ident;
7618 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7619 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
7620 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
  KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
                __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
7628 tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
7629 tmp->cg_nthreads = 1;
  KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
                 " cg_nthreads to 1\n",
7633 tmp->up = thr->th.th_cg_roots;
7634 thr->th.th_cg_roots = tmp;
7638#if INCLUDE_SSC_MARKS
7641 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
7642 (microtask_t)thr->th.th_teams_microtask,
7643 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
7644#if INCLUDE_SSC_MARKS
7648 if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7649 thr->th.th_teams_size.nth = thr->th.th_team_nproc;
7652 __kmp_join_call(loc, gtid
int __kmp_invoke_teams_master(int gtid) {
7662 kmp_info_t *this_thr = __kmp_threads[gtid];
7663 kmp_team_t *team = this_thr->th.th_team;
7665 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
    KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                     (void *)__kmp_teams_master);
7669 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7671 int tid = __kmp_tid_from_gtid(gtid);
7672 ompt_data_t *task_data =
7673 &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
7674 ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
7675 if (ompt_enabled.ompt_callback_implicit_task) {
7676 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7677 ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
7679 OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
7682 __kmp_teams_master(gtid);
7684 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
7686 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
7696 kmp_info_t *thr = __kmp_threads[gtid];
7698 if (num_threads > 0)
7699 thr->th.th_set_nproc = num_threads;
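// Resolves the per-team thread limit for an upcoming teams construct. A zero
// num_threads is replaced by __kmp_teams_thread_limit or a share of the
// available processors; the result is then clamped against the default team
// size, the current task's thread_limit ICV, and __kmp_teams_max_nth.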
static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
                                    int num_threads) {
7704 KMP_DEBUG_ASSERT(thr);
7706 if (!TCR_4(__kmp_init_middle))
7707 __kmp_middle_initialize();
7708 __kmp_assign_root_init_mask();
7709 KMP_DEBUG_ASSERT(__kmp_avail_proc);
7710 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
7712 if (num_threads == 0) {
7713 if (__kmp_teams_thread_limit > 0) {
7714 num_threads = __kmp_teams_thread_limit;
7716 num_threads = __kmp_avail_proc / num_teams;
7721 if (num_threads > __kmp_dflt_team_nth) {
7722 num_threads = __kmp_dflt_team_nth;
7724 if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
7725 num_threads = thr->th.th_current_task->td_icvs.thread_limit;
7727 if (num_teams * num_threads > __kmp_teams_max_nth) {
7728 num_threads = __kmp_teams_max_nth / num_teams;
7730 if (num_threads == 0) {
7734 if (num_threads < 0) {
7735 __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
7741 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7743 if (num_threads > __kmp_dflt_team_nth) {
7744 num_threads = __kmp_dflt_team_nth;
7746 if (num_teams * num_threads > __kmp_teams_max_nth) {
7747 int new_threads = __kmp_teams_max_nth / num_teams;
7748 if (new_threads == 0) {
7751 if (new_threads != num_threads) {
7752 if (!__kmp_reserve_warn) {
7753 __kmp_reserve_warn = 1;
7754 __kmp_msg(kmp_ms_warning,
7755 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7756 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7759 num_threads = new_threads;
7762 thr->th.th_teams_size.nth = num_threads;
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
7769 kmp_info_t *thr = __kmp_threads[gtid];
7770 if (num_teams < 0) {
7773 __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
7777 if (num_teams == 0) {
7778 if (__kmp_nteams > 0) {
7779 num_teams = __kmp_nteams;
7784 if (num_teams > __kmp_teams_max_nth) {
7785 if (!__kmp_reserve_warn) {
7786 __kmp_reserve_warn = 1;
7787 __kmp_msg(kmp_ms_warning,
7788 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7789 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7791 num_teams = __kmp_teams_max_nth;
7795 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7797 __kmp_push_thread_limit(thr, num_teams, num_threads);
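// OpenMP 5.1 form of num_teams: takes a lower and an upper bound, validates
// them, resolves the actual team count against __kmp_nteams,
// __kmp_teams_max_nth and the requested thread count, then delegates the
// per-team thread limit to __kmp_push_thread_limit().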
void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
                             int num_teams_ub, int num_threads) {
7804 kmp_info_t *thr = __kmp_threads[gtid];
7805 KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
7806 KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
7807 KMP_DEBUG_ASSERT(num_threads >= 0);
7809 if (num_teams_lb > num_teams_ub) {
7810 __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
7811 KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
7816 if (num_teams_lb == 0 && num_teams_ub > 0)
7817 num_teams_lb = num_teams_ub;
7819 if (num_teams_lb == 0 && num_teams_ub == 0) {
7820 num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
7821 if (num_teams > __kmp_teams_max_nth) {
7822 if (!__kmp_reserve_warn) {
7823 __kmp_reserve_warn = 1;
7824 __kmp_msg(kmp_ms_warning,
7825 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7826 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7828 num_teams = __kmp_teams_max_nth;
  } else if (num_teams_lb == num_teams_ub) {
7831 num_teams = num_teams_ub;
7833 if (num_threads <= 0) {
7834 if (num_teams_ub > __kmp_teams_max_nth) {
7835 num_teams = num_teams_lb;
7837 num_teams = num_teams_ub;
7840 num_teams = (num_threads > __kmp_teams_max_nth)
7842 : __kmp_teams_max_nth / num_threads;
7843 if (num_teams < num_teams_lb) {
7844 num_teams = num_teams_lb;
      } else if (num_teams > num_teams_ub) {
7846 num_teams = num_teams_ub;
7852 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7854 __kmp_push_thread_limit(thr, num_teams, num_threads);
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
7859 kmp_info_t *thr = __kmp_threads[gtid];
7860 thr->th.th_set_proc_bind = proc_bind;
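// Primary-thread side of launching a parallel region on an already-formed
// team: reset the construct counters and dispatch buffers, sanity-check the
// team, and release the workers through the fork barrier.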
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
7866 kmp_info_t *this_thr = __kmp_threads[gtid];
7872 KMP_DEBUG_ASSERT(team);
7873 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7874 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7877 team->t.t_construct = 0;
7878 team->t.t_ordered.dt.t_value =
7882 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
7883 if (team->t.t_max_nproc > 1) {
7885 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7886 team->t.t_disp_buffer[i].buffer_index = i;
7887 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7890 team->t.t_disp_buffer[0].buffer_index = 0;
7891 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7895 KMP_ASSERT(this_thr->th.th_team == team);
7898 for (f = 0; f < team->t.t_nproc; f++) {
7899 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
7900 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
7905 __kmp_fork_barrier(gtid, 0);
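// Primary-thread side of finishing a parallel region: wait on the join
// barrier, report the end of the implicit barrier/task to OMPT when a tool is
// present, and leave the (hot) team in place for reuse.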
void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
7909 kmp_info_t *this_thr = __kmp_threads[gtid];
7911 KMP_DEBUG_ASSERT(team);
7912 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7913 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7919 if (__kmp_threads[gtid] &&
7920 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
7927 __kmp_print_structure();
7929 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
7930 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
7933 __kmp_join_barrier(gtid);
7935 if (ompt_enabled.enabled &&
7936 this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
7937 int ds_tid = this_thr->th.th_info.ds.ds_tid;
7938 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
7939 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
7941 void *codeptr = NULL;
7942 if (KMP_MASTER_TID(ds_tid) &&
7943 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
7944 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
7945 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
7947 if (ompt_enabled.ompt_callback_sync_region_wait) {
7948 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
7949 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
7952 if (ompt_enabled.ompt_callback_sync_region) {
7953 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
7954 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
7958 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
7959 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7960 ompt_scope_end, NULL, task_data, 0, ds_tid,
7961 ompt_task_implicit);
7967 KMP_ASSERT(this_thr->th.th_team == team);
7972#ifdef USE_LOAD_BALANCE
7976static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
7979 kmp_team_t *hot_team;
7981 if (root->r.r_active) {
7984 hot_team = root->r.r_hot_team;
7985 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
7986 return hot_team->t.t_nproc - 1;
7991 for (i = 1; i < hot_team->t.t_nproc; i++) {
7992 if (hot_team->t.t_threads[i]->th.th_active) {
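// Load-balance heuristic for dynamic adjustment of team size: estimate how
// many threads the next team can use from __kmp_get_load_balance(), falling
// back to the thread-limit method if load information is unavailable.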
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
8004 int hot_team_active;
8005 int team_curr_active;
  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
8010 KMP_DEBUG_ASSERT(root);
8011 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
8012 ->th.th_current_task->td_icvs.dynamic == TRUE);
8013 KMP_DEBUG_ASSERT(set_nproc > 1);
8015 if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
8025 pool_active = __kmp_thread_pool_active_nth;
8026 hot_team_active = __kmp_active_hot_team_nproc(root);
8027 team_curr_active = pool_active + hot_team_active + 1;
8030 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));
8035 if (system_active < 0) {
8039 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");
8043 retval = __kmp_avail_proc - __kmp_nth +
8044 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
8045 if (retval > set_nproc) {
8048 if (retval < KMP_MIN_NTH) {
8049 retval = KMP_MIN_NTH;
    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
8060 if (system_active < team_curr_active) {
8061 system_active = team_curr_active;
8063 retval = __kmp_avail_proc - system_active + team_curr_active;
8064 if (retval > set_nproc) {
8067 if (retval < KMP_MIN_NTH) {
8068 retval = KMP_MIN_NTH;
  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
void __kmp_cleanup(void) {
  KA_TRACE(10, ("__kmp_cleanup: enter\n"));
8085 if (TCR_4(__kmp_init_parallel)) {
8086#if KMP_HANDLE_SIGNALS
8087 __kmp_remove_signals();
8089 TCW_4(__kmp_init_parallel, FALSE);
8092 if (TCR_4(__kmp_init_middle)) {
8093#if KMP_AFFINITY_SUPPORTED
8094 __kmp_affinity_uninitialize();
8096 __kmp_cleanup_hierarchy();
8097 TCW_4(__kmp_init_middle, FALSE);
  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));
8102 if (__kmp_init_serial) {
8103 __kmp_runtime_destroy();
8104 __kmp_init_serial = FALSE;
8107 __kmp_cleanup_threadprivate_caches();
8109 for (f = 0; f < __kmp_threads_capacity; f++) {
8110 if (__kmp_root[f] != NULL) {
8111 __kmp_free(__kmp_root[f]);
8112 __kmp_root[f] = NULL;
8115 __kmp_free(__kmp_threads);
8118 __kmp_threads = NULL;
8120 __kmp_threads_capacity = 0;
8123 kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
8125 kmp_old_threads_list_t *next = ptr->next;
8126 __kmp_free(ptr->threads);
8131#if KMP_USE_DYNAMIC_LOCK
8132 __kmp_cleanup_indirect_user_locks();
8134 __kmp_cleanup_user_locks();
8138 __kmp_free(ompd_env_block);
8139 ompd_env_block = NULL;
8140 ompd_env_block_size = 0;
8144#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
8146 __kmp_cpuinfo_file = NULL;
8149#if KMP_USE_ADAPTIVE_LOCKS
8150#if KMP_DEBUG_ADAPTIVE_LOCKS
8151 __kmp_print_speculative_stats();
8154 KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
8155 __kmp_nested_nth.nth = NULL;
8156 __kmp_nested_nth.size = 0;
8157 __kmp_nested_nth.used = 0;
8158 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
8159 __kmp_nested_proc_bind.bind_types = NULL;
8160 __kmp_nested_proc_bind.size = 0;
8161 __kmp_nested_proc_bind.used = 0;
8162 if (__kmp_affinity_format) {
8163 KMP_INTERNAL_FREE(__kmp_affinity_format);
8164 __kmp_affinity_format = NULL;
8167 __kmp_i18n_catclose();
8169#if KMP_USE_HIER_SCHED
8170 __kmp_hier_scheds.deallocate();
8173#if KMP_STATS_ENABLED
  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
8186 if (__kmp_str_match_false(env))
int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
8197 if (__kmp_str_match_false(env))
void __kmp_internal_begin(void) {
8210 gtid = __kmp_entry_gtid();
8211 root = __kmp_threads[gtid]->th.th_root;
8212 KMP_ASSERT(KMP_UBER_GTID(gtid));
8214 if (root->r.r_begin)
8216 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
8217 if (root->r.r_begin) {
8218 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8222 root->r.r_begin = TRUE;
8224 __kmp_release_lock(&root->r.r_begin_lock, gtid);
void __kmp_user_set_library(enum library_type arg) {
8236 gtid = __kmp_entry_gtid();
8237 thread = __kmp_threads[gtid];
8239 root = thread->th.th_root;
  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
8243 if (root->r.r_in_parallel) {
8245 KMP_WARNING(SetLibraryIncorrectCall);
8250 case library_serial:
8251 thread->th.th_set_nproc = 0;
8252 set__nproc(thread, 1);
8254 case library_turnaround:
8255 thread->th.th_set_nproc = 0;
8256 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
8257 : __kmp_dflt_team_nth_ub);
8259 case library_throughput:
8260 thread->th.th_set_nproc = 0;
8261 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
8262 : __kmp_dflt_team_nth_ub);
8265 KMP_FATAL(UnknownLibraryType, arg);
8268 __kmp_aux_set_library(arg);
void __kmp_aux_set_stacksize(size_t arg) {
8272 if (!__kmp_init_serial)
8273 __kmp_serial_initialize();
8276 if (arg & (0x1000 - 1)) {
8277 arg &= ~(0x1000 - 1);
8282 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
8285 if (!TCR_4(__kmp_init_parallel)) {
8288 if (value < __kmp_sys_min_stksize)
8289 value = __kmp_sys_min_stksize;
8290 else if (value > KMP_MAX_STKSIZE)
8291 value = KMP_MAX_STKSIZE;
8293 __kmp_stksize = value;
8295 __kmp_env_stksize = TRUE;
8298 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
void __kmp_aux_set_library(enum library_type arg) {
8304 __kmp_library = arg;
8306 switch (__kmp_library) {
8307 case library_serial: {
8308 KMP_INFORM(LibraryIsSerial);
8310 case library_turnaround:
8311 if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
8312 __kmp_use_yield = 2;
8314 case library_throughput:
8315 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
8316 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
8319 KMP_FATAL(UnknownLibraryType, arg);
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
8326 kmp_info_t *thr = __kmp_entry_thread();
8327 teams_serialized = 0;
8328 if (thr->th.th_teams_microtask) {
8329 kmp_team_t *team = thr->th.th_team;
8330 int tlevel = thr->th.th_teams_level;
8331 int ii = team->t.t_level;
8332 teams_serialized = team->t.t_serialized;
8333 int level = tlevel + 1;
8334 KMP_DEBUG_ASSERT(ii >= tlevel);
8335 while (ii > level) {
8336 for (teams_serialized = team->t.t_serialized;
8337 (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
8339 if (team->t.t_serialized && (!teams_serialized)) {
8340 team = team->t.t_parent;
8344 team = team->t.t_parent;
8353int __kmp_aux_get_team_num() {
8355 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8357 if (serialized > 1) {
8360 return team->t.t_master_tid;
8366int __kmp_aux_get_num_teams() {
8368 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8370 if (serialized > 1) {
8373 return team->t.t_parent->t.t_nproc;
typedef struct kmp_affinity_format_field_t {
  char short_name; // single-character field name, e.g. 'L'
  const char *long_name; // spelled-out field name, e.g. "nesting_level"
  char field_format; // printf conversion used for the value, e.g. 'd'
} kmp_affinity_format_field_t;
static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};
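// Parses one %-field of an affinity format string: *ptr points at the '%'.
// The optional justification/width modifiers are folded into a local printf
// format, the field name (short or long form) is looked up in
// __kmp_affinity_format_table, and the corresponding value is printed into
// field_buffer; the number of characters written is returned.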
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
8437 int rc, format_index, field_value;
8438 const char *width_left, *width_right;
8439 bool pad_zeros, right_justify, parse_long_name, found_valid_name;
8440 static const int FORMAT_SIZE = 20;
8441 char format[FORMAT_SIZE] = {0};
8442 char absolute_short_name = 0;
8444 KMP_DEBUG_ASSERT(gtid >= 0);
8445 KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
8447 KMP_DEBUG_ASSERT(field_buffer);
8449 __kmp_str_buf_clear(field_buffer);
    __kmp_str_buf_cat(field_buffer, "%", 1);
  right_justify = false;
  right_justify = true;
8473 width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
  format[format_index++] = '%';
  format[format_index++] = '-';
  format[format_index++] = '0';
8487 if (width_left && width_right) {
8491 while (i < 8 && width_left < width_right) {
8492 format[format_index++] = *width_left;
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
8502 if (parse_long_name)
  for (size_t i = 0;
       i < sizeof(__kmp_affinity_format_table) /
               sizeof(__kmp_affinity_format_table[0]);
       ++i) {
8507 char short_name = __kmp_affinity_format_table[i].short_name;
8508 const char *long_name = __kmp_affinity_format_table[i].long_name;
8509 char field_format = __kmp_affinity_format_table[i].field_format;
8510 if (parse_long_name) {
8511 size_t length = KMP_STRLEN(long_name);
8512 if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
    } else if (**ptr == short_name) {
      found_valid_name = true;
8520 if (found_valid_name) {
8521 format[format_index++] = field_format;
      format[format_index++] = '\0';
8523 absolute_short_name = short_name;
8527 if (parse_long_name) {
8529 absolute_short_name = 0;
8537 switch (absolute_short_name) {
8539 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
8542 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
8545 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
8548 rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
8551 static const int BUFFER_SIZE = 256;
8552 char buf[BUFFER_SIZE];
8553 __kmp_expand_host_name(buf, BUFFER_SIZE);
8554 rc = __kmp_str_buf_print(field_buffer, format, buf);
8557 rc = __kmp_str_buf_print(field_buffer, format, getpid());
8560 rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
8563 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
8567 __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
8568 rc = __kmp_str_buf_print(field_buffer, format, field_value);
8570#if KMP_AFFINITY_SUPPORTED
8573 __kmp_str_buf_init(&buf);
8574 __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
8575 rc = __kmp_str_buf_print(field_buffer, format, buf.str);
8576 __kmp_str_buf_free(&buf);
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
8584 if (parse_long_name) {
8593 KMP_ASSERT(format_index <= FORMAT_SIZE);
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
8605 const char *parse_ptr;
8607 const kmp_info_t *th;
8608 kmp_str_buf_t field;
8610 KMP_DEBUG_ASSERT(buffer);
8611 KMP_DEBUG_ASSERT(gtid >= 0);
8613 __kmp_str_buf_init(&field);
8614 __kmp_str_buf_clear(buffer);
8616 th = __kmp_threads[gtid];
  if (parse_ptr == NULL || *parse_ptr == '\0') {
8623 parse_ptr = __kmp_affinity_format;
8625 KMP_DEBUG_ASSERT(parse_ptr);
  while (*parse_ptr != '\0') {
    if (*parse_ptr == '%') {
8631 int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
8632 __kmp_str_buf_catbuf(buffer, &field);
8636 __kmp_str_buf_cat(buffer, parse_ptr, 1);
8641 __kmp_str_buf_free(&field);
void __kmp_aux_display_affinity(int gtid, const char *format) {
8648 __kmp_str_buf_init(&buf);
8649 __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
8651 __kmp_str_buf_free(&buf);
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
8657 int blocktime = arg;
8663 __kmp_save_internal_controls(thread);
8666 if (blocktime < KMP_MIN_BLOCKTIME)
8667 blocktime = KMP_MIN_BLOCKTIME;
8668 else if (blocktime > KMP_MAX_BLOCKTIME)
8669 blocktime = KMP_MAX_BLOCKTIME;
8671 set__blocktime_team(thread->th.th_team, tid, blocktime);
8672 set__blocktime_team(thread->th.th_serial_team, 0, blocktime);
8676 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
8678 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
8679 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
8685 set__bt_set_team(thread->th.th_team, tid, bt_set);
8686 set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
void __kmp_aux_set_defaults(char const *str, size_t len) {
8701 if (!__kmp_init_serial) {
8702 __kmp_serial_initialize();
8704 __kmp_env_initialize(str);
8706 if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
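// Selects the packed reduction method for a reduce clause. The choice depends
// on the team size, the number and size of the reduction variables, which
// fast paths the compiler emitted (atomic and/or tree reduction, see the
// FAST_REDUCTION_*_METHOD_GENERATED macros below), the architecture/OS, and
// any method forced through __kmp_force_reduction_method.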
PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {
8729 PACKED_REDUCTION_METHOD_T retval;
8733 KMP_DEBUG_ASSERT(loc);
8734 KMP_DEBUG_ASSERT(lck);
8736#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
8738 ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
8739#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
8741 retval = critical_reduce_block;
8744 team_size = __kmp_get_team_num_threads(global_tid);
8745 if (team_size == 1) {
8747 retval = empty_reduce_block;
8751 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8753#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
8754 KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64
8756#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
8757 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD
8759 int teamsize_cutoff = 4;
8761#if KMP_MIC_SUPPORTED
8762 if (__kmp_mic_type != non_mic) {
8763 teamsize_cutoff = 8;
8766 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8767 if (tree_available) {
8768 if (team_size <= teamsize_cutoff) {
8769 if (atomic_available) {
8770 retval = atomic_reduce_block;
8773 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
    } else if (atomic_available) {
8776 retval = atomic_reduce_block;
8779#error "Unknown or unsupported OS"
8783#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
8785#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD
8789 if (atomic_available) {
8790 if (num_vars <= 2) {
8791 retval = atomic_reduce_block;
8797 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8798 if (atomic_available && (num_vars <= 3)) {
8799 retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
8803 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
8808#error "Unknown or unsupported OS"
8812#error "Unknown or unsupported architecture"
8820 if (__kmp_force_reduction_method != reduction_method_not_defined &&
8823 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
8825 int atomic_available, tree_available;
8827 switch ((forced_retval = __kmp_force_reduction_method)) {
8828 case critical_reduce_block:
8832 case atomic_reduce_block:
8833 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8834 if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
8836 forced_retval = critical_reduce_block;
8840 case tree_reduce_block:
8841 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8842 if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
8844 forced_retval = critical_reduce_block;
8846#if KMP_FAST_REDUCTION_BARRIER
8847 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8856 retval = forced_retval;
  KA_TRACE(10, ("reduction method selected=%08x\n", retval));
8861#undef FAST_REDUCTION_TREE_METHOD_GENERATED
8862#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
kmp_int32 __kmp_get_reduce_method(void) {
8868 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
8873void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }
8877void __kmp_hard_pause() {
8878 __kmp_pause_status = kmp_hard_paused;
8879 __kmp_internal_end_thread(-1);
8883void __kmp_resume_if_soft_paused() {
8884 if (__kmp_pause_status == kmp_soft_paused) {
8885 __kmp_pause_status = kmp_not_paused;
    for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
8888 kmp_info_t *thread = __kmp_threads[gtid];
8890 kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
8892 if (fl.is_sleeping())
8894 else if (__kmp_try_suspend_mx(thread)) {
8895 __kmp_unlock_suspend_mx(thread);
8898 if (fl.is_sleeping()) {
        } else if (__kmp_try_suspend_mx(thread)) {
8902 __kmp_unlock_suspend_mx(thread);
8914int __kmp_pause_resource(kmp_pause_status_t level) {
8915 if (level == kmp_not_paused) {
8916 if (__kmp_pause_status == kmp_not_paused) {
8920 KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
8921 __kmp_pause_status == kmp_hard_paused);
8922 __kmp_pause_status = kmp_not_paused;
  } else if (level == kmp_soft_paused) {
8926 if (__kmp_pause_status != kmp_not_paused) {
  } else if (level == kmp_hard_paused) {
8934 if (__kmp_pause_status != kmp_not_paused) {
void __kmp_omp_display_env(int verbose) {
8948 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
8949 if (__kmp_init_serial == 0)
8950 __kmp_do_serial_initialize();
8951 __kmp_display_env_impl(!verbose, verbose);
8952 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
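// Used with the distributed barrier when the hot team changes size: threads
// that are leaving the team are marked via th_used_in_team and released
// through the barrier so they can remove themselves, then the barrier is
// resized to new_nthreads and its release state reset.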
void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads) {
8958 KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
8960 kmp_info_t **other_threads = team->t.t_threads;
  for (int f = 1; f < old_nthreads; ++f) {
8965 KMP_DEBUG_ASSERT(other_threads[f] != NULL);
8967 if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
8973 if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
8974 while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
8978 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
8980 team->t.t_threads[f]->th.th_used_in_team.store(2);
8981 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
8984 team->t.b->go_release();
8990 int count = old_nthreads - 1;
8992 count = old_nthreads - 1;
  for (int f = 1; f < old_nthreads; ++f) {
8994 if (other_threads[f]->th.th_used_in_team.load() != 0) {
8995 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
8996 kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
8997 void *, other_threads[f]->th.th_sleep_loc);
8998 __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
9001 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
9007 team->t.b->update_num_threads(new_nthreads);
9008 team->t.b->go_reset();
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
9013 KMP_DEBUG_ASSERT(team);
  for (int f = 1; f < new_nthreads; ++f) {
9020 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
9021 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
9023 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
9024 __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
9025 (kmp_flag_32<false, false> *)NULL);
9031 int count = new_nthreads - 1;
9033 count = new_nthreads - 1;
  for (int f = 1; f < new_nthreads; ++f) {
9035 if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
9043kmp_info_t **__kmp_hidden_helper_threads;
9044kmp_info_t *__kmp_hidden_helper_main_thread;
9045std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
9047kmp_int32 __kmp_hidden_helper_threads_num = 8;
9048kmp_int32 __kmp_enable_hidden_helper = TRUE;
9050kmp_int32 __kmp_hidden_helper_threads_num = 0;
9051kmp_int32 __kmp_enable_hidden_helper = FALSE;
9055std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;
void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
9062 KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
9063 while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
9064 __kmp_hidden_helper_threads_num)
9070 TCW_4(__kmp_init_hidden_helper_threads, FALSE);
9071 __kmp_hidden_helper_initz_release();
9072 __kmp_hidden_helper_main_thread_wait();
  for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
9075 __kmp_hidden_helper_worker_thread_signal();
9081void __kmp_hidden_helper_threads_initz_routine() {
9083 const int gtid = __kmp_register_root(TRUE);
9084 __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
9085 __kmp_hidden_helper_threads = &__kmp_threads[gtid];
9086 __kmp_hidden_helper_main_thread->th.th_set_nproc =
9087 __kmp_hidden_helper_threads_num;
9089 KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);
9094 TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);
9096 __kmp_hidden_helper_threads_deinitz_release();
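// KMP_NESTING_MODE support: allocate one slot per hardware topology level in
// __kmp_nesting_nth_level (growing __kmp_nested_nth to match); the table is
// populated later by __kmp_set_nesting_mode_threads() from the detected
// topology.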
9116void __kmp_init_nesting_mode() {
9117 int levels = KMP_HW_LAST;
9118 __kmp_nesting_mode_nlevels = levels;
  __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
  for (int i = 0; i < levels; ++i)
9121 __kmp_nesting_nth_level[i] = 0;
9122 if (__kmp_nested_nth.size < levels) {
    __kmp_nested_nth.nth =
        (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
9125 __kmp_nested_nth.size = levels;
9130void __kmp_set_nesting_mode_threads() {
9131 kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];
9133 if (__kmp_nesting_mode == 1)
9134 __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
9135 else if (__kmp_nesting_mode > 1)
9136 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9138 if (__kmp_topology) {
9140 for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
9141 loc < __kmp_nesting_mode_nlevels;
9142 loc++, hw_level++) {
9143 __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
9144 if (__kmp_nesting_nth_level[loc] == 1)
9148 if (__kmp_nesting_mode > 1 && loc > 1) {
9149 int core_level = __kmp_topology->get_level(KMP_HW_CORE);
9150 int num_cores = __kmp_topology->get_count(core_level);
9151 int upper_levels = 1;
    for (int level = 0; level < loc - 1; ++level)
9153 upper_levels *= __kmp_nesting_nth_level[level];
9154 if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
9155 __kmp_nesting_nth_level[loc - 1] =
9156 num_cores / __kmp_nesting_nth_level[loc - 2];
9158 __kmp_nesting_mode_nlevels = loc;
9159 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9161 if (__kmp_avail_proc >= 4) {
9162 __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
9163 __kmp_nesting_nth_level[1] = 2;
9164 __kmp_nesting_mode_nlevels = 2;
9166 __kmp_nesting_nth_level[0] = __kmp_avail_proc;
9167 __kmp_nesting_mode_nlevels = 1;
9169 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
9172 __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
9174 set__nproc(thread, __kmp_nesting_nth_level[0]);
9175 if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
9176 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9177 if (get__max_active_levels(thread) > 1) {
9179 __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
9181 if (__kmp_nesting_mode == 1)
9182 set__max_active_levels(thread, __kmp_nesting_mode_nlevels);