LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1/*
2 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#define __KMP_IMP
14#include "omp.h" /* extern "C" declarations of user-visible routines */
15#include "kmp.h"
16#include "kmp_error.h"
17#include "kmp_i18n.h"
18#include "kmp_itt.h"
19#include "kmp_lock.h"
20#include "kmp_stats.h"
21#include "ompt-specific.h"
22
23#define MAX_MESSAGE 512
24
25// flags will be used in the future, e.g., to implement openmp_strict library
26// restrictions
27
36void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
37 // By default __kmpc_begin() is no-op.
38 char *env;
39 if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
40 __kmp_str_match_true(env)) {
41 __kmp_middle_initialize();
42 __kmp_assign_root_init_mask();
43 KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
44 } else if (__kmp_ignore_mppbeg() == FALSE) {
45 // By default __kmp_ignore_mppbeg() returns TRUE.
46 __kmp_internal_begin();
47 KC_TRACE(10, ("__kmpc_begin: called\n"));
48 }
49}
50
59void __kmpc_end(ident_t *loc) {
60 // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
61 // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
62 // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
63 // returns FALSE and __kmpc_end() will unregister this root (it can cause
64 // library shut down).
65 if (__kmp_ignore_mppend() == FALSE) {
66 KC_TRACE(10, ("__kmpc_end: called\n"));
67 KA_TRACE(30, ("__kmpc_end\n"));
68
69 __kmp_internal_end_thread(-1);
70 }
71#if KMP_OS_WINDOWS && OMPT_SUPPORT
72 // Normal exit process on Windows does not allow worker threads of the final
73 // parallel region to finish reporting their events, so shutting down the
74 // library here fixes the issue at least for the cases where __kmpc_end() is
75 // placed properly.
76 if (ompt_enabled.enabled)
77 __kmp_internal_end_library(__kmp_gtid_get_specific());
78#endif
79}
80
99kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
100 kmp_int32 gtid = __kmp_entry_gtid();
101
102 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
103
104 return gtid;
105}
106
121kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
122 KC_TRACE(10,
123 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
124
125 return TCR_4(__kmp_all_nth);
126}
127
134kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
135 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
136 return __kmp_tid_from_gtid(__kmp_entry_gtid());
137}
138
144kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
145 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
146
147 return __kmp_entry_thread()->th.th_team->t.t_nproc;
148}
149
156kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
157#ifndef KMP_DEBUG
158
159 return TRUE;
160
161#else
162
163 const char *semi2;
164 const char *semi3;
165 int line_no;
166
167 if (__kmp_par_range == 0) {
168 return TRUE;
169 }
170 semi2 = loc->psource;
171 if (semi2 == NULL) {
172 return TRUE;
173 }
174 semi2 = strchr(semi2, ';');
175 if (semi2 == NULL) {
176 return TRUE;
177 }
178 semi2 = strchr(semi2 + 1, ';');
179 if (semi2 == NULL) {
180 return TRUE;
181 }
182 if (__kmp_par_range_filename[0]) {
183 const char *name = semi2 - 1;
184 while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
185 name--;
186 }
187 if ((*name == '/') || (*name == ';')) {
188 name++;
189 }
190 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
191 return __kmp_par_range < 0;
192 }
193 }
194 semi3 = strchr(semi2 + 1, ';');
195 if (__kmp_par_range_routine[0]) {
196 if ((semi3 != NULL) && (semi3 > semi2) &&
197 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
198 return __kmp_par_range < 0;
199 }
200 }
201 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
202 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
203 return __kmp_par_range > 0;
204 }
205 return __kmp_par_range < 0;
206 }
207 return TRUE;
208
209#endif /* KMP_DEBUG */
210}
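
// Worked example (hypothetical values) of the psource layout parsed by
// __kmpc_ok_to_fork() above; the fields are ";file;routine;line;column;;":
//
//   const char *psource = ";mysrc.c;compute;42;9;;";
//
// - semi2 lands on the ';' that follows "mysrc.c"; the file-name check walks
//   back from semi2 to the previous ';' or '/', yielding "mysrc.c".
// - semi3 is the ';' that follows "compute", so the routine field is "compute".
// - KMP_SSCANF(semi3 + 1, "%d", &line_no) then reads the construct's first
//   line number, 42.
//
// The region is actually forked only when file, routine and line number all
// satisfy the KMP_PAR_RANGE filter stored in the __kmp_par_range_* globals.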
211
218kmp_int32 __kmpc_in_parallel(ident_t *loc) {
219 return __kmp_entry_thread()->th.th_root->r.r_active;
220}
221
231void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
232 kmp_int32 num_threads) {
233 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
234 global_tid, num_threads));
235 __kmp_assert_valid_gtid(global_tid);
236 __kmp_push_num_threads(loc, global_tid, num_threads);
237}
238
239void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
240 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
241 /* the num_threads are automatically popped */
242}
243
244void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
245 kmp_int32 proc_bind) {
246 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
247 proc_bind));
248 __kmp_assert_valid_gtid(global_tid);
249 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
250}
251
262void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
263 int gtid = __kmp_entry_gtid();
264
265#if (KMP_STATS_ENABLED)
266 // If we were in a serial region, then stop the serial timer, record
267 // the event, and start parallel region timer
268 stats_state_e previous_state = KMP_GET_THREAD_STATE();
269 if (previous_state == stats_state_e::SERIAL_REGION) {
270 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
271 } else {
272 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
273 }
274 int inParallel = __kmpc_in_parallel(loc);
275 if (inParallel) {
276 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
277 } else {
278 KMP_COUNT_BLOCK(OMP_PARALLEL);
279 }
280#endif
281
282 // maybe saving thr_state is enough here
283 {
284 va_list ap;
285 va_start(ap, microtask);
286
287#if OMPT_SUPPORT
288 ompt_frame_t *ompt_frame;
289 if (ompt_enabled.enabled) {
290 kmp_info_t *master_th = __kmp_threads[gtid];
291 ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
292 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
293 }
294 OMPT_STORE_RETURN_ADDRESS(gtid);
295#endif
296
297#if INCLUDE_SSC_MARKS
298 SSC_MARK_FORKING();
299#endif
300 __kmp_fork_call(loc, gtid, fork_context_intel, argc,
301 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
302 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
303 kmp_va_addr_of(ap));
304#if INCLUDE_SSC_MARKS
305 SSC_MARK_JOINING();
306#endif
307 __kmp_join_call(loc, gtid
308#if OMPT_SUPPORT
309 ,
310 fork_context_intel
311#endif
312 );
313
314 va_end(ap);
315
316#if OMPT_SUPPORT
317 if (ompt_enabled.enabled) {
318 ompt_frame->enter_frame = ompt_data_none;
319 }
320#endif
321 }
322
323#if KMP_STATS_ENABLED
324 if (previous_state == stats_state_e::SERIAL_REGION) {
325 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
326 KMP_SET_THREAD_STATE(previous_state);
327 } else {
328 KMP_POP_PARTITIONED_TIMER();
329 }
330#endif // KMP_STATS_ENABLED
331}
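
// Illustrative sketch, not part of the upstream file: roughly how a compiler
// lowers "#pragma omp parallel num_threads(4)" onto the entry points above.
// The "example_*" names and the ident_t initializer (including its psource
// string) are hypothetical; real compilers emit equivalent mangled names.
static void example_outlined(kmp_int32 *gtid, kmp_int32 *bound_tid,
                             int *shared_n) {
  // Runs once on every thread of the new team; *gtid is the global thread
  // number and *shared_n is the variable captured by address in the caller.
  (void)gtid;
  (void)bound_tid;
  (void)shared_n; // ... user code of the parallel region goes here ...
}

static void example_parallel(void) {
  static ident_t example_loc = {0, KMP_IDENT_KMPC, 0, 0,
                                ";example.c;example_parallel;10;1;;"};
  kmp_int32 gtid = __kmpc_global_thread_num(&example_loc);
  int n = 42;
  __kmpc_push_num_threads(&example_loc, gtid, 4); // num_threads(4) clause
  __kmpc_fork_call(&example_loc, /*argc=*/1, (kmpc_micro)example_outlined, &n);
}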
332
344void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
345 kmp_int32 num_teams, kmp_int32 num_threads) {
346 KA_TRACE(20,
347 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
348 global_tid, num_teams, num_threads));
349 __kmp_assert_valid_gtid(global_tid);
350 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
351}
352
369void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
370 kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
371 kmp_int32 num_threads) {
372 KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
373 " num_teams_ub=%d num_threads=%d\n",
374 global_tid, num_teams_lb, num_teams_ub, num_threads));
375 __kmp_assert_valid_gtid(global_tid);
376 __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
377 num_threads);
378}
379
390void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
391 ...) {
392 int gtid = __kmp_entry_gtid();
393 kmp_info_t *this_thr = __kmp_threads[gtid];
394 va_list ap;
395 va_start(ap, microtask);
396
397#if KMP_STATS_ENABLED
398 KMP_COUNT_BLOCK(OMP_TEAMS);
399 stats_state_e previous_state = KMP_GET_THREAD_STATE();
400 if (previous_state == stats_state_e::SERIAL_REGION) {
401 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
402 } else {
403 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
404 }
405#endif
406
407 // remember teams entry point and nesting level
408 this_thr->th.th_teams_microtask = microtask;
409 this_thr->th.th_teams_level =
410 this_thr->th.th_team->t.t_level; // AC: can be >0 on host
411
412#if OMPT_SUPPORT
413 kmp_team_t *parent_team = this_thr->th.th_team;
414 int tid = __kmp_tid_from_gtid(gtid);
415 if (ompt_enabled.enabled) {
416 parent_team->t.t_implicit_task_taskdata[tid]
417 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
418 }
419 OMPT_STORE_RETURN_ADDRESS(gtid);
420#endif
421
422 // check if __kmpc_push_num_teams was called; set the default number of
423 // teams otherwise
424 if (this_thr->th.th_teams_size.nteams == 0) {
425 __kmp_push_num_teams(loc, gtid, 0, 0);
426 }
427 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
428 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
429 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
430
431 __kmp_fork_call(
432 loc, gtid, fork_context_intel, argc,
433 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
434 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
435 __kmp_join_call(loc, gtid
436#if OMPT_SUPPORT
437 ,
438 fork_context_intel
439#endif
440 );
441
442 // Pop current CG root off list
443 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
444 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
445 this_thr->th.th_cg_roots = tmp->up;
446 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
447 " to node %p. cg_nthreads was %d\n",
448 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
449 KMP_DEBUG_ASSERT(tmp->cg_nthreads);
450 int i = tmp->cg_nthreads--;
451 if (i == 1) { // check if we are the last thread in CG (not always the case)
452 __kmp_free(tmp);
453 }
454 // Restore current task's thread_limit from CG root
455 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
456 this_thr->th.th_current_task->td_icvs.thread_limit =
457 this_thr->th.th_cg_roots->cg_thread_limit;
458
459 this_thr->th.th_teams_microtask = NULL;
460 this_thr->th.th_teams_level = 0;
461 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
462 va_end(ap);
463#if KMP_STATS_ENABLED
464 if (previous_state == stats_state_e::SERIAL_REGION) {
465 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
466 KMP_SET_THREAD_STATE(previous_state);
467 } else {
468 KMP_POP_PARTITIONED_TIMER();
469 }
470#endif // KMP_STATS_ENABLED
471}
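
// Illustrative sketch, not part of the upstream file: approximate lowering of
// "#pragma omp teams num_teams(2) thread_limit(8)". The "example_*" names and
// the ident_t initializer are hypothetical.
static void example_teams_outlined(kmp_int32 *gtid, kmp_int32 *bound_tid) {
  // Runs once on the initial thread of every team in the league.
  (void)gtid;
  (void)bound_tid;
}

static void example_teams(void) {
  static ident_t example_loc = {0, KMP_IDENT_KMPC, 0, 0,
                                ";example.c;example_teams;20;1;;"};
  kmp_int32 gtid = __kmpc_global_thread_num(&example_loc);
  __kmpc_push_num_teams(&example_loc, gtid, /*num_teams=*/2,
                        /*num_threads=*/8);
  __kmpc_fork_teams(&example_loc, /*argc=*/0,
                    (kmpc_micro)example_teams_outlined);
}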
472
473// I don't think this function should ever have been exported.
474// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
475// openmp code ever called it, but it's been exported from the RTL for so
476// long that I'm afraid to remove the definition.
477int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
478
491void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
492 // The implementation is now in kmp_runtime.cpp so that it can share static
493 // functions with kmp_fork_call since the tasks to be done are similar in
494 // each case.
495 __kmp_assert_valid_gtid(global_tid);
496#if OMPT_SUPPORT
497 OMPT_STORE_RETURN_ADDRESS(global_tid);
498#endif
499 __kmp_serialized_parallel(loc, global_tid);
500}
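
// Illustrative sketch, not part of the upstream file: when an if() clause on a
// parallel construct evaluates to false, the compiler typically serializes the
// region by bracketing a direct call to the outlined body between
// __kmpc_serialized_parallel() and __kmpc_end_serialized_parallel(). The
// "example_*" names are hypothetical.
static void example_serialized_if0(ident_t *loc, kmpc_micro outlined) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc);
  kmp_int32 bound_tid = 0;
  __kmpc_serialized_parallel(loc, gtid);
  outlined(&gtid, &bound_tid); // executed by the encountering thread only
  __kmpc_end_serialized_parallel(loc, gtid);
}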
501
509void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
510 kmp_internal_control_t *top;
511 kmp_info_t *this_thr;
512 kmp_team_t *serial_team;
513
514 KC_TRACE(10,
515 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
516
517 /* skip all this code for autopar serialized loops since it results in
518 unacceptable overhead */
519 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
520 return;
521
522 // Not autopar code
523 __kmp_assert_valid_gtid(global_tid);
524 if (!TCR_4(__kmp_init_parallel))
525 __kmp_parallel_initialize();
526
527 __kmp_resume_if_soft_paused();
528
529 this_thr = __kmp_threads[global_tid];
530 serial_team = this_thr->th.th_serial_team;
531
532 kmp_task_team_t *task_team = this_thr->th.th_task_team;
533 // we need to wait for the proxy tasks before finishing the thread
534 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
535 task_team->tt.tt_hidden_helper_task_encountered))
536 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
537
538 KMP_MB();
539 KMP_DEBUG_ASSERT(serial_team);
540 KMP_ASSERT(serial_team->t.t_serialized);
541 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
542 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
543 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
544 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
545
546#if OMPT_SUPPORT
547 if (ompt_enabled.enabled &&
548 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
549 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
550 if (ompt_enabled.ompt_callback_implicit_task) {
551 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
552 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
553 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
554 }
555
556 // reset/clear the task id only after unlinking the task
557 ompt_data_t *parent_task_data;
558 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
559
560 if (ompt_enabled.ompt_callback_parallel_end) {
561 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
562 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
563 ompt_parallel_invoker_program | ompt_parallel_team,
564 OMPT_LOAD_RETURN_ADDRESS(global_tid));
565 }
566 __ompt_lw_taskteam_unlink(this_thr);
567 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
568 }
569#endif
570
571 /* If necessary, pop the internal control stack values and replace the team
572 * values */
573 top = serial_team->t.t_control_stack_top;
574 if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
575 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
576 serial_team->t.t_control_stack_top = top->next;
577 __kmp_free(top);
578 }
579
580 /* pop dispatch buffers stack */
581 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
582 {
583 dispatch_private_info_t *disp_buffer =
584 serial_team->t.t_dispatch->th_disp_buffer;
585 serial_team->t.t_dispatch->th_disp_buffer =
586 serial_team->t.t_dispatch->th_disp_buffer->next;
587 __kmp_free(disp_buffer);
588 }
589 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
590
591 --serial_team->t.t_serialized;
592 if (serial_team->t.t_serialized == 0) {
593
594 /* return to the parallel section */
595
596#if KMP_ARCH_X86 || KMP_ARCH_X86_64
597 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
598 __kmp_clear_x87_fpu_status_word();
599 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
600 __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
601 }
602#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
603
604 __kmp_pop_current_task_from_thread(this_thr);
605#if OMPD_SUPPORT
606 if (ompd_state & OMPD_ENABLE_BP)
607 ompd_bp_parallel_end();
608#endif
609
610 this_thr->th.th_team = serial_team->t.t_parent;
611 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
612
613 /* restore values cached in the thread */
614 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
615 this_thr->th.th_team_master =
616 serial_team->t.t_parent->t.t_threads[0]; /* JPH */
617 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
618
619 /* TODO the below shouldn't need to be adjusted for serialized teams */
620 this_thr->th.th_dispatch =
621 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
622
623 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
624 this_thr->th.th_current_task->td_flags.executing = 1;
625
626 if (__kmp_tasking_mode != tskm_immediate_exec) {
627 // Copy the task team from the new child / old parent team to the thread.
628 this_thr->th.th_task_team =
629 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
630 KA_TRACE(20,
631 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
632 "team %p\n",
633 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
634 }
635#if KMP_AFFINITY_SUPPORTED
636 if (this_thr->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
637 __kmp_reset_root_init_mask(global_tid);
638 }
639#endif
640 } else {
641 if (__kmp_tasking_mode != tskm_immediate_exec) {
642 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
643 "depth of serial team %p to %d\n",
644 global_tid, serial_team, serial_team->t.t_serialized));
645 }
646 }
647
648 serial_team->t.t_level--;
649 if (__kmp_env_consistency_check)
650 __kmp_pop_parallel(global_tid, NULL);
651#if OMPT_SUPPORT
652 if (ompt_enabled.enabled)
653 this_thr->th.ompt_thread_info.state =
654 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
655 : ompt_state_work_parallel);
656#endif
657}
658
667void __kmpc_flush(ident_t *loc) {
668 KC_TRACE(10, ("__kmpc_flush: called\n"));
669
670 /* need an explicit __mf() here since the library uses volatile instead */
671 KMP_MB(); /* Flush all pending memory write invalidates. */
672
673#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
674#if KMP_MIC
675// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
676// We shouldn't need it, though, since the ABI rules require that
677// * If the compiler generates NGO stores it also generates the fence
678// * If users hand-code NGO stores they should insert the fence
679// therefore no incomplete unordered stores should be visible.
680#else
681 // C74404
682 // This is to address non-temporal store instructions (sfence needed).
683 // The clflush instruction is also addressed (mfence needed).
684 // Probably the non-temporal load movntdqa instruction should also be
685 // addressed.
686 // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
687 if (!__kmp_cpuinfo.initialized) {
688 __kmp_query_cpuid(&__kmp_cpuinfo);
689 }
690 if (!__kmp_cpuinfo.flags.sse2) {
691 // CPU cannot execute SSE2 instructions.
692 } else {
693#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
694 _mm_mfence();
695#elif KMP_COMPILER_MSVC
696 MemoryBarrier();
697#else
698 __sync_synchronize();
699#endif // KMP_COMPILER_ICC || KMP_COMPILER_ICX
700 }
701#endif // KMP_MIC
702#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
703 KMP_ARCH_RISCV64)
704// Nothing to see here; move along.
705#elif KMP_ARCH_PPC64
706// Nothing needed here (we have a real MB above).
707#else
708#error Unknown or unsupported architecture
709#endif
710
711#if OMPT_SUPPORT && OMPT_OPTIONAL
712 if (ompt_enabled.ompt_callback_flush) {
713 ompt_callbacks.ompt_callback(ompt_callback_flush)(
714 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
715 }
716#endif
717}
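
// Illustrative sketch, not part of the upstream file: "#pragma omp flush"
// lowers to a single call of the routine above; the ident_t initializer is
// hypothetical.
static void example_flush(void) {
  static ident_t example_loc = {0, KMP_IDENT_KMPC, 0, 0,
                                ";example.c;example_flush;30;1;;"};
  __kmpc_flush(&example_loc); // KMP_MB() plus an architecture-specific fence
}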
718
719/* -------------------------------------------------------------------------- */
727void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
728 KMP_COUNT_BLOCK(OMP_BARRIER);
729 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
730 __kmp_assert_valid_gtid(global_tid);
731
732 if (!TCR_4(__kmp_init_parallel))
733 __kmp_parallel_initialize();
734
735 __kmp_resume_if_soft_paused();
736
737 if (__kmp_env_consistency_check) {
738 if (loc == 0) {
739 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
740 }
741 __kmp_check_barrier(global_tid, ct_barrier, loc);
742 }
743
744#if OMPT_SUPPORT
745 ompt_frame_t *ompt_frame;
746 if (ompt_enabled.enabled) {
747 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
748 if (ompt_frame->enter_frame.ptr == NULL)
749 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
750 }
751 OMPT_STORE_RETURN_ADDRESS(global_tid);
752#endif
753 __kmp_threads[global_tid]->th.th_ident = loc;
754 // TODO: explicit barrier_wait_id:
755 // this function is called when 'barrier' directive is present or
756 // implicit barrier at the end of a worksharing construct.
757 // 1) better to add a per-thread barrier counter to a thread data structure
758 // 2) set to 0 when a new team is created
759 // 4) no sync is required
760
761 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
762#if OMPT_SUPPORT && OMPT_OPTIONAL
763 if (ompt_enabled.enabled) {
764 ompt_frame->enter_frame = ompt_data_none;
765 }
766#endif
767}
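
// Illustrative sketch, not part of the upstream file: an explicit
// "#pragma omp barrier" becomes one __kmpc_barrier() call executed by every
// thread of the current team. The ident_t initializer is hypothetical.
static void example_barrier(void) {
  static ident_t example_loc = {0, KMP_IDENT_KMPC, 0, 0,
                                ";example.c;example_barrier;40;1;;"};
  __kmpc_barrier(&example_loc, __kmpc_global_thread_num(&example_loc));
}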
768
769/* The BARRIER for a MASTER section is always explicit */
776kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
777 int status = 0;
778
779 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
780 __kmp_assert_valid_gtid(global_tid);
781
782 if (!TCR_4(__kmp_init_parallel))
783 __kmp_parallel_initialize();
784
785 __kmp_resume_if_soft_paused();
786
787 if (KMP_MASTER_GTID(global_tid)) {
788 KMP_COUNT_BLOCK(OMP_MASTER);
789 KMP_PUSH_PARTITIONED_TIMER(OMP_master);
790 status = 1;
791 }
792
793#if OMPT_SUPPORT && OMPT_OPTIONAL
794 if (status) {
795 if (ompt_enabled.ompt_callback_masked) {
796 kmp_info_t *this_thr = __kmp_threads[global_tid];
797 kmp_team_t *team = this_thr->th.th_team;
798
799 int tid = __kmp_tid_from_gtid(global_tid);
800 ompt_callbacks.ompt_callback(ompt_callback_masked)(
801 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
802 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
803 OMPT_GET_RETURN_ADDRESS(0));
804 }
805 }
806#endif
807
808 if (__kmp_env_consistency_check) {
809#if KMP_USE_DYNAMIC_LOCK
810 if (status)
811 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
812 else
813 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
814#else
815 if (status)
816 __kmp_push_sync(global_tid, ct_master, loc, NULL);
817 else
818 __kmp_check_sync(global_tid, ct_master, loc, NULL);
819#endif
820 }
821
822 return status;
823}
824
833void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
834 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
835 __kmp_assert_valid_gtid(global_tid);
836 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
837 KMP_POP_PARTITIONED_TIMER();
838
839#if OMPT_SUPPORT && OMPT_OPTIONAL
840 kmp_info_t *this_thr = __kmp_threads[global_tid];
841 kmp_team_t *team = this_thr->th.th_team;
842 if (ompt_enabled.ompt_callback_masked) {
843 int tid = __kmp_tid_from_gtid(global_tid);
844 ompt_callbacks.ompt_callback(ompt_callback_masked)(
845 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
846 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
847 OMPT_GET_RETURN_ADDRESS(0));
848 }
849#endif
850
851 if (__kmp_env_consistency_check) {
852 if (KMP_MASTER_GTID(global_tid))
853 __kmp_pop_sync(global_tid, ct_master, loc);
854 }
855}
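
// Illustrative sketch, not part of the upstream file: "#pragma omp master"
// (and, analogously, "#pragma omp masked filter(...)") compiles into a guarded
// region; only the thread for which __kmpc_master() returns 1 executes the
// body and the matching __kmpc_end_master(). The "example_*" names are
// hypothetical.
static void example_master(void) {
  static ident_t example_loc = {0, KMP_IDENT_KMPC, 0, 0,
                                ";example.c;example_master;50;1;;"};
  kmp_int32 gtid = __kmpc_global_thread_num(&example_loc);
  if (__kmpc_master(&example_loc, gtid)) {
    // ... code executed by the primary thread only ...
    __kmpc_end_master(&example_loc, gtid);
  }
  // Note: there is no implied barrier at the end of a master construct.
}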
856
865kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
866 int status = 0;
867 int tid;
868 KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
869 __kmp_assert_valid_gtid(global_tid);
870
871 if (!TCR_4(__kmp_init_parallel))
872 __kmp_parallel_initialize();
873
874 __kmp_resume_if_soft_paused();
875
876 tid = __kmp_tid_from_gtid(global_tid);
877 if (tid == filter) {
878 KMP_COUNT_BLOCK(OMP_MASKED);
879 KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
880 status = 1;
881 }
882
883#if OMPT_SUPPORT && OMPT_OPTIONAL
884 if (status) {
885 if (ompt_enabled.ompt_callback_masked) {
886 kmp_info_t *this_thr = __kmp_threads[global_tid];
887 kmp_team_t *team = this_thr->th.th_team;
888 ompt_callbacks.ompt_callback(ompt_callback_masked)(
889 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
890 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
891 OMPT_GET_RETURN_ADDRESS(0));
892 }
893 }
894#endif
895
896 if (__kmp_env_consistency_check) {
897#if KMP_USE_DYNAMIC_LOCK
898 if (status)
899 __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
900 else
901 __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
902#else
903 if (status)
904 __kmp_push_sync(global_tid, ct_masked, loc, NULL);
905 else
906 __kmp_check_sync(global_tid, ct_masked, loc, NULL);
907#endif
908 }
909
910 return status;
911}
912
921void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
922 KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
923 __kmp_assert_valid_gtid(global_tid);
924 KMP_POP_PARTITIONED_TIMER();
925
926#if OMPT_SUPPORT && OMPT_OPTIONAL
927 kmp_info_t *this_thr = __kmp_threads[global_tid];
928 kmp_team_t *team = this_thr->th.th_team;
929 if (ompt_enabled.ompt_callback_masked) {
930 int tid = __kmp_tid_from_gtid(global_tid);
931 ompt_callbacks.ompt_callback(ompt_callback_masked)(
932 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
933 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
934 OMPT_GET_RETURN_ADDRESS(0));
935 }
936#endif
937
938 if (__kmp_env_consistency_check) {
939 __kmp_pop_sync(global_tid, ct_masked, loc);
940 }
941}
942
950void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
951 int cid = 0;
952 kmp_info_t *th;
953 KMP_DEBUG_ASSERT(__kmp_init_serial);
954
955 KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
956 __kmp_assert_valid_gtid(gtid);
957
958 if (!TCR_4(__kmp_init_parallel))
959 __kmp_parallel_initialize();
960
961 __kmp_resume_if_soft_paused();
962
963#if USE_ITT_BUILD
964 __kmp_itt_ordered_prep(gtid);
965// TODO: ordered_wait_id
966#endif /* USE_ITT_BUILD */
967
968 th = __kmp_threads[gtid];
969
970#if OMPT_SUPPORT && OMPT_OPTIONAL
971 kmp_team_t *team;
972 ompt_wait_id_t lck;
973 void *codeptr_ra;
974 OMPT_STORE_RETURN_ADDRESS(gtid);
975 if (ompt_enabled.enabled) {
976 team = __kmp_team_from_gtid(gtid);
977 lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
978 /* OMPT state update */
979 th->th.ompt_thread_info.wait_id = lck;
980 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
981
982 /* OMPT event callback */
983 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
984 if (ompt_enabled.ompt_callback_mutex_acquire) {
985 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
986 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
987 codeptr_ra);
988 }
989 }
990#endif
991
992 if (th->th.th_dispatch->th_deo_fcn != 0)
993 (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
994 else
995 __kmp_parallel_deo(&gtid, &cid, loc);
996
997#if OMPT_SUPPORT && OMPT_OPTIONAL
998 if (ompt_enabled.enabled) {
999 /* OMPT state update */
1000 th->th.ompt_thread_info.state = ompt_state_work_parallel;
1001 th->th.ompt_thread_info.wait_id = 0;
1002
1003 /* OMPT event callback */
1004 if (ompt_enabled.ompt_callback_mutex_acquired) {
1005 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1006 ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1007 }
1008 }
1009#endif
1010
1011#if USE_ITT_BUILD
1012 __kmp_itt_ordered_start(gtid);
1013#endif /* USE_ITT_BUILD */
1014}
1015
1023void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
1024 int cid = 0;
1025 kmp_info_t *th;
1026
1027 KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
1028 __kmp_assert_valid_gtid(gtid);
1029
1030#if USE_ITT_BUILD
1031 __kmp_itt_ordered_end(gtid);
1032// TODO: ordered_wait_id
1033#endif /* USE_ITT_BUILD */
1034
1035 th = __kmp_threads[gtid];
1036
1037 if (th->th.th_dispatch->th_dxo_fcn != 0)
1038 (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
1039 else
1040 __kmp_parallel_dxo(&gtid, &cid, loc);
1041
1042#if OMPT_SUPPORT && OMPT_OPTIONAL
1043 OMPT_STORE_RETURN_ADDRESS(gtid);
1044 if (ompt_enabled.ompt_callback_mutex_released) {
1045 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1046 ompt_mutex_ordered,
1047 (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1048 ->t.t_ordered.dt.t_value,
1049 OMPT_LOAD_RETURN_ADDRESS(gtid));
1050 }
1051#endif
1052}
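
// Illustrative sketch, not part of the upstream file: within a loop scheduled
// through an ordered dispatch (the __kmpc_dispatch_* entry points live in
// kmp_dispatch.cpp), each "#pragma omp ordered" block is bracketed as below so
// the blocks retire in iteration order. "example_loc" is hypothetical and must
// be the ident_t of the enclosing ordered loop.
//
//   __kmpc_ordered(&example_loc, gtid);
//   /* ... ordered block body, executed in sequential iteration order ... */
//   __kmpc_end_ordered(&example_loc, gtid);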
1053
1054#if KMP_USE_DYNAMIC_LOCK
1055
1056static __forceinline void
1057__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
1058 kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1059 // Pointer to the allocated indirect lock is written to crit, while indexing
1060 // is ignored.
1061 void *idx;
1062 kmp_indirect_lock_t **lck;
1063 lck = (kmp_indirect_lock_t **)crit;
1064 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1065 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1066 KMP_SET_I_LOCK_LOCATION(ilk, loc);
1067 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
1068 KA_TRACE(20,
1069 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1070#if USE_ITT_BUILD
1071 __kmp_itt_critical_creating(ilk->lock, loc);
1072#endif
1073 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1074 if (status == 0) {
1075#if USE_ITT_BUILD
1076 __kmp_itt_critical_destroyed(ilk->lock);
1077#endif
1078 // We don't really need to destroy the unclaimed lock here since it will be
1079 // cleaned up at program exit.
1080 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
1081 }
1082 KMP_DEBUG_ASSERT(*lck != NULL);
1083}
1084
1085// Fast-path acquire tas lock
1086#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1087 { \
1088 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1089 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1090 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1091 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1092 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1093 kmp_uint32 spins; \
1094 KMP_FSYNC_PREPARE(l); \
1095 KMP_INIT_YIELD(spins); \
1096 kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1097 do { \
1098 if (TCR_4(__kmp_nth) > \
1099 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1100 KMP_YIELD(TRUE); \
1101 } else { \
1102 KMP_YIELD_SPIN(spins); \
1103 } \
1104 __kmp_spin_backoff(&backoff); \
1105 } while ( \
1106 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1107 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1108 } \
1109 KMP_FSYNC_ACQUIRED(l); \
1110 }
1111
1112// Fast-path test tas lock
1113#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1114 { \
1115 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1116 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1117 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1118 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1119 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1120 }
1121
1122// Fast-path release tas lock
1123#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1124 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1125
1126#if KMP_USE_FUTEX
1127
1128#include <sys/syscall.h>
1129#include <unistd.h>
1130#ifndef FUTEX_WAIT
1131#define FUTEX_WAIT 0
1132#endif
1133#ifndef FUTEX_WAKE
1134#define FUTEX_WAKE 1
1135#endif
1136
1137// Fast-path acquire futex lock
1138#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1139 { \
1140 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1141 kmp_int32 gtid_code = (gtid + 1) << 1; \
1142 KMP_MB(); \
1143 KMP_FSYNC_PREPARE(ftx); \
1144 kmp_int32 poll_val; \
1145 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1146 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1147 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1148 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1149 if (!cond) { \
1150 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1151 poll_val | \
1152 KMP_LOCK_BUSY(1, futex))) { \
1153 continue; \
1154 } \
1155 poll_val |= KMP_LOCK_BUSY(1, futex); \
1156 } \
1157 kmp_int32 rc; \
1158 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1159 NULL, NULL, 0)) != 0) { \
1160 continue; \
1161 } \
1162 gtid_code |= 1; \
1163 } \
1164 KMP_FSYNC_ACQUIRED(ftx); \
1165 }
1166
1167// Fast-path test futex lock
1168#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1169 { \
1170 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1171 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1172 KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
1173 KMP_FSYNC_ACQUIRED(ftx); \
1174 rc = TRUE; \
1175 } else { \
1176 rc = FALSE; \
1177 } \
1178 }
1179
1180// Fast-path release futex lock
1181#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1182 { \
1183 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1184 KMP_MB(); \
1185 KMP_FSYNC_RELEASING(ftx); \
1186 kmp_int32 poll_val = \
1187 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1188 if (KMP_LOCK_STRIP(poll_val) & 1) { \
1189 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1190 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1191 } \
1192 KMP_MB(); \
1193 KMP_YIELD_OVERSUB(); \
1194 }
1195
1196#endif // KMP_USE_FUTEX
1197
1198#else // KMP_USE_DYNAMIC_LOCK
1199
1200static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1201 ident_t const *loc,
1202 kmp_int32 gtid) {
1203 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1204
1205 // Because of the double-check, the following load doesn't need to be volatile
1206 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1207
1208 if (lck == NULL) {
1209 void *idx;
1210
1211 // Allocate & initialize the lock.
1212 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1213 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1214 __kmp_init_user_lock_with_checks(lck);
1215 __kmp_set_user_lock_location(lck, loc);
1216#if USE_ITT_BUILD
1217 __kmp_itt_critical_creating(lck);
1218// __kmp_itt_critical_creating() should be called *before* the first usage
1219// of the underlying lock. It is the only place where we can guarantee it. There
1220// is a chance the lock will be destroyed with no usage, but that is not a
1221// problem, because this is not a real event seen by the user but rather the
1222// setting of a name for an object (the lock). See more details in kmp_itt.h.
1223#endif /* USE_ITT_BUILD */
1224
1225 // Use a cmpxchg instruction to slam the start of the critical section with
1226 // the lock pointer. If another thread beat us to it, deallocate the lock,
1227 // and use the lock that the other thread allocated.
1228 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1229
1230 if (status == 0) {
1231// Deallocate the lock and reload the value.
1232#if USE_ITT_BUILD
1233 __kmp_itt_critical_destroyed(lck);
1234// Let ITT know the lock is destroyed and the same memory location may be reused
1235// for another purpose.
1236#endif /* USE_ITT_BUILD */
1237 __kmp_destroy_user_lock_with_checks(lck);
1238 __kmp_user_lock_free(&idx, gtid, lck);
1239 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1240 KMP_DEBUG_ASSERT(lck != NULL);
1241 }
1242 }
1243 return lck;
1244}
1245
1246#endif // KMP_USE_DYNAMIC_LOCK
1247
1258void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1259 kmp_critical_name *crit) {
1260#if KMP_USE_DYNAMIC_LOCK
1261#if OMPT_SUPPORT && OMPT_OPTIONAL
1262 OMPT_STORE_RETURN_ADDRESS(global_tid);
1263#endif // OMPT_SUPPORT
1264 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1265#else
1266 KMP_COUNT_BLOCK(OMP_CRITICAL);
1267#if OMPT_SUPPORT && OMPT_OPTIONAL
1268 ompt_state_t prev_state = ompt_state_undefined;
1269 ompt_thread_info_t ti;
1270#endif
1271 kmp_user_lock_p lck;
1272
1273 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1274 __kmp_assert_valid_gtid(global_tid);
1275
1276 // TODO: add THR_OVHD_STATE
1277
1278 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1279 KMP_CHECK_USER_LOCK_INIT();
1280
1281 if ((__kmp_user_lock_kind == lk_tas) &&
1282 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1283 lck = (kmp_user_lock_p)crit;
1284 }
1285#if KMP_USE_FUTEX
1286 else if ((__kmp_user_lock_kind == lk_futex) &&
1287 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1288 lck = (kmp_user_lock_p)crit;
1289 }
1290#endif
1291 else { // ticket, queuing or drdpa
1292 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1293 }
1294
1295 if (__kmp_env_consistency_check)
1296 __kmp_push_sync(global_tid, ct_critical, loc, lck);
1297
1298 // Since the critical directive binds to all threads, not just the current
1299 // team, we have to check this even if we are in a serialized team.
1300 // Also, even if we are the uber thread, we still have to acquire the lock,
1301 // as we have to contend with sibling threads.
1302
1303#if USE_ITT_BUILD
1304 __kmp_itt_critical_acquiring(lck);
1305#endif /* USE_ITT_BUILD */
1306#if OMPT_SUPPORT && OMPT_OPTIONAL
1307 OMPT_STORE_RETURN_ADDRESS(global_tid);
1308 void *codeptr_ra = NULL;
1309 if (ompt_enabled.enabled) {
1310 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1311 /* OMPT state update */
1312 prev_state = ti.state;
1313 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1314 ti.state = ompt_state_wait_critical;
1315
1316 /* OMPT event callback */
1317 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1318 if (ompt_enabled.ompt_callback_mutex_acquire) {
1319 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1320 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1321 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1322 }
1323 }
1324#endif
1325 // The value of 'crit' should be usable as the critical_id of the critical
1326 // section directive.
1327 __kmp_acquire_user_lock_with_checks(lck, global_tid);
1328
1329#if USE_ITT_BUILD
1330 __kmp_itt_critical_acquired(lck);
1331#endif /* USE_ITT_BUILD */
1332#if OMPT_SUPPORT && OMPT_OPTIONAL
1333 if (ompt_enabled.enabled) {
1334 /* OMPT state update */
1335 ti.state = prev_state;
1336 ti.wait_id = 0;
1337
1338 /* OMPT event callback */
1339 if (ompt_enabled.ompt_callback_mutex_acquired) {
1340 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1341 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1342 }
1343 }
1344#endif
1345 KMP_POP_PARTITIONED_TIMER();
1346
1347 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1348 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1349#endif // KMP_USE_DYNAMIC_LOCK
1350}
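
// Illustrative sketch, not part of the upstream file: a named critical
// construct. The compiler emits one zero-initialized kmp_critical_name per
// critical name and brackets the body with __kmpc_critical() and
// __kmpc_end_critical() (defined later in this file). The "example_*" names
// and the ident_t initializer are hypothetical.
static kmp_critical_name example_crit_lock; // shared lock storage for the name
static void example_critical(int *shared_counter) {
  static ident_t example_loc = {0, KMP_IDENT_KMPC, 0, 0,
                                ";example.c;example_critical;60;1;;"};
  kmp_int32 gtid = __kmpc_global_thread_num(&example_loc);
  __kmpc_critical(&example_loc, gtid, &example_crit_lock);
  ++(*shared_counter); // body runs under mutual exclusion
  __kmpc_end_critical(&example_loc, gtid, &example_crit_lock);
}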
1351
1352#if KMP_USE_DYNAMIC_LOCK
1353
1354// Converts the given hint to an internal lock implementation
1355static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1356#if KMP_USE_TSX
1357#define KMP_TSX_LOCK(seq) lockseq_##seq
1358#else
1359#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1360#endif
1361
1362#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1363#define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
1364#else
1365#define KMP_CPUINFO_RTM 0
1366#endif
1367
1368 // Hints that do not require further logic
1369 if (hint & kmp_lock_hint_hle)
1370 return KMP_TSX_LOCK(hle);
1371 if (hint & kmp_lock_hint_rtm)
1372 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1373 if (hint & kmp_lock_hint_adaptive)
1374 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1375
1376 // Rule out conflicting hints first by returning the default lock
1377 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1378 return __kmp_user_lock_seq;
1379 if ((hint & omp_lock_hint_speculative) &&
1380 (hint & omp_lock_hint_nonspeculative))
1381 return __kmp_user_lock_seq;
1382
1383 // Do not even consider speculation when it appears to be contended
1384 if (hint & omp_lock_hint_contended)
1385 return lockseq_queuing;
1386
1387 // Uncontended lock without speculation
1388 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1389 return lockseq_tas;
1390
1391 // Use RTM lock for speculation
1392 if (hint & omp_lock_hint_speculative)
1393 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1394
1395 return __kmp_user_lock_seq;
1396}
1397
1398#if OMPT_SUPPORT && OMPT_OPTIONAL
1399#if KMP_USE_DYNAMIC_LOCK
1400static kmp_mutex_impl_t
1401__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1402 if (user_lock) {
1403 switch (KMP_EXTRACT_D_TAG(user_lock)) {
1404 case 0:
1405 break;
1406#if KMP_USE_FUTEX
1407 case locktag_futex:
1408 return kmp_mutex_impl_queuing;
1409#endif
1410 case locktag_tas:
1411 return kmp_mutex_impl_spin;
1412#if KMP_USE_TSX
1413 case locktag_hle:
1414 case locktag_rtm_spin:
1415 return kmp_mutex_impl_speculative;
1416#endif
1417 default:
1418 return kmp_mutex_impl_none;
1419 }
1420 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1421 }
1422 KMP_ASSERT(ilock);
1423 switch (ilock->type) {
1424#if KMP_USE_TSX
1425 case locktag_adaptive:
1426 case locktag_rtm_queuing:
1427 return kmp_mutex_impl_speculative;
1428#endif
1429 case locktag_nested_tas:
1430 return kmp_mutex_impl_spin;
1431#if KMP_USE_FUTEX
1432 case locktag_nested_futex:
1433#endif
1434 case locktag_ticket:
1435 case locktag_queuing:
1436 case locktag_drdpa:
1437 case locktag_nested_ticket:
1438 case locktag_nested_queuing:
1439 case locktag_nested_drdpa:
1440 return kmp_mutex_impl_queuing;
1441 default:
1442 return kmp_mutex_impl_none;
1443 }
1444}
1445#else
1446// For locks without dynamic binding
1447static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1448 switch (__kmp_user_lock_kind) {
1449 case lk_tas:
1450 return kmp_mutex_impl_spin;
1451#if KMP_USE_FUTEX
1452 case lk_futex:
1453#endif
1454 case lk_ticket:
1455 case lk_queuing:
1456 case lk_drdpa:
1457 return kmp_mutex_impl_queuing;
1458#if KMP_USE_TSX
1459 case lk_hle:
1460 case lk_rtm_queuing:
1461 case lk_rtm_spin:
1462 case lk_adaptive:
1463 return kmp_mutex_impl_speculative;
1464#endif
1465 default:
1466 return kmp_mutex_impl_none;
1467 }
1468}
1469#endif // KMP_USE_DYNAMIC_LOCK
1470#endif // OMPT_SUPPORT && OMPT_OPTIONAL
1471
1485void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1486 kmp_critical_name *crit, uint32_t hint) {
1487 KMP_COUNT_BLOCK(OMP_CRITICAL);
1488 kmp_user_lock_p lck;
1489#if OMPT_SUPPORT && OMPT_OPTIONAL
1490 ompt_state_t prev_state = ompt_state_undefined;
1491 ompt_thread_info_t ti;
1492 // This is the case if called from __kmpc_critical:
1493 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1494 if (!codeptr)
1495 codeptr = OMPT_GET_RETURN_ADDRESS(0);
1496#endif
1497
1498 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1499 __kmp_assert_valid_gtid(global_tid);
1500
1501 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1502 // Check if it is initialized.
1503 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1504 kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1505 if (*lk == 0) {
1506 if (KMP_IS_D_LOCK(lockseq)) {
1507 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1508 KMP_GET_D_TAG(lockseq));
1509 } else {
1510 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1511 }
1512 }
1513 // Branch for accessing the actual lock object and set operation. This
1514 // branching is inevitable since this lock initialization does not follow the
1515 // normal dispatch path (lock table is not used).
1516 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1517 lck = (kmp_user_lock_p)lk;
1518 if (__kmp_env_consistency_check) {
1519 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1520 __kmp_map_hint_to_lock(hint));
1521 }
1522#if USE_ITT_BUILD
1523 __kmp_itt_critical_acquiring(lck);
1524#endif
1525#if OMPT_SUPPORT && OMPT_OPTIONAL
1526 if (ompt_enabled.enabled) {
1527 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1528 /* OMPT state update */
1529 prev_state = ti.state;
1530 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1531 ti.state = ompt_state_wait_critical;
1532
1533 /* OMPT event callback */
1534 if (ompt_enabled.ompt_callback_mutex_acquire) {
1535 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1536 ompt_mutex_critical, (unsigned int)hint,
1537 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1538 codeptr);
1539 }
1540 }
1541#endif
1542#if KMP_USE_INLINED_TAS
1543 if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1544 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1545 } else
1546#elif KMP_USE_INLINED_FUTEX
1547 if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1548 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1549 } else
1550#endif
1551 {
1552 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1553 }
1554 } else {
1555 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1556 lck = ilk->lock;
1557 if (__kmp_env_consistency_check) {
1558 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1559 __kmp_map_hint_to_lock(hint));
1560 }
1561#if USE_ITT_BUILD
1562 __kmp_itt_critical_acquiring(lck);
1563#endif
1564#if OMPT_SUPPORT && OMPT_OPTIONAL
1565 if (ompt_enabled.enabled) {
1566 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1567 /* OMPT state update */
1568 prev_state = ti.state;
1569 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1570 ti.state = ompt_state_wait_critical;
1571
1572 /* OMPT event callback */
1573 if (ompt_enabled.ompt_callback_mutex_acquire) {
1574 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1575 ompt_mutex_critical, (unsigned int)hint,
1576 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1577 codeptr);
1578 }
1579 }
1580#endif
1581 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1582 }
1583 KMP_POP_PARTITIONED_TIMER();
1584
1585#if USE_ITT_BUILD
1586 __kmp_itt_critical_acquired(lck);
1587#endif /* USE_ITT_BUILD */
1588#if OMPT_SUPPORT && OMPT_OPTIONAL
1589 if (ompt_enabled.enabled) {
1590 /* OMPT state update */
1591 ti.state = prev_state;
1592 ti.wait_id = 0;
1593
1594 /* OMPT event callback */
1595 if (ompt_enabled.ompt_callback_mutex_acquired) {
1596 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1597 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1598 }
1599 }
1600#endif
1601
1602 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1603 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1604} // __kmpc_critical_with_hint
1605
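// Illustrative sketch, not part of the upstream file: with an OpenMP 4.5+ hint
// clause, e.g. "#pragma omp critical (name) hint(omp_sync_hint_speculative)",
// the compiler calls the _with_hint variant so __kmp_map_hint_to_lock() can
// select a lock implementation. The "example_*" names are hypothetical.
static kmp_critical_name example_hinted_lock;
static void example_critical_with_hint(void) {
  static ident_t example_loc = {0, KMP_IDENT_KMPC, 0, 0,
                                ";example.c;example_critical_with_hint;70;1;;"};
  kmp_int32 gtid = __kmpc_global_thread_num(&example_loc);
  __kmpc_critical_with_hint(&example_loc, gtid, &example_hinted_lock,
                            omp_lock_hint_speculative);
  // ... body runs under mutual exclusion ...
  __kmpc_end_critical(&example_loc, gtid, &example_hinted_lock);
}
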
1606#endif // KMP_USE_DYNAMIC_LOCK
1607
1617void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1618 kmp_critical_name *crit) {
1619 kmp_user_lock_p lck;
1620
1621 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1622
1623#if KMP_USE_DYNAMIC_LOCK
1624 int locktag = KMP_EXTRACT_D_TAG(crit);
1625 if (locktag) {
1626 lck = (kmp_user_lock_p)crit;
1627 KMP_ASSERT(lck != NULL);
1628 if (__kmp_env_consistency_check) {
1629 __kmp_pop_sync(global_tid, ct_critical, loc);
1630 }
1631#if USE_ITT_BUILD
1632 __kmp_itt_critical_releasing(lck);
1633#endif
1634#if KMP_USE_INLINED_TAS
1635 if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1636 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1637 } else
1638#elif KMP_USE_INLINED_FUTEX
1639 if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1640 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1641 } else
1642#endif
1643 {
1644 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1645 }
1646 } else {
1647 kmp_indirect_lock_t *ilk =
1648 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1649 KMP_ASSERT(ilk != NULL);
1650 lck = ilk->lock;
1651 if (__kmp_env_consistency_check) {
1652 __kmp_pop_sync(global_tid, ct_critical, loc);
1653 }
1654#if USE_ITT_BUILD
1655 __kmp_itt_critical_releasing(lck);
1656#endif
1657 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1658 }
1659
1660#else // KMP_USE_DYNAMIC_LOCK
1661
1662 if ((__kmp_user_lock_kind == lk_tas) &&
1663 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1664 lck = (kmp_user_lock_p)crit;
1665 }
1666#if KMP_USE_FUTEX
1667 else if ((__kmp_user_lock_kind == lk_futex) &&
1668 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1669 lck = (kmp_user_lock_p)crit;
1670 }
1671#endif
1672 else { // ticket, queuing or drdpa
1673 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1674 }
1675
1676 KMP_ASSERT(lck != NULL);
1677
1678 if (__kmp_env_consistency_check)
1679 __kmp_pop_sync(global_tid, ct_critical, loc);
1680
1681#if USE_ITT_BUILD
1682 __kmp_itt_critical_releasing(lck);
1683#endif /* USE_ITT_BUILD */
1684 // The value of 'crit' should be usable as the critical_id of the critical
1685 // section directive.
1686 __kmp_release_user_lock_with_checks(lck, global_tid);
1687
1688#endif // KMP_USE_DYNAMIC_LOCK
1689
1690#if OMPT_SUPPORT && OMPT_OPTIONAL
1691 /* OMPT release event triggers after lock is released; place here to trigger
1692 * for all #if branches */
1693 OMPT_STORE_RETURN_ADDRESS(global_tid);
1694 if (ompt_enabled.ompt_callback_mutex_released) {
1695 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1696 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1697 OMPT_LOAD_RETURN_ADDRESS(0));
1698 }
1699#endif
1700
1701 KMP_POP_PARTITIONED_TIMER();
1702 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1703}
1704
1714kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1715 int status;
1716 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1717 __kmp_assert_valid_gtid(global_tid);
1718
1719 if (!TCR_4(__kmp_init_parallel))
1720 __kmp_parallel_initialize();
1721
1722 __kmp_resume_if_soft_paused();
1723
1724 if (__kmp_env_consistency_check)
1725 __kmp_check_barrier(global_tid, ct_barrier, loc);
1726
1727#if OMPT_SUPPORT
1728 ompt_frame_t *ompt_frame;
1729 if (ompt_enabled.enabled) {
1730 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1731 if (ompt_frame->enter_frame.ptr == NULL)
1732 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1733 }
1734 OMPT_STORE_RETURN_ADDRESS(global_tid);
1735#endif
1736#if USE_ITT_NOTIFY
1737 __kmp_threads[global_tid]->th.th_ident = loc;
1738#endif
1739 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1740#if OMPT_SUPPORT && OMPT_OPTIONAL
1741 if (ompt_enabled.enabled) {
1742 ompt_frame->enter_frame = ompt_data_none;
1743 }
1744#endif
1745
1746 return (status != 0) ? 0 : 1;
1747}
1748
1758void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1759 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1760 __kmp_assert_valid_gtid(global_tid);
1761 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1762}
1763
1774kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1775 kmp_int32 ret;
1776 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1777 __kmp_assert_valid_gtid(global_tid);
1778
1779 if (!TCR_4(__kmp_init_parallel))
1780 __kmp_parallel_initialize();
1781
1782 __kmp_resume_if_soft_paused();
1783
1784 if (__kmp_env_consistency_check) {
1785 if (loc == 0) {
1786 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1787 }
1788 __kmp_check_barrier(global_tid, ct_barrier, loc);
1789 }
1790
1791#if OMPT_SUPPORT
1792 ompt_frame_t *ompt_frame;
1793 if (ompt_enabled.enabled) {
1794 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1795 if (ompt_frame->enter_frame.ptr == NULL)
1796 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1797 }
1798 OMPT_STORE_RETURN_ADDRESS(global_tid);
1799#endif
1800#if USE_ITT_NOTIFY
1801 __kmp_threads[global_tid]->th.th_ident = loc;
1802#endif
1803 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1804#if OMPT_SUPPORT && OMPT_OPTIONAL
1805 if (ompt_enabled.enabled) {
1806 ompt_frame->enter_frame = ompt_data_none;
1807 }
1808#endif
1809
1810 ret = __kmpc_master(loc, global_tid);
1811
1812 if (__kmp_env_consistency_check) {
1813 /* there is no __kmpc_end_master call, so the (stats) */
1814 /* actions of __kmpc_end_master are done here */
1815 if (ret) {
1816 /* only one thread should do the pop since only */
1817 /* one did the push (see __kmpc_master()) */
1818 __kmp_pop_sync(global_tid, ct_master, loc);
1819 }
1820 }
1821
1822 return (ret);
1823}
1824
1825/* The BARRIER for a SINGLE process section is always explicit */
1837kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1838 __kmp_assert_valid_gtid(global_tid);
1839 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1840
1841 if (rc) {
1842 // We are going to execute the single statement, so we should count it.
1843 KMP_COUNT_BLOCK(OMP_SINGLE);
1844 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1845 }
1846
1847#if OMPT_SUPPORT && OMPT_OPTIONAL
1848 kmp_info_t *this_thr = __kmp_threads[global_tid];
1849 kmp_team_t *team = this_thr->th.th_team;
1850 int tid = __kmp_tid_from_gtid(global_tid);
1851
1852 if (ompt_enabled.enabled) {
1853 if (rc) {
1854 if (ompt_enabled.ompt_callback_work) {
1855 ompt_callbacks.ompt_callback(ompt_callback_work)(
1856 ompt_work_single_executor, ompt_scope_begin,
1857 &(team->t.ompt_team_info.parallel_data),
1858 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1859 1, OMPT_GET_RETURN_ADDRESS(0));
1860 }
1861 } else {
1862 if (ompt_enabled.ompt_callback_work) {
1863 ompt_callbacks.ompt_callback(ompt_callback_work)(
1864 ompt_work_single_other, ompt_scope_begin,
1865 &(team->t.ompt_team_info.parallel_data),
1866 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1867 1, OMPT_GET_RETURN_ADDRESS(0));
1868 ompt_callbacks.ompt_callback(ompt_callback_work)(
1869 ompt_work_single_other, ompt_scope_end,
1870 &(team->t.ompt_team_info.parallel_data),
1871 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1872 1, OMPT_GET_RETURN_ADDRESS(0));
1873 }
1874 }
1875 }
1876#endif
1877
1878 return rc;
1879}
1880
1890void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1891 __kmp_assert_valid_gtid(global_tid);
1892 __kmp_exit_single(global_tid);
1893 KMP_POP_PARTITIONED_TIMER();
1894
1895#if OMPT_SUPPORT && OMPT_OPTIONAL
1896 kmp_info_t *this_thr = __kmp_threads[global_tid];
1897 kmp_team_t *team = this_thr->th.th_team;
1898 int tid = __kmp_tid_from_gtid(global_tid);
1899
1900 if (ompt_enabled.ompt_callback_work) {
1901 ompt_callbacks.ompt_callback(ompt_callback_work)(
1902 ompt_work_single_executor, ompt_scope_end,
1903 &(team->t.ompt_team_info.parallel_data),
1904 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1905 OMPT_GET_RETURN_ADDRESS(0));
1906 }
1907#endif
1908}
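
// Illustrative sketch, not part of the upstream file: "#pragma omp single"
// guards the body with __kmpc_single() / __kmpc_end_single() and, unless a
// nowait clause is present, closes with an explicit __kmpc_barrier(). The
// "example_*" names and the ident_t initializer are hypothetical.
static void example_single(void) {
  static ident_t example_loc = {0, KMP_IDENT_KMPC, 0, 0,
                                ";example.c;example_single;80;1;;"};
  kmp_int32 gtid = __kmpc_global_thread_num(&example_loc);
  if (__kmpc_single(&example_loc, gtid)) {
    // ... executed by exactly one thread of the team ...
    __kmpc_end_single(&example_loc, gtid);
  }
  __kmpc_barrier(&example_loc, gtid); // omitted when nowait is specified
}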
1909
1917void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1918 KMP_POP_PARTITIONED_TIMER();
1919 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1920
1921#if OMPT_SUPPORT && OMPT_OPTIONAL
1922 if (ompt_enabled.ompt_callback_work) {
1923 ompt_work_t ompt_work_type = ompt_work_loop;
1924 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1925 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1926 // Determine workshare type
1927 if (loc != NULL) {
1928 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1929 ompt_work_type = ompt_work_loop;
1930 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1931 ompt_work_type = ompt_work_sections;
1932 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1933 ompt_work_type = ompt_work_distribute;
1934 } else {
1935 // use default set above.
1936 // a warning about this case is provided in __kmpc_for_static_init
1937 }
1938 KMP_DEBUG_ASSERT(ompt_work_type);
1939 }
1940 ompt_callbacks.ompt_callback(ompt_callback_work)(
1941 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1942 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1943 }
1944#endif
1945 if (__kmp_env_consistency_check)
1946 __kmp_pop_workshare(global_tid, ct_pdo, loc);
1947}
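
// Illustrative sketch, not part of the upstream file: a statically scheduled
// "#pragma omp for" obtains its iteration chunk from __kmpc_for_static_init_4()
// (defined in kmp_sched.cpp) and closes the workshare with
// __kmpc_for_static_fini() above. The "example_*" names, the ident_t
// initializer and the exact init-call arguments are assumptions here.
static void example_static_for(int n) {
  static ident_t example_loc = {0, KMP_IDENT_KMPC | KMP_IDENT_WORK_LOOP, 0, 0,
                                ";example.c;example_static_for;90;1;;"};
  kmp_int32 gtid = __kmpc_global_thread_num(&example_loc);
  kmp_int32 lower = 0, upper = n - 1, stride = 1, last = 0;
  __kmpc_for_static_init_4(&example_loc, gtid, kmp_sch_static, &last, &lower,
                           &upper, &stride, /*incr=*/1, /*chunk=*/1);
  for (kmp_int32 i = lower; i <= upper; ++i) {
    (void)i; // ... loop body for the iterations assigned to this thread ...
  }
  __kmpc_for_static_fini(&example_loc, gtid);
  __kmpc_barrier(&example_loc, gtid); // implicit barrier (absent with nowait)
}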
1948
1949// User routines which take C-style arguments (call by value), unlike the
1950// Fortran equivalent routines
1951
1952void ompc_set_num_threads(int arg) {
1953 // !!!!! TODO: check the per-task binding
1954 __kmp_set_num_threads(arg, __kmp_entry_gtid());
1955}
1956
1957void ompc_set_dynamic(int flag) {
1958 kmp_info_t *thread;
1959
1960 /* For the thread-private implementation of the internal controls */
1961 thread = __kmp_entry_thread();
1962
1963 __kmp_save_internal_controls(thread);
1964
1965 set__dynamic(thread, flag ? true : false);
1966}
1967
1968void ompc_set_nested(int flag) {
1969 kmp_info_t *thread;
1970
1971 /* For the thread-private internal controls implementation */
1972 thread = __kmp_entry_thread();
1973
1974 __kmp_save_internal_controls(thread);
1975
1976 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
1977}
1978
1979void ompc_set_max_active_levels(int max_active_levels) {
1980 /* TO DO */
1981 /* we want per-task implementation of this internal control */
1982
1983 /* For the per-thread internal controls implementation */
1984 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1985}
1986
1987void ompc_set_schedule(omp_sched_t kind, int modifier) {
1988 // !!!!! TODO: check the per-task binding
1989 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1990}
1991
1992int ompc_get_ancestor_thread_num(int level) {
1993 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1994}
1995
1996int ompc_get_team_size(int level) {
1997 return __kmp_get_team_size(__kmp_entry_gtid(), level);
1998}
1999
2000/* OpenMP 5.0 Affinity Format API */
2001void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
2002 if (!__kmp_init_serial) {
2003 __kmp_serial_initialize();
2004 }
2005 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
2006 format, KMP_STRLEN(format) + 1);
2007}
2008
2009size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
2010 size_t format_size;
2011 if (!__kmp_init_serial) {
2012 __kmp_serial_initialize();
2013 }
2014 format_size = KMP_STRLEN(__kmp_affinity_format);
2015 if (buffer && size) {
2016 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
2017 format_size + 1);
2018 }
2019 return format_size;
2020}
2021
2022void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
2023 int gtid;
2024 if (!TCR_4(__kmp_init_middle)) {
2025 __kmp_middle_initialize();
2026 }
2027 __kmp_assign_root_init_mask();
2028 gtid = __kmp_get_gtid();
2029#if KMP_AFFINITY_SUPPORTED
2030 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
2031 __kmp_reset_root_init_mask(gtid);
2032 }
2033#endif
2034 __kmp_aux_display_affinity(gtid, format);
2035}
2036
2037size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
2038 char const *format) {
2039 int gtid;
2040 size_t num_required;
2041 kmp_str_buf_t capture_buf;
2042 if (!TCR_4(__kmp_init_middle)) {
2043 __kmp_middle_initialize();
2044 }
2045 __kmp_assign_root_init_mask();
2046 gtid = __kmp_get_gtid();
2047#if KMP_AFFINITY_SUPPORTED
2048 if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
2049 __kmp_reset_root_init_mask(gtid);
2050 }
2051#endif
2052 __kmp_str_buf_init(&capture_buf);
2053 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2054 if (buffer && buf_size) {
2055 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
2056 capture_buf.used + 1);
2057 }
2058 __kmp_str_buf_free(&capture_buf);
2059 return num_required;
2060}
2061
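/* Illustrative sketch, not part of this translation unit: the OpenMP 5.0
   affinity-format API that these ompc_* entry points back. Per the spec,
   omp_capture_affinity() returns the number of characters needed (excluding
   the terminating '\0'), so the usual pattern is to size the buffer first.
   The format string below is just an example.

   #include <omp.h>
   #include <stdio.h>
   #include <stdlib.h>

   omp_set_affinity_format("T#%0.2n of %N on %A");
   #pragma omp parallel
   {
     size_t need = omp_capture_affinity(NULL, 0, NULL); // NULL format: use the
                                                        // affinity-format-var
     char *buf = (char *)malloc(need + 1);
     omp_capture_affinity(buf, need + 1, NULL);
     printf("%s\n", buf);
     free(buf);
   }
*/
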
2062void kmpc_set_stacksize(int arg) {
2063 // __kmp_aux_set_stacksize initializes the library if needed
2064 __kmp_aux_set_stacksize(arg);
2065}
2066
2067void kmpc_set_stacksize_s(size_t arg) {
2068 // __kmp_aux_set_stacksize initializes the library if needed
2069 __kmp_aux_set_stacksize(arg);
2070}
2071
2072void kmpc_set_blocktime(int arg) {
2073 int gtid, tid;
2074 kmp_info_t *thread;
2075
2076 gtid = __kmp_entry_gtid();
2077 tid = __kmp_tid_from_gtid(gtid);
2078 thread = __kmp_thread_from_gtid(gtid);
2079
2080 __kmp_aux_set_blocktime(arg, thread, tid);
2081}
2082
2083void kmpc_set_library(int arg) {
2084 // __kmp_user_set_library initializes the library if needed
2085 __kmp_user_set_library((enum library_type)arg);
2086}
2087
2088void kmpc_set_defaults(char const *str) {
2089 // __kmp_aux_set_defaults initializes the library if needed
2090 __kmp_aux_set_defaults(str, KMP_STRLEN(str));
2091}
2092
2093void kmpc_set_disp_num_buffers(int arg) {
2094 // ignore after initialization because some teams have already
2095 // allocated dispatch buffers
2096 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
2097 arg <= KMP_MAX_DISP_NUM_BUFF) {
2098 __kmp_dispatch_num_buffers = arg;
2099 }
2100}
2101
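/* Illustrative sketch, not part of this translation unit: the kmp_* extension
   API (declared in this runtime's omp.h) that corresponds to these kmpc_*
   setters; the exact lowering from kmp_* to kmpc_* is assumed here, and the
   settings string is only an example.

   #include <omp.h>
   kmp_set_stacksize_s(4 * 1024 * 1024);     // cf. kmpc_set_stacksize_s
   kmp_set_blocktime(0);                     // cf. kmpc_set_blocktime
   kmp_set_defaults("KMP_AFFINITY=compact"); // cf. kmpc_set_defaults
   kmp_set_disp_num_buffers(8);              // cf. kmpc_set_disp_num_buffers;
                                             // honored only before serial
                                             // initialization and only within
                                             // the allowed buffer range
*/
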
2102int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2103#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2104 return -1;
2105#else
2106 if (!TCR_4(__kmp_init_middle)) {
2107 __kmp_middle_initialize();
2108 }
2109 __kmp_assign_root_init_mask();
2110 return __kmp_aux_set_affinity_mask_proc(proc, mask);
2111#endif
2112}
2113
2114int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2115#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2116 return -1;
2117#else
2118 if (!TCR_4(__kmp_init_middle)) {
2119 __kmp_middle_initialize();
2120 }
2121 __kmp_assign_root_init_mask();
2122 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2123#endif
2124}
2125
2126int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2127#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2128 return -1;
2129#else
2130 if (!TCR_4(__kmp_init_middle)) {
2131 __kmp_middle_initialize();
2132 }
2133 __kmp_assign_root_init_mask();
2134 return __kmp_aux_get_affinity_mask_proc(proc, mask);
2135#endif
2136}
2137
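/* Illustrative sketch, not part of this translation unit: the kmp affinity
   extension API behind these entry points, using the kmp_* names declared in
   this runtime's omp.h (the lowering to kmpc_* is assumed). Logical CPU 3 is
   just an example.

   #include <omp.h>
   kmp_affinity_mask_t mask;
   kmp_create_affinity_mask(&mask);
   kmp_set_affinity_mask_proc(3, &mask);          // cf. kmpc_set_affinity_mask_proc
   if (kmp_get_affinity_mask_proc(3, &mask) == 1) // 1: proc 3 is in the mask
     kmp_set_affinity(&mask);                     // bind the calling thread
   kmp_destroy_affinity_mask(&mask);
*/
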
2138/* -------------------------------------------------------------------------- */
2183void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2184 void *cpy_data, void (*cpy_func)(void *, void *),
2185 kmp_int32 didit) {
2186 void **data_ptr;
2187 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2188 __kmp_assert_valid_gtid(gtid);
2189
2190 KMP_MB();
2191
2192 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2193
2194 if (__kmp_env_consistency_check) {
2195 if (loc == 0) {
2196 KMP_WARNING(ConstructIdentInvalid);
2197 }
2198 }
2199
2200 // ToDo: Optimize the following two barriers into some kind of split barrier
2201
2202 if (didit)
2203 *data_ptr = cpy_data;
2204
2205#if OMPT_SUPPORT
2206 ompt_frame_t *ompt_frame;
2207 if (ompt_enabled.enabled) {
2208 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2209 if (ompt_frame->enter_frame.ptr == NULL)
2210 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2211 }
2212 OMPT_STORE_RETURN_ADDRESS(gtid);
2213#endif
2214/* This barrier is not a barrier region boundary */
2215#if USE_ITT_NOTIFY
2216 __kmp_threads[gtid]->th.th_ident = loc;
2217#endif
2218 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2219
2220 if (!didit)
2221 (*cpy_func)(cpy_data, *data_ptr);
2222
2223 // Consider next barrier a user-visible barrier for barrier region boundaries
2224 // Nesting checks are already handled by the single construct checks
2225 {
2226#if OMPT_SUPPORT
2227 OMPT_STORE_RETURN_ADDRESS(gtid);
2228#endif
2229#if USE_ITT_NOTIFY
2230 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2231// tasks can overwrite the location)
2232#endif
2233 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2234#if OMPT_SUPPORT && OMPT_OPTIONAL
2235 if (ompt_enabled.enabled) {
2236 ompt_frame->enter_frame = ompt_data_none;
2237 }
2238#endif
2239 }
2240}
2241
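/* Illustrative sketch of assumed code generation, not taken from a specific
   compiler: how
     #pragma omp single copyprivate(x)
     x = compute();
   maps onto __kmpc_copyprivate. 'didit' is 1 only on the thread that executed
   the single region; compute() and x are placeholders.

   static void copy_x(void *dst, void *src) { // cpy_func
     *(int *)dst = *(int *)src;
   }

   // inside the outlined parallel region:
   kmp_int32 didit = 0;
   if (__kmpc_single(loc, gtid)) {
     x = compute();
     __kmpc_end_single(loc, gtid);
     didit = 1;
   }
   __kmpc_copyprivate(loc, gtid, sizeof(int), &x, copy_x, didit);
   // after the call, every thread's x holds the value computed in the single
*/
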
2242/* --------------------------------------------------------------------------*/
2259void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
2260 void **data_ptr;
2261
2262 KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));
2263
2264 KMP_MB();
2265
2266 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2267
2268 if (__kmp_env_consistency_check) {
2269 if (loc == 0) {
2270 KMP_WARNING(ConstructIdentInvalid);
2271 }
2272 }
2273
2274 // ToDo: Optimize the following barrier
2275
2276 if (cpy_data)
2277 *data_ptr = cpy_data;
2278
2279#if OMPT_SUPPORT
2280 ompt_frame_t *ompt_frame;
2281 if (ompt_enabled.enabled) {
2282 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2283 if (ompt_frame->enter_frame.ptr == NULL)
2284 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2285 OMPT_STORE_RETURN_ADDRESS(gtid);
2286 }
2287#endif
2288/* This barrier is not a barrier region boundary */
2289#if USE_ITT_NOTIFY
2290 __kmp_threads[gtid]->th.th_ident = loc;
2291#endif
2292 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2293
2294 return *data_ptr;
2295}
2296
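/* Illustrative sketch, not part of this translation unit: the light variant
   performs a single barrier and simply hands every thread the pointer
   published by the executing thread; the caller does any copying itself and
   must provide whatever later synchronization it needs before the published
   data is reused (assumed usage, mirroring the sketch above).

   void *src = didit ? (void *)&x : NULL; // only the executing thread publishes
   void *pub = __kmpc_copyprivate_light(loc, gtid, src);
   if (!didit)
     x = *(int *)pub;
*/
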
2297/* -------------------------------------------------------------------------- */
2298
2299#define INIT_LOCK __kmp_init_user_lock_with_checks
2300#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2301#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2302#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2303#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2304#define ACQUIRE_NESTED_LOCK_TIMED \
2305 __kmp_acquire_nested_user_lock_with_checks_timed
2306#define RELEASE_LOCK __kmp_release_user_lock_with_checks
2307#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2308#define TEST_LOCK __kmp_test_user_lock_with_checks
2309#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2310#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2311#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2312
2313// TODO: Make check abort messages use location info & pass it into
2314// with_checks routines
2315
2316#if KMP_USE_DYNAMIC_LOCK
2317
2318// internal lock initializer
2319static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2320 kmp_dyna_lockseq_t seq) {
2321 if (KMP_IS_D_LOCK(seq)) {
2322 KMP_INIT_D_LOCK(lock, seq);
2323#if USE_ITT_BUILD
2324 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2325#endif
2326 } else {
2327 KMP_INIT_I_LOCK(lock, seq);
2328#if USE_ITT_BUILD
2329 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2330 __kmp_itt_lock_creating(ilk->lock, loc);
2331#endif
2332 }
2333}
2334
2335// internal nest lock initializer
2336static __forceinline void
2337__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2338 kmp_dyna_lockseq_t seq) {
2339#if KMP_USE_TSX
2340 // Don't have nested lock implementation for speculative locks
2341 if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2342 seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2343 seq = __kmp_user_lock_seq;
2344#endif
2345 switch (seq) {
2346 case lockseq_tas:
2347 seq = lockseq_nested_tas;
2348 break;
2349#if KMP_USE_FUTEX
2350 case lockseq_futex:
2351 seq = lockseq_nested_futex;
2352 break;
2353#endif
2354 case lockseq_ticket:
2355 seq = lockseq_nested_ticket;
2356 break;
2357 case lockseq_queuing:
2358 seq = lockseq_nested_queuing;
2359 break;
2360 case lockseq_drdpa:
2361 seq = lockseq_nested_drdpa;
2362 break;
2363 default:
2364 seq = lockseq_nested_queuing;
2365 }
2366 KMP_INIT_I_LOCK(lock, seq);
2367#if USE_ITT_BUILD
2368 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2369 __kmp_itt_lock_creating(ilk->lock, loc);
2370#endif
2371}
2372
2373/* initialize the lock with a hint */
2374void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2375 uintptr_t hint) {
2376 KMP_DEBUG_ASSERT(__kmp_init_serial);
2377 if (__kmp_env_consistency_check && user_lock == NULL) {
2378 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2379 }
2380
2381 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2382
2383#if OMPT_SUPPORT && OMPT_OPTIONAL
2384 // This is the case, if called from omp_init_lock_with_hint:
2385 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2386 if (!codeptr)
2387 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2388 if (ompt_enabled.ompt_callback_lock_init) {
2389 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2390 ompt_mutex_lock, (omp_lock_hint_t)hint,
2391 __ompt_get_mutex_impl_type(user_lock),
2392 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2393 }
2394#endif
2395}
2396
2397/* initialize the lock with a hint */
2398void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2399 void **user_lock, uintptr_t hint) {
2400 KMP_DEBUG_ASSERT(__kmp_init_serial);
2401 if (__kmp_env_consistency_check && user_lock == NULL) {
2402 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2403 }
2404
2405 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2406
2407#if OMPT_SUPPORT && OMPT_OPTIONAL
2408 // This is the case, if called from omp_init_lock_with_hint:
2409 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2410 if (!codeptr)
2411 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2412 if (ompt_enabled.ompt_callback_lock_init) {
2413 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2414 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2415 __ompt_get_mutex_impl_type(user_lock),
2416 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2417 }
2418#endif
2419}
2420
2421#endif // KMP_USE_DYNAMIC_LOCK
2422
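/* Illustrative sketch, not part of this translation unit: the user-level
   omp_init_lock_with_hint API (OpenMP 4.5; spelled omp_sync_hint_* in
   OpenMP 5.0) that reaches these entry points. Which lock implementation a
   given hint selects is decided by __kmp_map_hint_to_lock and is not
   guaranteed here.

   #include <omp.h>
   omp_lock_t l;
   omp_init_lock_with_hint(&l, omp_lock_hint_speculative);
   omp_set_lock(&l);
   // ... short critical section ...
   omp_unset_lock(&l);
   omp_destroy_lock(&l);
*/
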
2423/* initialize the lock */
2424void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2425#if KMP_USE_DYNAMIC_LOCK
2426
2427 KMP_DEBUG_ASSERT(__kmp_init_serial);
2428 if (__kmp_env_consistency_check && user_lock == NULL) {
2429 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2430 }
2431 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2432
2433#if OMPT_SUPPORT && OMPT_OPTIONAL
2434 // This is the case, if called from omp_init_lock_with_hint:
2435 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2436 if (!codeptr)
2437 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2438 if (ompt_enabled.ompt_callback_lock_init) {
2439 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2440 ompt_mutex_lock, omp_lock_hint_none,
2441 __ompt_get_mutex_impl_type(user_lock),
2442 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2443 }
2444#endif
2445
2446#else // KMP_USE_DYNAMIC_LOCK
2447
2448 static char const *const func = "omp_init_lock";
2449 kmp_user_lock_p lck;
2450 KMP_DEBUG_ASSERT(__kmp_init_serial);
2451
2452 if (__kmp_env_consistency_check) {
2453 if (user_lock == NULL) {
2454 KMP_FATAL(LockIsUninitialized, func);
2455 }
2456 }
2457
2458 KMP_CHECK_USER_LOCK_INIT();
2459
2460 if ((__kmp_user_lock_kind == lk_tas) &&
2461 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2462 lck = (kmp_user_lock_p)user_lock;
2463 }
2464#if KMP_USE_FUTEX
2465 else if ((__kmp_user_lock_kind == lk_futex) &&
2466 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2467 lck = (kmp_user_lock_p)user_lock;
2468 }
2469#endif
2470 else {
2471 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2472 }
2473 INIT_LOCK(lck);
2474 __kmp_set_user_lock_location(lck, loc);
2475
2476#if OMPT_SUPPORT && OMPT_OPTIONAL
2477 // This is the case, if called from omp_init_lock_with_hint:
2478 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2479 if (!codeptr)
2480 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2481 if (ompt_enabled.ompt_callback_lock_init) {
2482 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2483 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2484 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2485 }
2486#endif
2487
2488#if USE_ITT_BUILD
2489 __kmp_itt_lock_creating(lck);
2490#endif /* USE_ITT_BUILD */
2491
2492#endif // KMP_USE_DYNAMIC_LOCK
2493} // __kmpc_init_lock
2494
2495/* initialize the lock */
2496void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2497#if KMP_USE_DYNAMIC_LOCK
2498
2499 KMP_DEBUG_ASSERT(__kmp_init_serial);
2500 if (__kmp_env_consistency_check && user_lock == NULL) {
2501 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2502 }
2503 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2504
2505#if OMPT_SUPPORT && OMPT_OPTIONAL
2506 // This is the case, if called from omp_init_lock_with_hint:
2507 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2508 if (!codeptr)
2509 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2510 if (ompt_enabled.ompt_callback_lock_init) {
2511 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2512 ompt_mutex_nest_lock, omp_lock_hint_none,
2513 __ompt_get_mutex_impl_type(user_lock),
2514 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2515 }
2516#endif
2517
2518#else // KMP_USE_DYNAMIC_LOCK
2519
2520 static char const *const func = "omp_init_nest_lock";
2521 kmp_user_lock_p lck;
2522 KMP_DEBUG_ASSERT(__kmp_init_serial);
2523
2524 if (__kmp_env_consistency_check) {
2525 if (user_lock == NULL) {
2526 KMP_FATAL(LockIsUninitialized, func);
2527 }
2528 }
2529
2530 KMP_CHECK_USER_LOCK_INIT();
2531
2532 if ((__kmp_user_lock_kind == lk_tas) &&
2533 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2534 OMP_NEST_LOCK_T_SIZE)) {
2535 lck = (kmp_user_lock_p)user_lock;
2536 }
2537#if KMP_USE_FUTEX
2538 else if ((__kmp_user_lock_kind == lk_futex) &&
2539 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2540 OMP_NEST_LOCK_T_SIZE)) {
2541 lck = (kmp_user_lock_p)user_lock;
2542 }
2543#endif
2544 else {
2545 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2546 }
2547
2548 INIT_NESTED_LOCK(lck);
2549 __kmp_set_user_lock_location(lck, loc);
2550
2551#if OMPT_SUPPORT && OMPT_OPTIONAL
2552 // This is the case, if called from omp_init_lock_with_hint:
2553 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2554 if (!codeptr)
2555 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2556 if (ompt_enabled.ompt_callback_lock_init) {
2557 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2558 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2559 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2560 }
2561#endif
2562
2563#if USE_ITT_BUILD
2564 __kmp_itt_lock_creating(lck);
2565#endif /* USE_ITT_BUILD */
2566
2567#endif // KMP_USE_DYNAMIC_LOCK
2568} // __kmpc_init_nest_lock
2569
2570void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2571#if KMP_USE_DYNAMIC_LOCK
2572
2573#if USE_ITT_BUILD
2574 kmp_user_lock_p lck;
2575 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2576 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2577 } else {
2578 lck = (kmp_user_lock_p)user_lock;
2579 }
2580 __kmp_itt_lock_destroyed(lck);
2581#endif
2582#if OMPT_SUPPORT && OMPT_OPTIONAL
2583 // This is the case, if called from omp_init_lock_with_hint:
2584 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2585 if (!codeptr)
2586 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2587 if (ompt_enabled.ompt_callback_lock_destroy) {
2588 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2589 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2590 }
2591#endif
2592 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2593#else
2594 kmp_user_lock_p lck;
2595
2596 if ((__kmp_user_lock_kind == lk_tas) &&
2597 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2598 lck = (kmp_user_lock_p)user_lock;
2599 }
2600#if KMP_USE_FUTEX
2601 else if ((__kmp_user_lock_kind == lk_futex) &&
2602 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2603 lck = (kmp_user_lock_p)user_lock;
2604 }
2605#endif
2606 else {
2607 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2608 }
2609
2610#if OMPT_SUPPORT && OMPT_OPTIONAL
2611 // This is the case, if called from omp_init_lock_with_hint:
2612 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2613 if (!codeptr)
2614 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2615 if (ompt_enabled.ompt_callback_lock_destroy) {
2616 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2617 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2618 }
2619#endif
2620
2621#if USE_ITT_BUILD
2622 __kmp_itt_lock_destroyed(lck);
2623#endif /* USE_ITT_BUILD */
2624 DESTROY_LOCK(lck);
2625
2626 if ((__kmp_user_lock_kind == lk_tas) &&
2627 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2628 ;
2629 }
2630#if KMP_USE_FUTEX
2631 else if ((__kmp_user_lock_kind == lk_futex) &&
2632 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2633 ;
2634 }
2635#endif
2636 else {
2637 __kmp_user_lock_free(user_lock, gtid, lck);
2638 }
2639#endif // KMP_USE_DYNAMIC_LOCK
2640} // __kmpc_destroy_lock
2641
2642/* destroy the lock */
2643void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2644#if KMP_USE_DYNAMIC_LOCK
2645
2646#if USE_ITT_BUILD
2647 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2648 __kmp_itt_lock_destroyed(ilk->lock);
2649#endif
2650#if OMPT_SUPPORT && OMPT_OPTIONAL
2651 // This is the case, if called from omp_init_lock_with_hint:
2652 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2653 if (!codeptr)
2654 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2655 if (ompt_enabled.ompt_callback_lock_destroy) {
2656 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2657 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2658 }
2659#endif
2660 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2661
2662#else // KMP_USE_DYNAMIC_LOCK
2663
2664 kmp_user_lock_p lck;
2665
2666 if ((__kmp_user_lock_kind == lk_tas) &&
2667 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2668 OMP_NEST_LOCK_T_SIZE)) {
2669 lck = (kmp_user_lock_p)user_lock;
2670 }
2671#if KMP_USE_FUTEX
2672 else if ((__kmp_user_lock_kind == lk_futex) &&
2673 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2674 OMP_NEST_LOCK_T_SIZE)) {
2675 lck = (kmp_user_lock_p)user_lock;
2676 }
2677#endif
2678 else {
2679 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2680 }
2681
2682#if OMPT_SUPPORT && OMPT_OPTIONAL
2683 // This is the case, if called from omp_init_lock_with_hint:
2684 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2685 if (!codeptr)
2686 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2687 if (ompt_enabled.ompt_callback_lock_destroy) {
2688 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2689 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2690 }
2691#endif
2692
2693#if USE_ITT_BUILD
2694 __kmp_itt_lock_destroyed(lck);
2695#endif /* USE_ITT_BUILD */
2696
2697 DESTROY_NESTED_LOCK(lck);
2698
2699 if ((__kmp_user_lock_kind == lk_tas) &&
2700 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2701 OMP_NEST_LOCK_T_SIZE)) {
2702 ;
2703 }
2704#if KMP_USE_FUTEX
2705 else if ((__kmp_user_lock_kind == lk_futex) &&
2706 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2707 OMP_NEST_LOCK_T_SIZE)) {
2708 ;
2709 }
2710#endif
2711 else {
2712 __kmp_user_lock_free(user_lock, gtid, lck);
2713 }
2714#endif // KMP_USE_DYNAMIC_LOCK
2715} // __kmpc_destroy_nest_lock
2716
2717void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2718 KMP_COUNT_BLOCK(OMP_set_lock);
2719#if KMP_USE_DYNAMIC_LOCK
2720 int tag = KMP_EXTRACT_D_TAG(user_lock);
2721#if USE_ITT_BUILD
2722 __kmp_itt_lock_acquiring(
2723 (kmp_user_lock_p)
2724 user_lock); // itt function will get to the right lock object.
2725#endif
2726#if OMPT_SUPPORT && OMPT_OPTIONAL
2727 // This is the case, if called from omp_init_lock_with_hint:
2728 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2729 if (!codeptr)
2730 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2731 if (ompt_enabled.ompt_callback_mutex_acquire) {
2732 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2733 ompt_mutex_lock, omp_lock_hint_none,
2734 __ompt_get_mutex_impl_type(user_lock),
2735 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2736 }
2737#endif
2738#if KMP_USE_INLINED_TAS
2739 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2740 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2741 } else
2742#elif KMP_USE_INLINED_FUTEX
2743 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2744 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2745 } else
2746#endif
2747 {
2748 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2749 }
2750#if USE_ITT_BUILD
2751 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2752#endif
2753#if OMPT_SUPPORT && OMPT_OPTIONAL
2754 if (ompt_enabled.ompt_callback_mutex_acquired) {
2755 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2756 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2757 }
2758#endif
2759
2760#else // KMP_USE_DYNAMIC_LOCK
2761
2762 kmp_user_lock_p lck;
2763
2764 if ((__kmp_user_lock_kind == lk_tas) &&
2765 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2766 lck = (kmp_user_lock_p)user_lock;
2767 }
2768#if KMP_USE_FUTEX
2769 else if ((__kmp_user_lock_kind == lk_futex) &&
2770 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2771 lck = (kmp_user_lock_p)user_lock;
2772 }
2773#endif
2774 else {
2775 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2776 }
2777
2778#if USE_ITT_BUILD
2779 __kmp_itt_lock_acquiring(lck);
2780#endif /* USE_ITT_BUILD */
2781#if OMPT_SUPPORT && OMPT_OPTIONAL
2782 // This is the case, if called from omp_init_lock_with_hint:
2783 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2784 if (!codeptr)
2785 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2786 if (ompt_enabled.ompt_callback_mutex_acquire) {
2787 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2788 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2789 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2790 }
2791#endif
2792
2793 ACQUIRE_LOCK(lck, gtid);
2794
2795#if USE_ITT_BUILD
2796 __kmp_itt_lock_acquired(lck);
2797#endif /* USE_ITT_BUILD */
2798
2799#if OMPT_SUPPORT && OMPT_OPTIONAL
2800 if (ompt_enabled.ompt_callback_mutex_acquired) {
2801 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2802 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2803 }
2804#endif
2805
2806#endif // KMP_USE_DYNAMIC_LOCK
2807}
2808
2809void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2810#if KMP_USE_DYNAMIC_LOCK
2811
2812#if USE_ITT_BUILD
2813 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2814#endif
2815#if OMPT_SUPPORT && OMPT_OPTIONAL
2816 // This is the case, if called from omp_init_lock_with_hint:
2817 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2818 if (!codeptr)
2819 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2820 if (ompt_enabled.enabled) {
2821 if (ompt_enabled.ompt_callback_mutex_acquire) {
2822 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2823 ompt_mutex_nest_lock, omp_lock_hint_none,
2824 __ompt_get_mutex_impl_type(user_lock),
2825 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2826 }
2827 }
2828#endif
2829 int acquire_status =
2830 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2831 (void)acquire_status;
2832#if USE_ITT_BUILD
2833 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2834#endif
2835
2836#if OMPT_SUPPORT && OMPT_OPTIONAL
2837 if (ompt_enabled.enabled) {
2838 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2839 if (ompt_enabled.ompt_callback_mutex_acquired) {
2840 // lock_first
2841 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2842 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2843 codeptr);
2844 }
2845 } else {
2846 if (ompt_enabled.ompt_callback_nest_lock) {
2847 // lock_next
2848 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2849 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2850 }
2851 }
2852 }
2853#endif
2854
2855#else // KMP_USE_DYNAMIC_LOCK
2856 int acquire_status;
2857 kmp_user_lock_p lck;
2858
2859 if ((__kmp_user_lock_kind == lk_tas) &&
2860 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2861 OMP_NEST_LOCK_T_SIZE)) {
2862 lck = (kmp_user_lock_p)user_lock;
2863 }
2864#if KMP_USE_FUTEX
2865 else if ((__kmp_user_lock_kind == lk_futex) &&
2866 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2867 OMP_NEST_LOCK_T_SIZE)) {
2868 lck = (kmp_user_lock_p)user_lock;
2869 }
2870#endif
2871 else {
2872 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2873 }
2874
2875#if USE_ITT_BUILD
2876 __kmp_itt_lock_acquiring(lck);
2877#endif /* USE_ITT_BUILD */
2878#if OMPT_SUPPORT && OMPT_OPTIONAL
2879 // This is the case, if called from omp_init_lock_with_hint:
2880 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2881 if (!codeptr)
2882 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2883 if (ompt_enabled.enabled) {
2884 if (ompt_enabled.ompt_callback_mutex_acquire) {
2885 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2886 ompt_mutex_nest_lock, omp_lock_hint_none,
2887 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2888 codeptr);
2889 }
2890 }
2891#endif
2892
2893 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2894
2895#if USE_ITT_BUILD
2896 __kmp_itt_lock_acquired(lck);
2897#endif /* USE_ITT_BUILD */
2898
2899#if OMPT_SUPPORT && OMPT_OPTIONAL
2900 if (ompt_enabled.enabled) {
2901 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2902 if (ompt_enabled.ompt_callback_mutex_acquired) {
2903 // lock_first
2904 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2905 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2906 }
2907 } else {
2908 if (ompt_enabled.ompt_callback_nest_lock) {
2909 // lock_next
2910 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2911 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2912 }
2913 }
2914 }
2915#endif
2916
2917#endif // KMP_USE_DYNAMIC_LOCK
2918}
2919
2920void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2921#if KMP_USE_DYNAMIC_LOCK
2922
2923 int tag = KMP_EXTRACT_D_TAG(user_lock);
2924#if USE_ITT_BUILD
2925 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2926#endif
2927#if KMP_USE_INLINED_TAS
2928 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2929 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2930 } else
2931#elif KMP_USE_INLINED_FUTEX
2932 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2933 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2934 } else
2935#endif
2936 {
2937 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2938 }
2939
2940#if OMPT_SUPPORT && OMPT_OPTIONAL
2941 // This is the case, if called from omp_init_lock_with_hint:
2942 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2943 if (!codeptr)
2944 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2945 if (ompt_enabled.ompt_callback_mutex_released) {
2946 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2947 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2948 }
2949#endif
2950
2951#else // KMP_USE_DYNAMIC_LOCK
2952
2953 kmp_user_lock_p lck;
2954
2955 /* Can't use serial interval since not block structured */
2956 /* release the lock */
2957
2958 if ((__kmp_user_lock_kind == lk_tas) &&
2959 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2960#if KMP_OS_LINUX && \
2961 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2962// "fast" path implemented to fix customer performance issue
2963#if USE_ITT_BUILD
2964 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2965#endif /* USE_ITT_BUILD */
2966 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2967 KMP_MB();
2968
2969#if OMPT_SUPPORT && OMPT_OPTIONAL
2970 // This is the case, if called from omp_init_lock_with_hint:
2971 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2972 if (!codeptr)
2973 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2974 if (ompt_enabled.ompt_callback_mutex_released) {
2975 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2976 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2977 }
2978#endif
2979
2980 return;
2981#else
2982 lck = (kmp_user_lock_p)user_lock;
2983#endif
2984 }
2985#if KMP_USE_FUTEX
2986 else if ((__kmp_user_lock_kind == lk_futex) &&
2987 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2988 lck = (kmp_user_lock_p)user_lock;
2989 }
2990#endif
2991 else {
2992 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2993 }
2994
2995#if USE_ITT_BUILD
2996 __kmp_itt_lock_releasing(lck);
2997#endif /* USE_ITT_BUILD */
2998
2999 RELEASE_LOCK(lck, gtid);
3000
3001#if OMPT_SUPPORT && OMPT_OPTIONAL
3002 // This is the case, if called from omp_init_lock_with_hint:
3003 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3004 if (!codeptr)
3005 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3006 if (ompt_enabled.ompt_callback_mutex_released) {
3007 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3008 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3009 }
3010#endif
3011
3012#endif // KMP_USE_DYNAMIC_LOCK
3013}
3014
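/* Illustrative sketch, not part of this translation unit: the standard
   omp_lock_t lifecycle served by the plain-lock entry points above. In this
   runtime the omp_* lock routines are assumed to be thin wrappers over the
   __kmpc_* forms.

   #include <omp.h>
   omp_lock_t l;
   omp_init_lock(&l);      // cf. __kmpc_init_lock
   #pragma omp parallel
   {
     omp_set_lock(&l);     // cf. __kmpc_set_lock (blocks until acquired)
     // ... critical work ...
     omp_unset_lock(&l);   // cf. __kmpc_unset_lock
   }
   omp_destroy_lock(&l);   // cf. __kmpc_destroy_lock
*/
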
3015/* release the lock */
3016void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3017#if KMP_USE_DYNAMIC_LOCK
3018
3019#if USE_ITT_BUILD
3020 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3021#endif
3022 int release_status =
3023 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
3024 (void)release_status;
3025
3026#if OMPT_SUPPORT && OMPT_OPTIONAL
3027 // This is the case, if called from omp_init_lock_with_hint:
3028 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3029 if (!codeptr)
3030 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3031 if (ompt_enabled.enabled) {
3032 if (release_status == KMP_LOCK_RELEASED) {
3033 if (ompt_enabled.ompt_callback_mutex_released) {
3034 // release_lock_last
3035 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3036 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3037 codeptr);
3038 }
3039 } else if (ompt_enabled.ompt_callback_nest_lock) {
3040 // release_lock_prev
3041 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3042 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3043 }
3044 }
3045#endif
3046
3047#else // KMP_USE_DYNAMIC_LOCK
3048
3049 kmp_user_lock_p lck;
3050
3051 /* Can't use serial interval since not block structured */
3052
3053 if ((__kmp_user_lock_kind == lk_tas) &&
3054 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3055 OMP_NEST_LOCK_T_SIZE)) {
3056#if KMP_OS_LINUX && \
3057 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3058 // "fast" path implemented to fix customer performance issue
3059 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
3060#if USE_ITT_BUILD
3061 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3062#endif /* USE_ITT_BUILD */
3063
3064#if OMPT_SUPPORT && OMPT_OPTIONAL
3065 int release_status = KMP_LOCK_STILL_HELD;
3066#endif
3067
3068 if (--(tl->lk.depth_locked) == 0) {
3069 TCW_4(tl->lk.poll, 0);
3070#if OMPT_SUPPORT && OMPT_OPTIONAL
3071 release_status = KMP_LOCK_RELEASED;
3072#endif
3073 }
3074 KMP_MB();
3075
3076#if OMPT_SUPPORT && OMPT_OPTIONAL
3077 // This is the case, if called from omp_init_lock_with_hint:
3078 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3079 if (!codeptr)
3080 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3081 if (ompt_enabled.enabled) {
3082 if (release_status == KMP_LOCK_RELEASED) {
3083 if (ompt_enabled.ompt_callback_mutex_released) {
3084 // release_lock_last
3085 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3086 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3087 }
3088 } else if (ompt_enabled.ompt_callback_nest_lock) {
3089 // release_lock_previous
3090 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3091 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3092 }
3093 }
3094#endif
3095
3096 return;
3097#else
3098 lck = (kmp_user_lock_p)user_lock;
3099#endif
3100 }
3101#if KMP_USE_FUTEX
3102 else if ((__kmp_user_lock_kind == lk_futex) &&
3103 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3104 OMP_NEST_LOCK_T_SIZE)) {
3105 lck = (kmp_user_lock_p)user_lock;
3106 }
3107#endif
3108 else {
3109 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3110 }
3111
3112#if USE_ITT_BUILD
3113 __kmp_itt_lock_releasing(lck);
3114#endif /* USE_ITT_BUILD */
3115
3116 int release_status;
3117 release_status = RELEASE_NESTED_LOCK(lck, gtid);
3118#if OMPT_SUPPORT && OMPT_OPTIONAL
3119 // This is the case, if called from omp_init_lock_with_hint:
3120 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3121 if (!codeptr)
3122 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3123 if (ompt_enabled.enabled) {
3124 if (release_status == KMP_LOCK_RELEASED) {
3125 if (ompt_enabled.ompt_callback_mutex_released) {
3126 // release_lock_last
3127 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3128 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3129 }
3130 } else if (ompt_enabled.ompt_callback_nest_lock) {
3131 // release_lock_previous
3132 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3133 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3134 }
3135 }
3136#endif
3137
3138#endif // KMP_USE_DYNAMIC_LOCK
3139}
3140
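/* Illustrative sketch, not part of this translation unit: nestable locks
   count recursive acquisitions by the owning thread, and only the release
   that brings the depth back to zero actually frees the lock; this is the
   distinction between KMP_LOCK_RELEASED and KMP_LOCK_STILL_HELD above.

   #include <omp.h>
   omp_nest_lock_t nl;
   omp_init_nest_lock(&nl);
   omp_set_nest_lock(&nl);    // depth 1
   omp_set_nest_lock(&nl);    // depth 2: same owner, no deadlock
   omp_unset_nest_lock(&nl);  // depth 1: still held
   omp_unset_nest_lock(&nl);  // depth 0: released
   omp_destroy_nest_lock(&nl);
*/
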
3141/* try to acquire the lock */
3142int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3143 KMP_COUNT_BLOCK(OMP_test_lock);
3144
3145#if KMP_USE_DYNAMIC_LOCK
3146 int rc;
3147 int tag = KMP_EXTRACT_D_TAG(user_lock);
3148#if USE_ITT_BUILD
3149 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3150#endif
3151#if OMPT_SUPPORT && OMPT_OPTIONAL
3152 // This is the case, if called from omp_init_lock_with_hint:
3153 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3154 if (!codeptr)
3155 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3156 if (ompt_enabled.ompt_callback_mutex_acquire) {
3157 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3158 ompt_mutex_lock, omp_lock_hint_none,
3159 __ompt_get_mutex_impl_type(user_lock),
3160 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3161 }
3162#endif
3163#if KMP_USE_INLINED_TAS
3164 if (tag == locktag_tas && !__kmp_env_consistency_check) {
3165 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3166 } else
3167#elif KMP_USE_INLINED_FUTEX
3168 if (tag == locktag_futex && !__kmp_env_consistency_check) {
3169 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3170 } else
3171#endif
3172 {
3173 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3174 }
3175 if (rc) {
3176#if USE_ITT_BUILD
3177 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3178#endif
3179#if OMPT_SUPPORT && OMPT_OPTIONAL
3180 if (ompt_enabled.ompt_callback_mutex_acquired) {
3181 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3182 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3183 }
3184#endif
3185 return FTN_TRUE;
3186 } else {
3187#if USE_ITT_BUILD
3188 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3189#endif
3190 return FTN_FALSE;
3191 }
3192
3193#else // KMP_USE_DYNAMIC_LOCK
3194
3195 kmp_user_lock_p lck;
3196 int rc;
3197
3198 if ((__kmp_user_lock_kind == lk_tas) &&
3199 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3200 lck = (kmp_user_lock_p)user_lock;
3201 }
3202#if KMP_USE_FUTEX
3203 else if ((__kmp_user_lock_kind == lk_futex) &&
3204 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3205 lck = (kmp_user_lock_p)user_lock;
3206 }
3207#endif
3208 else {
3209 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3210 }
3211
3212#if USE_ITT_BUILD
3213 __kmp_itt_lock_acquiring(lck);
3214#endif /* USE_ITT_BUILD */
3215#if OMPT_SUPPORT && OMPT_OPTIONAL
3216 // This is the case, if called from omp_init_lock_with_hint:
3217 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3218 if (!codeptr)
3219 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3220 if (ompt_enabled.ompt_callback_mutex_acquire) {
3221 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3222 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3223 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3224 }
3225#endif
3226
3227 rc = TEST_LOCK(lck, gtid);
3228#if USE_ITT_BUILD
3229 if (rc) {
3230 __kmp_itt_lock_acquired(lck);
3231 } else {
3232 __kmp_itt_lock_cancelled(lck);
3233 }
3234#endif /* USE_ITT_BUILD */
3235#if OMPT_SUPPORT && OMPT_OPTIONAL
3236 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3237 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3238 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3239 }
3240#endif
3241
3242 return (rc ? FTN_TRUE : FTN_FALSE);
3243
3244 /* Can't use serial interval since not block structured */
3245
3246#endif // KMP_USE_DYNAMIC_LOCK
3247}
3248
3249/* try to acquire the lock */
3250int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3251#if KMP_USE_DYNAMIC_LOCK
3252 int rc;
3253#if USE_ITT_BUILD
3254 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3255#endif
3256#if OMPT_SUPPORT && OMPT_OPTIONAL
3257 // This is the case, if called from omp_init_lock_with_hint:
3258 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3259 if (!codeptr)
3260 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3261 if (ompt_enabled.ompt_callback_mutex_acquire) {
3262 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3263 ompt_mutex_nest_lock, omp_lock_hint_none,
3264 __ompt_get_mutex_impl_type(user_lock),
3265 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3266 }
3267#endif
3268 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3269#if USE_ITT_BUILD
3270 if (rc) {
3271 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3272 } else {
3273 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3274 }
3275#endif
3276#if OMPT_SUPPORT && OMPT_OPTIONAL
3277 if (ompt_enabled.enabled && rc) {
3278 if (rc == 1) {
3279 if (ompt_enabled.ompt_callback_mutex_acquired) {
3280 // lock_first
3281 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3282 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3283 codeptr);
3284 }
3285 } else {
3286 if (ompt_enabled.ompt_callback_nest_lock) {
3287 // lock_next
3288 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3289 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3290 }
3291 }
3292 }
3293#endif
3294 return rc;
3295
3296#else // KMP_USE_DYNAMIC_LOCK
3297
3298 kmp_user_lock_p lck;
3299 int rc;
3300
3301 if ((__kmp_user_lock_kind == lk_tas) &&
3302 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3303 OMP_NEST_LOCK_T_SIZE)) {
3304 lck = (kmp_user_lock_p)user_lock;
3305 }
3306#if KMP_USE_FUTEX
3307 else if ((__kmp_user_lock_kind == lk_futex) &&
3308 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3309 OMP_NEST_LOCK_T_SIZE)) {
3310 lck = (kmp_user_lock_p)user_lock;
3311 }
3312#endif
3313 else {
3314 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3315 }
3316
3317#if USE_ITT_BUILD
3318 __kmp_itt_lock_acquiring(lck);
3319#endif /* USE_ITT_BUILD */
3320
3321#if OMPT_SUPPORT && OMPT_OPTIONAL
3322 // This is the case, if called from omp_init_lock_with_hint:
3323 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3324 if (!codeptr)
3325 codeptr = OMPT_GET_RETURN_ADDRESS(0);
 3326 if (ompt_enabled.enabled &&
 3327 ompt_enabled.ompt_callback_mutex_acquire) {
3328 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3329 ompt_mutex_nest_lock, omp_lock_hint_none,
3330 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3331 codeptr);
3332 }
3333#endif
3334
3335 rc = TEST_NESTED_LOCK(lck, gtid);
3336#if USE_ITT_BUILD
3337 if (rc) {
3338 __kmp_itt_lock_acquired(lck);
3339 } else {
3340 __kmp_itt_lock_cancelled(lck);
3341 }
3342#endif /* USE_ITT_BUILD */
3343#if OMPT_SUPPORT && OMPT_OPTIONAL
3344 if (ompt_enabled.enabled && rc) {
3345 if (rc == 1) {
3346 if (ompt_enabled.ompt_callback_mutex_acquired) {
3347 // lock_first
3348 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3349 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3350 }
3351 } else {
3352 if (ompt_enabled.ompt_callback_nest_lock) {
3353 // lock_next
3354 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3355 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3356 }
3357 }
3358 }
3359#endif
3360 return rc;
3361
3362 /* Can't use serial interval since not block structured */
3363
3364#endif // KMP_USE_DYNAMIC_LOCK
3365}
3366
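/* Illustrative sketch, not part of this translation unit: omp_test_lock()
   (served by __kmpc_test_lock) returns nonzero only if the lock was acquired
   without blocking, so it can be used to skip optional work rather than wait;
   update_shared_cache() is a placeholder.

   #include <omp.h>
   if (omp_test_lock(&l)) {
     update_shared_cache();   // guarded, optional work
     omp_unset_lock(&l);
   } else {
     // lock busy: skip this time
   }
   // omp_test_nest_lock() instead returns the new nesting depth on success.
*/
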
3367// Interface to fast scalable reduce methods routines
3368
3369// keep the selected method in a thread local structure for cross-function
3370// usage: will be used in __kmpc_end_reduce* functions;
3371// another solution: to re-determine the method one more time in
3372// __kmpc_end_reduce* functions (new prototype required then)
3373// AT: which solution is better?
3374#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3375 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3376
3377#define __KMP_GET_REDUCTION_METHOD(gtid) \
3378 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3379
3380// description of the packed_reduction_method variable: look at the macros in
3381// kmp.h
3382
3383// used in a critical section reduce block
3384static __forceinline void
3385__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3386 kmp_critical_name *crit) {
3387
3388 // this lock was visible to a customer and to the threading profile tool as a
3389 // serial overhead span (although it's used for an internal purpose only)
3390 // why was it visible in previous implementation?
3391 // should we keep it visible in new reduce block?
3392 kmp_user_lock_p lck;
3393
3394#if KMP_USE_DYNAMIC_LOCK
3395
3396 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3397 // Check if it is initialized.
3398 if (*lk == 0) {
3399 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3400 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3401 KMP_GET_D_TAG(__kmp_user_lock_seq));
3402 } else {
3403 __kmp_init_indirect_csptr(crit, loc, global_tid,
3404 KMP_GET_I_TAG(__kmp_user_lock_seq));
3405 }
3406 }
3407 // Branch for accessing the actual lock object and set operation. This
3408 // branching is inevitable since this lock initialization does not follow the
3409 // normal dispatch path (lock table is not used).
3410 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3411 lck = (kmp_user_lock_p)lk;
3412 KMP_DEBUG_ASSERT(lck != NULL);
3413 if (__kmp_env_consistency_check) {
3414 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3415 }
3416 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3417 } else {
3418 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3419 lck = ilk->lock;
3420 KMP_DEBUG_ASSERT(lck != NULL);
3421 if (__kmp_env_consistency_check) {
3422 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3423 }
3424 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3425 }
3426
3427#else // KMP_USE_DYNAMIC_LOCK
3428
3429 // We know that the fast reduction code is only emitted by Intel compilers
3430 // with 32 byte critical sections. If there isn't enough space, then we
3431 // have to use a pointer.
3432 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3433 lck = (kmp_user_lock_p)crit;
3434 } else {
3435 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3436 }
3437 KMP_DEBUG_ASSERT(lck != NULL);
3438
3439 if (__kmp_env_consistency_check)
3440 __kmp_push_sync(global_tid, ct_critical, loc, lck);
3441
3442 __kmp_acquire_user_lock_with_checks(lck, global_tid);
3443
3444#endif // KMP_USE_DYNAMIC_LOCK
3445}
3446
3447// used in a critical section reduce block
3448static __forceinline void
3449__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3450 kmp_critical_name *crit) {
3451
3452 kmp_user_lock_p lck;
3453
3454#if KMP_USE_DYNAMIC_LOCK
3455
3456 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3457 lck = (kmp_user_lock_p)crit;
3458 if (__kmp_env_consistency_check)
3459 __kmp_pop_sync(global_tid, ct_critical, loc);
3460 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3461 } else {
3462 kmp_indirect_lock_t *ilk =
3463 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3464 if (__kmp_env_consistency_check)
3465 __kmp_pop_sync(global_tid, ct_critical, loc);
3466 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3467 }
3468
3469#else // KMP_USE_DYNAMIC_LOCK
3470
3471 // We know that the fast reduction code is only emitted by Intel compilers
3472 // with 32 byte critical sections. If there isn't enough space, then we have
3473 // to use a pointer.
3474 if (__kmp_base_user_lock_size > 32) {
3475 lck = *((kmp_user_lock_p *)crit);
3476 KMP_ASSERT(lck != NULL);
3477 } else {
3478 lck = (kmp_user_lock_p)crit;
3479 }
3480
3481 if (__kmp_env_consistency_check)
3482 __kmp_pop_sync(global_tid, ct_critical, loc);
3483
3484 __kmp_release_user_lock_with_checks(lck, global_tid);
3485
3486#endif // KMP_USE_DYNAMIC_LOCK
3487} // __kmp_end_critical_section_reduce_block
3488
3489static __forceinline int
3490__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3491 int *task_state) {
3492 kmp_team_t *team;
3493
 3494 // Check whether we are inside a teams construct.
3495 if (th->th.th_teams_microtask) {
3496 *team_p = team = th->th.th_team;
3497 if (team->t.t_level == th->th.th_teams_level) {
3498 // This is reduction at teams construct.
3499 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3500 // Let's swap teams temporarily for the reduction.
3501 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3502 th->th.th_team = team->t.t_parent;
3503 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3504 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3505 *task_state = th->th.th_task_state;
3506 th->th.th_task_state = 0;
3507
3508 return 1;
3509 }
3510 }
3511 return 0;
3512}
3513
3514static __forceinline void
3515__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3516 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3517 th->th.th_info.ds.ds_tid = 0;
3518 th->th.th_team = team;
3519 th->th.th_team_nproc = team->t.t_nproc;
3520 th->th.th_task_team = team->t.t_task_team[task_state];
3521 __kmp_type_convert(task_state, &(th->th.th_task_state));
3522}
3523
3524/* 2.a.i. Reduce Block without a terminating barrier */
3540kmp_int32
3541__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3542 size_t reduce_size, void *reduce_data,
3543 void (*reduce_func)(void *lhs_data, void *rhs_data),
3544 kmp_critical_name *lck) {
3545
3546 KMP_COUNT_BLOCK(REDUCE_nowait);
3547 int retval = 0;
3548 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3549 kmp_info_t *th;
3550 kmp_team_t *team;
3551 int teams_swapped = 0, task_state;
3552 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3553 __kmp_assert_valid_gtid(global_tid);
3554
3555 // why do we need this initialization here at all?
 3556 // The reduction clause cannot be used as a stand-alone directive.
3557
3558 // do not call __kmp_serial_initialize(), it will be called by
3559 // __kmp_parallel_initialize() if needed
3560 // possible detection of false-positive race by the threadchecker ???
3561 if (!TCR_4(__kmp_init_parallel))
3562 __kmp_parallel_initialize();
3563
3564 __kmp_resume_if_soft_paused();
3565
3566// check correctness of reduce block nesting
3567#if KMP_USE_DYNAMIC_LOCK
3568 if (__kmp_env_consistency_check)
3569 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3570#else
3571 if (__kmp_env_consistency_check)
3572 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3573#endif
3574
3575 th = __kmp_thread_from_gtid(global_tid);
3576 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3577
3578 // packed_reduction_method value will be reused by __kmp_end_reduce* function,
3579 // the value should be kept in a variable
3580 // the variable should be either a construct-specific or thread-specific
3581 // property, not a team specific property
3582 // (a thread can reach the next reduce block on the next construct, reduce
3583 // method may differ on the next construct)
3584 // an ident_t "loc" parameter could be used as a construct-specific property
3585 // (what if loc == 0?)
3586 // (if both construct-specific and team-specific variables were shared,
 3587 // then unnecessary extra syncs would be needed)
3588 // a thread-specific variable is better regarding two issues above (next
3589 // construct and extra syncs)
3590 // a thread-specific "th_local.reduction_method" variable is used currently
3591 // each thread executes 'determine' and 'set' lines (no need to execute by one
 3592 // thread, to avoid unnecessary extra syncs)
3593
3594 packed_reduction_method = __kmp_determine_reduction_method(
3595 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3596 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3597
3598 OMPT_REDUCTION_DECL(th, global_tid);
3599 if (packed_reduction_method == critical_reduce_block) {
3600
3601 OMPT_REDUCTION_BEGIN;
3602
3603 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3604 retval = 1;
3605
3606 } else if (packed_reduction_method == empty_reduce_block) {
3607
3608 OMPT_REDUCTION_BEGIN;
3609
3610 // usage: if team size == 1, no synchronization is required ( Intel
3611 // platforms only )
3612 retval = 1;
3613
3614 } else if (packed_reduction_method == atomic_reduce_block) {
3615
3616 retval = 2;
3617
3618 // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3619 // won't be called by the code gen)
3620 // (it's not quite good, because the checking block has been closed by
3621 // this 'pop',
3622 // but atomic operation has not been executed yet, will be executed
3623 // slightly later, literally on next instruction)
3624 if (__kmp_env_consistency_check)
3625 __kmp_pop_sync(global_tid, ct_reduce, loc);
3626
3627 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3628 tree_reduce_block)) {
3629
3630// AT: performance issue: a real barrier here
3631// AT: (if primary thread is slow, other threads are blocked here waiting for
3632// the primary thread to come and release them)
3633// AT: (it's not what a customer might expect specifying NOWAIT clause)
3634// AT: (specifying NOWAIT won't result in improvement of performance, it'll
3635// be confusing to a customer)
3636// AT: another implementation of *barrier_gather*nowait() (or some other design)
3637// might go faster and be more in line with sense of NOWAIT
3638// AT: TO DO: do epcc test and compare times
3639
3640// this barrier should be invisible to a customer and to the threading profile
3641// tool (it's neither a terminating barrier nor customer's code, it's
3642// used for an internal purpose)
3643#if OMPT_SUPPORT
 3644 // JP: can this barrier potentially lead to task scheduling?
3645 // JP: as long as there is a barrier in the implementation, OMPT should and
3646 // will provide the barrier events
3647 // so we set-up the necessary frame/return addresses.
3648 ompt_frame_t *ompt_frame;
3649 if (ompt_enabled.enabled) {
3650 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3651 if (ompt_frame->enter_frame.ptr == NULL)
3652 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3653 }
3654 OMPT_STORE_RETURN_ADDRESS(global_tid);
3655#endif
3656#if USE_ITT_NOTIFY
3657 __kmp_threads[global_tid]->th.th_ident = loc;
3658#endif
3659 retval =
3660 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3661 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3662 retval = (retval != 0) ? (0) : (1);
3663#if OMPT_SUPPORT && OMPT_OPTIONAL
3664 if (ompt_enabled.enabled) {
3665 ompt_frame->enter_frame = ompt_data_none;
3666 }
3667#endif
3668
3669 // all other workers except primary thread should do this pop here
3670 // ( none of other workers will get to __kmpc_end_reduce_nowait() )
3671 if (__kmp_env_consistency_check) {
3672 if (retval == 0) {
3673 __kmp_pop_sync(global_tid, ct_reduce, loc);
3674 }
3675 }
3676
3677 } else {
3678
3679 // should never reach this block
3680 KMP_ASSERT(0); // "unexpected method"
3681 }
3682 if (teams_swapped) {
3683 __kmp_restore_swapped_teams(th, team, task_state);
3684 }
3685 KA_TRACE(
3686 10,
3687 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3688 global_tid, packed_reduction_method, retval));
3689
3690 return retval;
3691}
3692
3701void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3702 kmp_critical_name *lck) {
3703
3704 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3705
3706 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3707 __kmp_assert_valid_gtid(global_tid);
3708
3709 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3710
3711 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3712
3713 if (packed_reduction_method == critical_reduce_block) {
3714
3715 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3716 OMPT_REDUCTION_END;
3717
3718 } else if (packed_reduction_method == empty_reduce_block) {
3719
3720 // usage: if team size == 1, no synchronization is required ( on Intel
3721 // platforms only )
3722
3723 OMPT_REDUCTION_END;
3724
3725 } else if (packed_reduction_method == atomic_reduce_block) {
3726
3727 // neither primary thread nor other workers should get here
3728 // (code gen does not generate this call in case 2: atomic reduce block)
 3729 // actually it's better to remove this else-if entirely;
 3730 // after removal such values will fall through to the 'else' and hit the assert
3731
3732 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3733 tree_reduce_block)) {
3734
3735 // only primary thread gets here
3736 // OMPT: tree reduction is annotated in the barrier code
3737
3738 } else {
3739
3740 // should never reach this block
3741 KMP_ASSERT(0); // "unexpected method"
3742 }
3743
3744 if (__kmp_env_consistency_check)
3745 __kmp_pop_sync(global_tid, ct_reduce, loc);
3746
3747 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3748 global_tid, packed_reduction_method));
3749
3750 return;
3751}
3752
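/* Illustrative sketch of assumed code generation, not taken from a specific
   compiler: how the return value of __kmpc_reduce_nowait is consumed for a
   construct like "#pragma omp for reduction(+:sum) nowait". psum is the
   thread-private partial result, reduce_func is a compiler-generated combiner
   that adds rhs into lhs, and crit is a compiler-emitted, zero-initialized
   kmp_critical_name.

   void *rdata[1] = {&psum};   // reduce_data: list of private copies
   switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(rdata), rdata,
                                reduce_func, &crit)) {
   case 1: // combine directly; the runtime serialized us if that was needed
     sum += psum;
     __kmpc_end_reduce_nowait(loc, gtid, &crit);
     break;
   case 2: // combine with an atomic update; no end call is generated
     // (codegen would emit a __kmpc_atomic_* entry point here)
     break;
   default: // 0: this thread's value was already folded in by the barrier
     break;
   }
*/
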
3753/* 2.a.ii. Reduce Block with a terminating barrier */
3754
3770kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3771 size_t reduce_size, void *reduce_data,
3772 void (*reduce_func)(void *lhs_data, void *rhs_data),
3773 kmp_critical_name *lck) {
3774 KMP_COUNT_BLOCK(REDUCE_wait);
3775 int retval = 0;
3776 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3777 kmp_info_t *th;
3778 kmp_team_t *team;
3779 int teams_swapped = 0, task_state;
3780
3781 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3782 __kmp_assert_valid_gtid(global_tid);
3783
3784 // why do we need this initialization here at all?
 3785 // The reduction clause cannot be a stand-alone directive.
3786
3787 // do not call __kmp_serial_initialize(), it will be called by
3788 // __kmp_parallel_initialize() if needed
3789 // possible detection of false-positive race by the threadchecker ???
3790 if (!TCR_4(__kmp_init_parallel))
3791 __kmp_parallel_initialize();
3792
3793 __kmp_resume_if_soft_paused();
3794
3795// check correctness of reduce block nesting
3796#if KMP_USE_DYNAMIC_LOCK
3797 if (__kmp_env_consistency_check)
3798 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3799#else
3800 if (__kmp_env_consistency_check)
3801 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3802#endif
3803
3804 th = __kmp_thread_from_gtid(global_tid);
3805 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3806
3807 packed_reduction_method = __kmp_determine_reduction_method(
3808 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3809 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3810
3811 OMPT_REDUCTION_DECL(th, global_tid);
3812
3813 if (packed_reduction_method == critical_reduce_block) {
3814
3815 OMPT_REDUCTION_BEGIN;
3816 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3817 retval = 1;
3818
3819 } else if (packed_reduction_method == empty_reduce_block) {
3820
3821 OMPT_REDUCTION_BEGIN;
3822    // usage: if team size == 1, no synchronization is required
3823    // (Intel platforms only)
3824 retval = 1;
3825
3826 } else if (packed_reduction_method == atomic_reduce_block) {
3827
3828 retval = 2;
3829
3830 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3831 tree_reduce_block)) {
3832
3833// case tree_reduce_block:
3834// this barrier should be visible to the user and to threading profile
3835// tools (it is a terminating barrier on constructs if NOWAIT is not specified)
3836#if OMPT_SUPPORT
3837 ompt_frame_t *ompt_frame;
3838 if (ompt_enabled.enabled) {
3839 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3840 if (ompt_frame->enter_frame.ptr == NULL)
3841 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3842 }
3843 OMPT_STORE_RETURN_ADDRESS(global_tid);
3844#endif
3845#if USE_ITT_NOTIFY
3846 __kmp_threads[global_tid]->th.th_ident =
3847 loc; // needed for correct notification of frames
3848#endif
3849 retval =
3850 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3851 global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3852 retval = (retval != 0) ? (0) : (1);
3853#if OMPT_SUPPORT && OMPT_OPTIONAL
3854 if (ompt_enabled.enabled) {
3855 ompt_frame->enter_frame = ompt_data_none;
3856 }
3857#endif
3858
3859    // all workers except the primary thread should do this pop here
3860    // (only the primary thread will enter __kmpc_end_reduce())
3861 if (__kmp_env_consistency_check) {
3862 if (retval == 0) { // 0: all other workers; 1: primary thread
3863 __kmp_pop_sync(global_tid, ct_reduce, loc);
3864 }
3865 }
3866
3867 } else {
3868
3869 // should never reach this block
3870 KMP_ASSERT(0); // "unexpected method"
3871 }
3872 if (teams_swapped) {
3873 __kmp_restore_swapped_teams(th, team, task_state);
3874 }
3875
3876 KA_TRACE(10,
3877 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3878 global_tid, packed_reduction_method, retval));
3879 return retval;
3880}
3881
3892void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3893 kmp_critical_name *lck) {
3894
3895 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3896 kmp_info_t *th;
3897 kmp_team_t *team;
3898 int teams_swapped = 0, task_state;
3899
3900 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3901 __kmp_assert_valid_gtid(global_tid);
3902
3903 th = __kmp_thread_from_gtid(global_tid);
3904 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3905
3906 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3907
3908  // this barrier should be visible to the user and to threading profile
3909  // tools (it is a terminating barrier on constructs if NOWAIT is not specified)
3910 OMPT_REDUCTION_DECL(th, global_tid);
3911
3912 if (packed_reduction_method == critical_reduce_block) {
3913 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3914
3915 OMPT_REDUCTION_END;
3916
3917// TODO: implicit barrier: should be exposed
3918#if OMPT_SUPPORT
3919 ompt_frame_t *ompt_frame;
3920 if (ompt_enabled.enabled) {
3921 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3922 if (ompt_frame->enter_frame.ptr == NULL)
3923 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3924 }
3925 OMPT_STORE_RETURN_ADDRESS(global_tid);
3926#endif
3927#if USE_ITT_NOTIFY
3928 __kmp_threads[global_tid]->th.th_ident = loc;
3929#endif
3930 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3931#if OMPT_SUPPORT && OMPT_OPTIONAL
3932 if (ompt_enabled.enabled) {
3933 ompt_frame->enter_frame = ompt_data_none;
3934 }
3935#endif
3936
3937 } else if (packed_reduction_method == empty_reduce_block) {
3938
3939 OMPT_REDUCTION_END;
3940
3941// usage: if team size==1, no synchronization is required (Intel platforms only)
3942
3943// TODO: implicit barrier: should be exposed
3944#if OMPT_SUPPORT
3945 ompt_frame_t *ompt_frame;
3946 if (ompt_enabled.enabled) {
3947 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3948 if (ompt_frame->enter_frame.ptr == NULL)
3949 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3950 }
3951 OMPT_STORE_RETURN_ADDRESS(global_tid);
3952#endif
3953#if USE_ITT_NOTIFY
3954 __kmp_threads[global_tid]->th.th_ident = loc;
3955#endif
3956 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3957#if OMPT_SUPPORT && OMPT_OPTIONAL
3958 if (ompt_enabled.enabled) {
3959 ompt_frame->enter_frame = ompt_data_none;
3960 }
3961#endif
3962
3963 } else if (packed_reduction_method == atomic_reduce_block) {
3964
3965#if OMPT_SUPPORT
3966 ompt_frame_t *ompt_frame;
3967 if (ompt_enabled.enabled) {
3968 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3969 if (ompt_frame->enter_frame.ptr == NULL)
3970 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3971 }
3972 OMPT_STORE_RETURN_ADDRESS(global_tid);
3973#endif
3974// TODO: implicit barrier: should be exposed
3975#if USE_ITT_NOTIFY
3976 __kmp_threads[global_tid]->th.th_ident = loc;
3977#endif
3978 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3979#if OMPT_SUPPORT && OMPT_OPTIONAL
3980 if (ompt_enabled.enabled) {
3981 ompt_frame->enter_frame = ompt_data_none;
3982 }
3983#endif
3984
3985 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3986 tree_reduce_block)) {
3987
3988 // only primary thread executes here (primary releases all other workers)
3989 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3990 global_tid);
3991
3992 } else {
3993
3994 // should never reach this block
3995 KMP_ASSERT(0); // "unexpected method"
3996 }
3997 if (teams_swapped) {
3998 __kmp_restore_swapped_teams(th, team, task_state);
3999 }
4000
4001 if (__kmp_env_consistency_check)
4002 __kmp_pop_sync(global_tid, ct_reduce, loc);
4003
4004 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
4005 global_tid, packed_reduction_method));
4006
4007 return;
4008}
4009
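/* Editorial sketch (not part of the runtime): the blocking variant is driven
 * the same way, except that __kmpc_end_reduce() carries the closing barrier,
 * so it is also the natural call after the atomic (ret == 2) case; its
 * atomic_reduce_block branch above only performs that barrier. Placeholder
 * names as in the nowait sketch.
 *
 *   kmp_int32 ret = __kmpc_reduce(loc, gtid, 1, sizeof(priv), &priv,
 *                                 reduce_func, &lck);
 *   if (ret == 1) {
 *     shared += priv;                     // combine directly
 *     __kmpc_end_reduce(loc, gtid, &lck); // release others / barrier
 *   } else if (ret == 2) {
 *     // <atomic> shared += priv;
 *     __kmpc_end_reduce(loc, gtid, &lck); // barrier only
 *   }
 *   // ret == 0: nothing more for this thread to do
 */
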
4010#undef __KMP_GET_REDUCTION_METHOD
4011#undef __KMP_SET_REDUCTION_METHOD
4012
4013/* end of interface to fast scalable reduce routines */
4014
4015kmp_uint64 __kmpc_get_taskid() {
4016
4017 kmp_int32 gtid;
4018 kmp_info_t *thread;
4019
4020 gtid = __kmp_get_gtid();
4021 if (gtid < 0) {
4022 return 0;
4023 }
4024 thread = __kmp_thread_from_gtid(gtid);
4025 return thread->th.th_current_task->td_task_id;
4026
4027} // __kmpc_get_taskid
4028
4029kmp_uint64 __kmpc_get_parent_taskid() {
4030
4031 kmp_int32 gtid;
4032 kmp_info_t *thread;
4033 kmp_taskdata_t *parent_task;
4034
4035 gtid = __kmp_get_gtid();
4036 if (gtid < 0) {
4037 return 0;
4038 }
4039 thread = __kmp_thread_from_gtid(gtid);
4040 parent_task = thread->th.th_current_task->td_parent;
4041 return (parent_task == NULL ? 0 : parent_task->td_task_id);
4042
4043} // __kmpc_get_parent_taskid
4044
4056void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
4057 const struct kmp_dim *dims) {
4058 __kmp_assert_valid_gtid(gtid);
4059 int j, idx;
4060 kmp_int64 last, trace_count;
4061 kmp_info_t *th = __kmp_threads[gtid];
4062 kmp_team_t *team = th->th.th_team;
4063 kmp_uint32 *flags;
4064 kmp_disp_t *pr_buf = th->th.th_dispatch;
4065 dispatch_shared_info_t *sh_buf;
4066
4067 KA_TRACE(
4068 20,
4069 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4070 gtid, num_dims, !team->t.t_serialized));
4071 KMP_DEBUG_ASSERT(dims != NULL);
4072 KMP_DEBUG_ASSERT(num_dims > 0);
4073
4074 if (team->t.t_serialized) {
4075 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4076 return; // no dependencies if team is serialized
4077 }
4078 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4079 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
4080 // the next loop
4081 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4082
4083 // Save bounds info into allocated private buffer
4084 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4085 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4086 th, sizeof(kmp_int64) * (4 * num_dims + 1));
4087 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4088 pr_buf->th_doacross_info[0] =
4089 (kmp_int64)num_dims; // first element is number of dimensions
4090  // Also save the address of num_done so it can be accessed later without
4091  // knowing the buffer index
4092 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4093 pr_buf->th_doacross_info[2] = dims[0].lo;
4094 pr_buf->th_doacross_info[3] = dims[0].up;
4095 pr_buf->th_doacross_info[4] = dims[0].st;
4096 last = 5;
4097 for (j = 1; j < num_dims; ++j) {
4098    kmp_int64
4099        range_length; // range length of each dimension except the first (dims[0])
4100 if (dims[j].st == 1) { // most common case
4101      // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
4102 range_length = dims[j].up - dims[j].lo + 1;
4103 } else {
4104 if (dims[j].st > 0) {
4105 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4106 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4107 } else { // negative increment
4108 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4109 range_length =
4110 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4111 }
4112 }
4113 pr_buf->th_doacross_info[last++] = range_length;
4114 pr_buf->th_doacross_info[last++] = dims[j].lo;
4115 pr_buf->th_doacross_info[last++] = dims[j].up;
4116 pr_buf->th_doacross_info[last++] = dims[j].st;
4117 }
4118
4119 // Compute total trip count.
4120 // Start with range of dims[0] which we don't need to keep in the buffer.
4121 if (dims[0].st == 1) { // most common case
4122 trace_count = dims[0].up - dims[0].lo + 1;
4123 } else if (dims[0].st > 0) {
4124 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4125 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4126 } else { // negative increment
4127 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4128 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4129 }
4130 for (j = 1; j < num_dims; ++j) {
4131 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
4132 }
4133 KMP_DEBUG_ASSERT(trace_count > 0);
4134
4135  // Check if the shared buffer is still occupied by another loop (the one
4136  // that used buffer index idx - __kmp_dispatch_num_buffers)
4137 if (idx != sh_buf->doacross_buf_idx) {
4138 // Shared buffer is occupied, wait for it to be free
4139 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4140 __kmp_eq_4, NULL);
4141 }
4142#if KMP_32_BIT_ARCH
4143 // Check if we are the first thread. After the CAS the first thread gets 0,
4144 // others get 1 if initialization is in progress, allocated pointer otherwise.
4145 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
4146 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4147 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4148#else
4149 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4150 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4151#endif
4152 if (flags == NULL) {
4153 // we are the first thread, allocate the array of flags
4154 size_t size =
4155 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
4156 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4157 KMP_MB();
4158 sh_buf->doacross_flags = flags;
4159 } else if (flags == (kmp_uint32 *)1) {
4160#if KMP_32_BIT_ARCH
4161 // initialization is still in progress, need to wait
4162 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4163#else
4164 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4165#endif
4166 KMP_YIELD(TRUE);
4167 KMP_MB();
4168 } else {
4169 KMP_MB();
4170 }
4171 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4172 pr_buf->th_doacross_flags =
4173 sh_buf->doacross_flags; // save private copy in order to not
4174 // touch shared buffer on each iteration
4175 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4176}
4177
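/* Editorial summary of the private bookkeeping set up above: after
 * __kmpc_doacross_init() the th_doacross_info buffer (4 * num_dims + 1
 * kmp_int64 entries) holds
 *
 *   [0]                 num_dims
 *   [1]                 address of sh_buf->doacross_num_done
 *   [2], [3], [4]       lo, up, st of dims[0]
 *   [4j+1] .. [4j+4]    range_length, lo, up, st of dims[j], j = 1..num_dims-1
 *
 * e.g. for num_dims == 2 the buffer has 9 entries, and the trip-count loop
 * above multiplies the kept range_length values stored at [4j+1]. */
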
4178void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4179 __kmp_assert_valid_gtid(gtid);
4180 kmp_int64 shft;
4181 size_t num_dims, i;
4182 kmp_uint32 flag;
4183 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4184 kmp_info_t *th = __kmp_threads[gtid];
4185 kmp_team_t *team = th->th.th_team;
4186 kmp_disp_t *pr_buf;
4187 kmp_int64 lo, up, st;
4188
4189 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4190 if (team->t.t_serialized) {
4191 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4192 return; // no dependencies if team is serialized
4193 }
4194
4195 // calculate sequential iteration number and check out-of-bounds condition
4196 pr_buf = th->th.th_dispatch;
4197 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4198 num_dims = (size_t)pr_buf->th_doacross_info[0];
4199 lo = pr_buf->th_doacross_info[2];
4200 up = pr_buf->th_doacross_info[3];
4201 st = pr_buf->th_doacross_info[4];
4202#if OMPT_SUPPORT && OMPT_OPTIONAL
4203 ompt_dependence_t deps[num_dims];
4204#endif
4205 if (st == 1) { // most common case
4206 if (vec[0] < lo || vec[0] > up) {
4207 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4208 "bounds [%lld,%lld]\n",
4209 gtid, vec[0], lo, up));
4210 return;
4211 }
4212 iter_number = vec[0] - lo;
4213 } else if (st > 0) {
4214 if (vec[0] < lo || vec[0] > up) {
4215 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4216 "bounds [%lld,%lld]\n",
4217 gtid, vec[0], lo, up));
4218 return;
4219 }
4220 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4221 } else { // negative increment
4222 if (vec[0] > lo || vec[0] < up) {
4223 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4224 "bounds [%lld,%lld]\n",
4225 gtid, vec[0], lo, up));
4226 return;
4227 }
4228 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4229 }
4230#if OMPT_SUPPORT && OMPT_OPTIONAL
4231 deps[0].variable.value = iter_number;
4232 deps[0].dependence_type = ompt_dependence_type_sink;
4233#endif
4234 for (i = 1; i < num_dims; ++i) {
4235 kmp_int64 iter, ln;
4236 size_t j = i * 4;
4237 ln = pr_buf->th_doacross_info[j + 1];
4238 lo = pr_buf->th_doacross_info[j + 2];
4239 up = pr_buf->th_doacross_info[j + 3];
4240 st = pr_buf->th_doacross_info[j + 4];
4241 if (st == 1) {
4242 if (vec[i] < lo || vec[i] > up) {
4243 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4244 "bounds [%lld,%lld]\n",
4245 gtid, vec[i], lo, up));
4246 return;
4247 }
4248 iter = vec[i] - lo;
4249 } else if (st > 0) {
4250 if (vec[i] < lo || vec[i] > up) {
4251 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4252 "bounds [%lld,%lld]\n",
4253 gtid, vec[i], lo, up));
4254 return;
4255 }
4256 iter = (kmp_uint64)(vec[i] - lo) / st;
4257 } else { // st < 0
4258 if (vec[i] > lo || vec[i] < up) {
4259 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4260 "bounds [%lld,%lld]\n",
4261 gtid, vec[i], lo, up));
4262 return;
4263 }
4264 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4265 }
4266 iter_number = iter + ln * iter_number;
4267#if OMPT_SUPPORT && OMPT_OPTIONAL
4268 deps[i].variable.value = iter;
4269 deps[i].dependence_type = ompt_dependence_type_sink;
4270#endif
4271 }
4272 shft = iter_number % 32; // use 32-bit granularity
4273 iter_number >>= 5; // divided by 32
4274 flag = 1 << shft;
4275 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4276 KMP_YIELD(TRUE);
4277 }
4278 KMP_MB();
4279#if OMPT_SUPPORT && OMPT_OPTIONAL
4280 if (ompt_enabled.ompt_callback_dependences) {
4281 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4282 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4283 }
4284#endif
4285 KA_TRACE(20,
4286 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4287 gtid, (iter_number << 5) + shft));
4288}
4289
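/* Editorial worked example of the index math above (hypothetical numbers):
 * for a 2-dimensional nest with dims[0] = {lo 0, up 99, st 1} and
 * dims[1] = {lo 0, up 9, st 1}, the kept range length ln of dims[1] is 10.
 * For vec = {3, 7}:
 *
 *   iter_number = 7 + 10 * 3 = 37
 *   shft        = 37 % 32    = 5     // bit position within a 32-bit word
 *   word        = 37 >> 5    = 1     // index into th_doacross_flags
 *   flag        = 1 << 5     = 0x20
 *
 * so the wait spins until bit 5 of th_doacross_flags[1] is set, which is the
 * bit __kmpc_doacross_post() sets for iteration (3, 7). */
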
4290void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4291 __kmp_assert_valid_gtid(gtid);
4292 kmp_int64 shft;
4293 size_t num_dims, i;
4294 kmp_uint32 flag;
4295 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4296 kmp_info_t *th = __kmp_threads[gtid];
4297 kmp_team_t *team = th->th.th_team;
4298 kmp_disp_t *pr_buf;
4299 kmp_int64 lo, st;
4300
4301 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4302 if (team->t.t_serialized) {
4303 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4304 return; // no dependencies if team is serialized
4305 }
4306
4307 // calculate sequential iteration number (same as in "wait" but no
4308 // out-of-bounds checks)
4309 pr_buf = th->th.th_dispatch;
4310 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4311 num_dims = (size_t)pr_buf->th_doacross_info[0];
4312 lo = pr_buf->th_doacross_info[2];
4313 st = pr_buf->th_doacross_info[4];
4314#if OMPT_SUPPORT && OMPT_OPTIONAL
4315 ompt_dependence_t deps[num_dims];
4316#endif
4317 if (st == 1) { // most common case
4318 iter_number = vec[0] - lo;
4319 } else if (st > 0) {
4320 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4321 } else { // negative increment
4322 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4323 }
4324#if OMPT_SUPPORT && OMPT_OPTIONAL
4325 deps[0].variable.value = iter_number;
4326 deps[0].dependence_type = ompt_dependence_type_source;
4327#endif
4328 for (i = 1; i < num_dims; ++i) {
4329 kmp_int64 iter, ln;
4330 size_t j = i * 4;
4331 ln = pr_buf->th_doacross_info[j + 1];
4332 lo = pr_buf->th_doacross_info[j + 2];
4333 st = pr_buf->th_doacross_info[j + 4];
4334 if (st == 1) {
4335 iter = vec[i] - lo;
4336 } else if (st > 0) {
4337 iter = (kmp_uint64)(vec[i] - lo) / st;
4338 } else { // st < 0
4339 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4340 }
4341 iter_number = iter + ln * iter_number;
4342#if OMPT_SUPPORT && OMPT_OPTIONAL
4343 deps[i].variable.value = iter;
4344 deps[i].dependence_type = ompt_dependence_type_source;
4345#endif
4346 }
4347#if OMPT_SUPPORT && OMPT_OPTIONAL
4348 if (ompt_enabled.ompt_callback_dependences) {
4349 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4350 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4351 }
4352#endif
4353 shft = iter_number % 32; // use 32-bit granularity
4354 iter_number >>= 5; // divided by 32
4355 flag = 1 << shft;
4356 KMP_MB();
4357 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4358 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4359 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4360 (iter_number << 5) + shft));
4361}
4362
4363void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4364 __kmp_assert_valid_gtid(gtid);
4365 kmp_int32 num_done;
4366 kmp_info_t *th = __kmp_threads[gtid];
4367 kmp_team_t *team = th->th.th_team;
4368 kmp_disp_t *pr_buf = th->th.th_dispatch;
4369
4370 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4371 if (team->t.t_serialized) {
4372 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4373 return; // nothing to do
4374 }
4375 num_done =
4376 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4377 if (num_done == th->th.th_team_nproc) {
4378 // we are the last thread, need to free shared resources
4379 int idx = pr_buf->th_doacross_buf_idx - 1;
4380 dispatch_shared_info_t *sh_buf =
4381 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4382 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4383 (kmp_int64)&sh_buf->doacross_num_done);
4384 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4385 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4386 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4387 sh_buf->doacross_flags = NULL;
4388 sh_buf->doacross_num_done = 0;
4389 sh_buf->doacross_buf_idx +=
4390 __kmp_dispatch_num_buffers; // free buffer for future re-use
4391 }
4392 // free private resources (need to keep buffer index forever)
4393 pr_buf->th_doacross_flags = NULL;
4394 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4395 pr_buf->th_doacross_info = NULL;
4396 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4397}
4398
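/* Editorial sketch (not part of the runtime): roughly how a compiler might
 * drive the doacross entry points above for a worksharing loop nest with
 * ordered(2) and cross-iteration depend(sink)/depend(source) clauses. The
 * names loc, gtid, N, M, i and j are placeholders.
 *
 *   struct kmp_dim dims[2] = {{0, N - 1, 1}, {0, M - 1, 1}}; // lo, up, st
 *   __kmpc_doacross_init(loc, gtid, 2, dims);
 *   for (i = ...) for (j = ...) {            // the distributed iterations
 *     kmp_int64 sink[2] = {i - 1, j};
 *     __kmpc_doacross_wait(loc, gtid, sink); // depend(sink: i - 1, j)
 *     ... loop body ...
 *     kmp_int64 src[2] = {i, j};
 *     __kmpc_doacross_post(loc, gtid, src);  // depend(source)
 *   }
 *   __kmpc_doacross_fini(loc, gtid);
 */
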
4399/* OpenMP 5.1 Memory Management routines */
4400void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4401 return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
4402}
4403
4404void *omp_aligned_alloc(size_t align, size_t size,
4405 omp_allocator_handle_t allocator) {
4406 return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
4407}
4408
4409void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4410 return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
4411}
4412
4413void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4414 omp_allocator_handle_t allocator) {
4415 return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
4416}
4417
4418void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4419 omp_allocator_handle_t free_allocator) {
4420 return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
4421 free_allocator);
4422}
4423
4424void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4425 ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4426}
4427/* end of OpenMP 5.1 Memory Management routines */
4428
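/* Editorial usage sketch of the allocation entry points above, written as
 * user code; omp_default_mem_alloc is one of the predefined allocator handles
 * from omp.h and is not defined in this file.
 *
 *   double *a = (double *)omp_aligned_alloc(64, n * sizeof(double),
 *                                           omp_default_mem_alloc);
 *   double *b = (double *)omp_calloc(n, sizeof(double), omp_default_mem_alloc);
 *   a = (double *)omp_realloc(a, 2 * n * sizeof(double),
 *                             omp_default_mem_alloc, omp_default_mem_alloc);
 *   omp_free(a, omp_default_mem_alloc);
 *   omp_free(b, omp_default_mem_alloc);
 */
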
4429int __kmpc_get_target_offload(void) {
4430 if (!__kmp_init_serial) {
4431 __kmp_serial_initialize();
4432 }
4433 return __kmp_target_offload;
4434}
4435
4436int __kmpc_pause_resource(kmp_pause_status_t level) {
4437 if (!__kmp_init_serial) {
4438 return 1; // Can't pause if runtime is not initialized
4439 }
4440 return __kmp_pause_resource(level);
4441}
4442
4443void __kmpc_error(ident_t *loc, int severity, const char *message) {
4444 if (!__kmp_init_serial)
4445 __kmp_serial_initialize();
4446
4447 KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4448
4449#if OMPT_SUPPORT
4450 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4451 ompt_callbacks.ompt_callback(ompt_callback_error)(
4452 (ompt_severity_t)severity, message, KMP_STRLEN(message),
4453 OMPT_GET_RETURN_ADDRESS(0));
4454 }
4455#endif // OMPT_SUPPORT
4456
4457 char *src_loc;
4458 if (loc && loc->psource) {
4459 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4460 src_loc =
4461 __kmp_str_format("%s:%s:%s", str_loc.file, str_loc.line, str_loc.col);
4462 __kmp_str_loc_free(&str_loc);
4463 } else {
4464 src_loc = __kmp_str_format("unknown");
4465 }
4466
4467 if (severity == severity_warning)
4468 KMP_WARNING(UserDirectedWarning, src_loc, message);
4469 else
4470 KMP_FATAL(UserDirectedError, src_loc, message);
4471
4472 __kmp_str_free(&src_loc);
4473}
4474
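/* Editorial sketch (not part of the runtime): __kmpc_error() is the entry
 * point a compiler can target for an execution-time `omp error` directive,
 * e.g.
 *
 *   #pragma omp error at(execution) severity(warning) message("check input")
 *
 * might lower to something like
 *
 *   __kmpc_error(loc, severity_warning, "check input");
 *
 * with severity_fatal used for severity(fatal); only those two severity
 * values are accepted here. */
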
4475// Mark the beginning of a scope directive.
4476void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4477// reserved is for future extension of the scope directive and is not used.
4478#if OMPT_SUPPORT && OMPT_OPTIONAL
4479 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4480 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4481 int tid = __kmp_tid_from_gtid(gtid);
4482 ompt_callbacks.ompt_callback(ompt_callback_work)(
4483 ompt_work_scope, ompt_scope_begin,
4484 &(team->t.ompt_team_info.parallel_data),
4485 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4486 OMPT_GET_RETURN_ADDRESS(0));
4487 }
4488#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4489}
4490
4491// Mark the end of a scope directive.
4492void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4493// reserved is for future extension of the scope directive and is not used.
4494#if OMPT_SUPPORT && OMPT_OPTIONAL
4495 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4496 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4497 int tid = __kmp_tid_from_gtid(gtid);
4498 ompt_callbacks.ompt_callback(ompt_callback_work)(
4499 ompt_work_scope, ompt_scope_end,
4500 &(team->t.ompt_team_info.parallel_data),
4501 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4502 OMPT_GET_RETURN_ADDRESS(0));
4503 }
4504#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4505}
4506
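/* Editorial sketch (not part of the runtime): a compiler could bracket the
 * body of a `scope` construct with the two entry points above,
 *
 *   __kmpc_scope(loc, gtid, NULL);     // reserved argument currently unused
 *   ... structured block ...
 *   __kmpc_end_scope(loc, gtid, NULL);
 *
 * typically followed by a separate __kmpc_barrier(loc, gtid) call unless
 * nowait is specified. Both calls above only report ompt_work_scope
 * begin/end events when an OMPT tool has registered ompt_callback_work. */
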
4507#ifdef KMP_USE_VERSION_SYMBOLS
4508// For GOMP compatibility there are two versions of each omp_* API.
4509// One is the plain C symbol and one is the Fortran symbol with an appended
4510// underscore. When we implement a specific ompc_* version of an omp_*
4511// function, we want the plain GOMP versioned symbol to alias the ompc_* version
4512// instead of the Fortran versions in kmp_ftn_entry.h
4513extern "C" {
4514// Have to undef these from omp.h so they aren't translated into
4515// their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
4516#ifdef omp_set_affinity_format
4517#undef omp_set_affinity_format
4518#endif
4519#ifdef omp_get_affinity_format
4520#undef omp_get_affinity_format
4521#endif
4522#ifdef omp_display_affinity
4523#undef omp_display_affinity
4524#endif
4525#ifdef omp_capture_affinity
4526#undef omp_capture_affinity
4527#endif
4528KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4529 "OMP_5.0");
4530KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4531 "OMP_5.0");
4532KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4533 "OMP_5.0");
4534KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
4535 "OMP_5.0");
4536} // extern "C"
4537#endif