16#include "kmp_config.h"
28#include "kmp_stats_timing.h"
// Compile-time switch tested by the `#if (KMP_DEVELOPER_STATS)` sections of
// this header: a non-zero value selects the developer-only timers and the
// forwarding KMP_*_DEVELOPER_* macros; 0 selects the empty/no-op variants.
#define KMP_DEVELOPER_STATS 0
// Default for the `doHist` argument of the statistic constructor, which
// stores it (converted to bool) as the collectingHist flag. 0 = off.
#define KMP_STATS_HIST 0
// X-macro listing every statistics counter.
//
// `macro` is invoked once per counter as macro(name, flags, arg):
//   name  - counter identifier (pasted onto a prefix by users such as
//           ENUMERATE),
//   flags - stats_flags_e bit combination, or 0 for none,
//   arg   - forwarded unchanged to every invocation.
// The order of the entries is the order in which they are expanded.
#define KMP_FOREACH_COUNTER(macro, arg) \
  macro(OMP_PARALLEL, stats_flags_e::onlyInMaster | stats_flags_e::noTotal, arg) \
  macro(OMP_NESTED_PARALLEL, 0, arg) \
  macro(OMP_LOOP_STATIC, 0, arg) \
  macro(OMP_LOOP_STATIC_STEAL, 0, arg) \
  macro(OMP_LOOP_DYNAMIC, 0, arg) \
  macro(OMP_DISTRIBUTE, 0, arg) \
  macro(OMP_BARRIER, 0, arg) \
  macro(OMP_CRITICAL, 0, arg) \
  macro(OMP_SINGLE, 0, arg) \
  macro(OMP_MASTER, 0, arg) \
  macro(OMP_MASKED, 0, arg) \
  macro(OMP_TEAMS, 0, arg) \
  macro(OMP_set_lock, 0, arg) \
  macro(OMP_test_lock, 0, arg) \
  macro(REDUCE_wait, 0, arg) \
  macro(REDUCE_nowait, 0, arg) \
  macro(OMP_TASKYIELD, 0, arg) \
  macro(OMP_TASKLOOP, 0, arg) \
  macro(TASK_executed, 0, arg) \
  macro(TASK_cancelled, 0, arg) \
  macro(TASK_stolen, 0, arg)
// X-macro listing every statistics timer.
//
// `macro` is invoked once per timer as macro(name, flags, arg):
//   name  - timer identifier (pasted onto a prefix by users such as
//           ENUMERATE),
//   flags - stats_flags_e bit combination, or 0 for none,
//   arg   - forwarded unchanged to every invocation.
// The list ends by expanding KMP_FOREACH_DEVELOPER_TIMER(macro, arg), so any
// developer timers are appended after the entries below.
#define KMP_FOREACH_TIMER(macro, arg) \
  macro(OMP_worker_thread_life, stats_flags_e::logEvent, arg) \
  macro(OMP_parallel, stats_flags_e::logEvent, arg) \
  macro(OMP_parallel_overhead, stats_flags_e::logEvent, arg) \
  macro(OMP_teams, stats_flags_e::logEvent, arg) \
  macro(OMP_teams_overhead, stats_flags_e::logEvent, arg) \
  macro(OMP_loop_static, 0, arg) \
  macro(OMP_loop_static_scheduling, 0, arg) \
  macro(OMP_loop_dynamic, 0, arg) \
  macro(OMP_loop_dynamic_scheduling, 0, arg) \
  macro(OMP_distribute, 0, arg) \
  macro(OMP_distribute_scheduling, 0, arg) \
  macro(OMP_critical, 0, arg) \
  macro(OMP_critical_wait, 0, arg) \
  macro(OMP_single, 0, arg) \
  macro(OMP_master, 0, arg) \
  macro(OMP_masked, 0, arg) \
  macro(OMP_task_immediate, 0, arg) \
  macro(OMP_task_taskwait, 0, arg) \
  macro(OMP_task_taskyield, 0, arg) \
  macro(OMP_task_taskgroup, 0, arg) \
  macro(OMP_task_join_bar, 0, arg) \
  macro(OMP_task_plain_bar, 0, arg) \
  macro(OMP_taskloop_scheduling, 0, arg) \
  macro(OMP_plain_barrier, stats_flags_e::logEvent, arg) \
  macro(OMP_idle, stats_flags_e::logEvent, arg) \
  macro(OMP_fork_barrier, stats_flags_e::logEvent, arg) \
  macro(OMP_join_barrier, stats_flags_e::logEvent, arg) \
  macro(OMP_serial, stats_flags_e::logEvent, arg) \
  macro(OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal, \
        arg) \
  macro(OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal, \
        arg) \
  macro(OMP_loop_static_iterations, \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
  macro(OMP_loop_static_total_iterations, \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
  macro(OMP_loop_dynamic_iterations, \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
  macro(OMP_loop_dynamic_total_iterations, \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
  macro(OMP_distribute_iterations, \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
  KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
// X-macro listing the developer-only timers; it is expanded at the end of
// KMP_FOREACH_TIMER and follows the same macro(name, flags, arg) protocol.
// With KMP_DEVELOPER_STATS enabled it contributes the entries below;
// otherwise it expands to nothing, so no developer timers are generated.
#if (KMP_DEVELOPER_STATS)
#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \
  macro(KMP_fork_call, 0, arg) \
  macro(KMP_join_call, 0, arg) \
  macro(KMP_end_split_barrier, 0, arg) \
  macro(KMP_hier_gather, 0, arg) \
  macro(KMP_hier_release, 0, arg) \
  macro(KMP_hyper_gather, 0, arg) \
  macro(KMP_hyper_release, 0, arg) \
  macro(KMP_dist_gather, 0, arg) \
  macro(KMP_dist_release, 0, arg) \
  macro(KMP_linear_gather, 0, arg) \
  macro(KMP_linear_release, 0, arg) \
  macro(KMP_tree_gather, 0, arg) \
  macro(KMP_tree_release, 0, arg) \
  macro(USER_resume, 0, arg) \
  macro(USER_suspend, 0, arg) \
  macro(USER_mwait, 0, arg) \
  macro(KMP_allocate_team, 0, arg) \
  macro(KMP_setup_icv_copy, 0, arg) \
  macro(USER_icv_copy, 0, arg) \
  macro(FOR_static_steal_stolen, \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
  macro(FOR_static_steal_chunks, \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)
#else
#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
#endif
// Alias: the explicit-timer list is exactly the KMP_FOREACH_TIMER list, with
// `macro` and `arg` forwarded unchanged.
#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) KMP_FOREACH_TIMER(macro, arg)
// Per-entry callback for the KMP_FOREACH_* lists that emits one enumerator:
// `prefix` pasted directly onto `name`, followed by a comma. The middle
// (flags) argument is ignored.
#define ENUMERATE(name, ignore, prefix) prefix##name,
// Timer identifiers: ENUMERATE generates one TIMER_<name> enumerator for each
// KMP_FOREACH_TIMER entry, in list order, and TIMER_LAST follows the last one
// (it is used elsewhere in this file as a loop bound and array extent).
303enum timer_e { KMP_FOREACH_TIMER(ENUMERATE, TIMER_) TIMER_LAST };
305enum explicit_timer_e {
332 uint32_t KMP_ALIGN_CACHE zeroCount;
338 static double binMax[numBins];
344 uint64_t t = zeroCount;
345 for (
int i = 0; i < numBins; i++)
347 KMP_DEBUG_ASSERT(t == _total);
350 void check()
const {}
354 logHistogram() { reset(); }
356 logHistogram(logHistogram
const &o) {
357 for (
int i = 0; i < numBins; i++)
366 for (
int i = 0; i < numBins; i++) {
375 uint32_t count(
int b)
const {
return bins[b + logOffset].count; }
376 double total(
int b)
const {
return bins[b + logOffset].total; }
377 static uint32_t findBin(
double sample);
379 logHistogram &operator+=(logHistogram
const &o) {
380 zeroCount += o.zeroCount;
381 for (
int i = 0; i < numBins; i++) {
382 bins[i].count += o.bins[i].count;
383 bins[i].total += o.bins[i].total;
393 void addSample(
double sample);
397 std::string format(
char)
const;
401 double KMP_ALIGN_CACHE minVal;
405 uint64_t sampleCount;
411 statistic(
bool doHist =
bool(KMP_STATS_HIST)) {
413 collectingHist = doHist;
415 statistic(statistic
const &o)
416 : minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2),
417 sampleCount(o.sampleCount), offset(o.offset),
418 collectingHist(o.collectingHist), hist(o.hist) {}
419 statistic(
double minv,
double maxv,
double meanv, uint64_t sc,
double sd)
420 : minVal(minv), maxVal(maxv), meanVal(meanv), m2(sd * sd * sc),
421 sampleCount(sc), offset(0.0), collectingHist(false) {}
422 bool haveHist()
const {
return collectingHist; }
423 double getMin()
const {
return minVal; }
424 double getMean()
const {
return meanVal; }
425 double getMax()
const {
return maxVal; }
426 uint64_t getCount()
const {
return sampleCount; }
427 double getSD()
const {
return sqrt(m2 / sampleCount); }
428 double getTotal()
const {
return sampleCount * meanVal; }
429 logHistogram
const *getHist()
const {
return &hist; }
430 void setOffset(
double d) { offset = d; }
433 minVal = (std::numeric_limits<double>::max)();
441 void addSample(
double sample);
442 void scale(
double factor);
443 void scaleDown(
double f) { scale(1. / f); }
444 void forceCount(uint64_t count) { sampleCount = count; }
445 statistic &operator+=(statistic
const &other);
447 std::string format(
char unit,
bool total =
false)
const;
448 std::string formatHist(
char unit)
const {
return hist.format(unit); }
456class timeStat :
public statistic {
457 static statInfo timerInfo[];
460 timeStat() : statistic() {}
461 static const char *name(timer_e e) {
return timerInfo[e].name; }
462 static bool noTotal(timer_e e) {
465 static bool masterOnly(timer_e e) {
468 static bool workerOnly(timer_e e) {
471 static bool noUnits(timer_e e) {
477 static void clearEventFlags() {
478 for (
int i = 0; i < TIMER_LAST; i++) {
489 timer_e timerEnumValue;
490 tsc_tick_count startTime;
491 tsc_tick_count pauseStartTime;
492 tsc_tick_count::tsc_interval_t totalPauseTime;
495 explicitTimer(timeStat *s, timer_e te)
496 : stat(s), timerEnumValue(te), startTime(), pauseStartTime(0),
500 void start(tsc_tick_count tick);
501 void pause(tsc_tick_count tick) { pauseStartTime = tick; }
502 void resume(tsc_tick_count tick) {
503 totalPauseTime += (tick - pauseStartTime);
505 void stop(tsc_tick_count tick, kmp_stats_list *stats_ptr =
nullptr);
511 timer_e get_type()
const {
return timerEnumValue; }
520class partitionedTimers {
522 std::vector<explicitTimer> timer_stack;
526 void init(explicitTimer timer);
527 void exchange(explicitTimer timer);
528 void push(explicitTimer timer);
535class blockPartitionedTimer {
536 partitionedTimers *part_timers;
539 blockPartitionedTimer(partitionedTimers *pt, explicitTimer timer)
541 part_timers->push(timer);
543 ~blockPartitionedTimer() { part_timers->pop(); }
549class blockThreadState {
555 : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) {
556 *state_pointer = new_state;
558 ~blockThreadState() { *state_pointer = old_state; }
566 static const statInfo counterInfo[];
569 counter() : value(0) {}
570 void increment() { value++; }
571 uint64_t getValue()
const {
return value; }
572 void reset() { value = 0; }
573 static const char *name(counter_e e) {
return counterInfo[e].name; }
574 static bool masterOnly(counter_e e) {
612class kmp_stats_event {
620 : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {}
621 kmp_stats_event(uint64_t strt, uint64_t stp,
int nst, timer_e nme)
622 : start(strt), stop(stp), nest_level(nst), timer_name(nme) {}
623 inline uint64_t getStart()
const {
return start; }
624 inline uint64_t getStop()
const {
return stop; }
625 inline int getNestLevel()
const {
return nest_level; }
626 inline timer_e getTimerName()
const {
return timer_name; }
655class kmp_stats_event_vector {
656 kmp_stats_event *events;
659 static const int INIT_SIZE = 1024;
662 kmp_stats_event_vector() {
664 (kmp_stats_event *)__kmp_allocate(
sizeof(kmp_stats_event) * INIT_SIZE);
666 allocated_size = INIT_SIZE;
668 ~kmp_stats_event_vector() {}
669 inline void reset() { internal_size = 0; }
670 inline int size()
const {
return internal_size; }
671 void push_back(uint64_t start_time, uint64_t stop_time,
int nest_level,
674 if (internal_size == allocated_size) {
675 kmp_stats_event *tmp = (kmp_stats_event *)__kmp_allocate(
676 sizeof(kmp_stats_event) * allocated_size * 2);
677 for (i = 0; i < internal_size; i++)
683 events[internal_size] =
684 kmp_stats_event(start_time, stop_time, nest_level, name);
690 const kmp_stats_event &operator[](
int index)
const {
return events[index]; }
691 kmp_stats_event &operator[](
int index) {
return events[index]; }
692 const kmp_stats_event &at(
int index)
const {
return events[index]; }
693 kmp_stats_event &at(
int index) {
return events[index]; }
723class kmp_stats_list {
725 timeStat _timers[TIMER_LAST + 1];
726 counter _counters[COUNTER_LAST + 1];
727 explicitTimer thread_life_timer;
728 partitionedTimers _partitionedTimers;
730 kmp_stats_event_vector _event_vector;
731 kmp_stats_list *next;
732 kmp_stats_list *prev;
734 int thread_is_idle_flag;
738 : thread_life_timer(&_timers[TIMER_OMP_worker_thread_life],
739 TIMER_OMP_worker_thread_life),
740 _nestLevel(0), _event_vector(), next(this), prev(this), state(IDLE),
741 thread_is_idle_flag(0) {}
743 inline timeStat *getTimer(timer_e idx) {
return &_timers[idx]; }
744 inline counter *getCounter(counter_e idx) {
return &_counters[idx]; }
745 inline partitionedTimers *getPartitionedTimers() {
746 return &_partitionedTimers;
748 inline timeStat *getTimers() {
return _timers; }
749 inline counter *getCounters() {
return _counters; }
750 inline kmp_stats_event_vector &getEventVector() {
return _event_vector; }
751 inline void startLife() { thread_life_timer.start(tsc_tick_count::now()); }
752 inline void endLife() { thread_life_timer.stop(tsc_tick_count::now(),
this); }
753 inline void resetEventVector() { _event_vector.reset(); }
754 inline void incrementNestValue() { _nestLevel++; }
755 inline int getNestValue() {
return _nestLevel; }
756 inline void decrementNestValue() { _nestLevel--; }
757 inline int getGtid()
const {
return gtid; }
758 inline void setGtid(
int newgtid) { gtid = newgtid; }
759 inline void setState(
stats_state_e newstate) { state = newstate; }
762 inline bool isIdle() {
return thread_is_idle_flag == 1; }
763 inline void setIdleFlag() { thread_is_idle_flag = 1; }
764 inline void resetIdleFlag() { thread_is_idle_flag = 0; }
765 kmp_stats_list *push_back(
int gtid);
766 inline void push_event(uint64_t start_time, uint64_t stop_time,
767 int nest_level, timer_e name) {
768 _event_vector.push_back(start_time, stop_time, nest_level, name);
772 kmp_stats_list::iterator begin();
773 kmp_stats_list::iterator end();
777 friend kmp_stats_list::iterator kmp_stats_list::begin();
778 friend kmp_stats_list::iterator kmp_stats_list::end();
783 iterator operator++();
784 iterator operator++(
int dummy);
785 iterator operator--();
786 iterator operator--(
int dummy);
787 bool operator!=(
const iterator &rhs);
788 bool operator==(
const iterator &rhs);
789 kmp_stats_list *operator*()
const;
822class kmp_stats_output_module {
832 std::string outputFileName;
833 static const char *eventsFileName;
834 static const char *plotFileName;
835 static int printPerThreadFlag;
836 static int printPerThreadEventsFlag;
837 static const rgb_color globalColorArray[];
838 static rgb_color timerColorInfo[];
841 static void setupEventColors();
842 static void printPloticusFile();
843 static void printHeaderInfo(FILE *statsOut);
844 static void printTimerStats(FILE *statsOut, statistic
const *theStats,
845 statistic
const *totalStats);
846 static void printCounterStats(FILE *statsOut, statistic
const *theStats);
847 static void printCounters(FILE *statsOut, counter
const *theCounters);
848 static void printEvents(FILE *eventsOut, kmp_stats_event_vector *theEvents,
850 static rgb_color getEventColor(timer_e e) {
return timerColorInfo[e]; }
851 static void windupExplicitTimers();
852 bool eventPrintingEnabled()
const {
return printPerThreadEventsFlag; }
855 kmp_stats_output_module() { init(); }
856 void outputStats(
const char *heading);
862void __kmp_stats_init();
863void __kmp_stats_fini();
864void __kmp_reset_stats();
865void __kmp_output_stats(
const char *);
866void __kmp_accumulate_stats_at_exit(
void);
868extern KMP_THREAD_LOCAL kmp_stats_list *__kmp_stats_thread_ptr;
870extern kmp_stats_list *__kmp_stats_list;
872extern kmp_tas_lock_t __kmp_stats_lock;
874extern tsc_tick_count __kmp_stats_start_time;
876extern kmp_stats_output_module __kmp_stats_output;
// Adds `value`, converted to double, as one sample to the calling thread's
// timer statistic TIMER_<name>.
//
// name  - timer identifier without the TIMER_ prefix.
// value - any expression convertible to double. It is parenthesised before
//         the conversion so that the whole expression is converted, not just
//         its first operand.
#define KMP_COUNT_VALUE(name, value) \
  __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample( \
      static_cast<double>(value))
// Increments the calling thread's counter COUNTER_<name> by one.
//
// name - counter identifier without the COUNTER_ prefix.
#define KMP_COUNT_BLOCK(name) \
  __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment()
// Forwards to __kmp_output_stats(const char *), passing `heading_string`
// through unchanged.
#define KMP_OUTPUT_STATS(heading_string) __kmp_output_stats(heading_string)
// Calls init() on the calling thread's partitioned timers with an
// explicitTimer built from the TIMER_<name> statistic and its enum value.
//
// name - timer identifier without the TIMER_ prefix.
#define KMP_INIT_PARTITIONED_TIMERS(name) \
  __kmp_stats_thread_ptr->getPartitionedTimers()->init(explicitTimer( \
      __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name))
// Declares a scoped blockPartitionedTimer named __PBLOCKTIME__: its
// constructor pushes an explicitTimer for TIMER_<name> onto the calling
// thread's partitioned timers and its destructor pops it again.
// Expands to a declaration, so it must be used where a declaration is valid
// and at most once per scope.
//
// name - timer identifier without the TIMER_ prefix.
#define KMP_TIME_PARTITIONED_BLOCK(name) \
  blockPartitionedTimer __PBLOCKTIME__( \
      __kmp_stats_thread_ptr->getPartitionedTimers(), \
      explicitTimer(__kmp_stats_thread_ptr->getTimer(TIMER_##name), \
                    TIMER_##name))
// Calls push() on the calling thread's partitioned timers with an
// explicitTimer built from the TIMER_<name> statistic and its enum value.
//
// name - timer identifier without the TIMER_ prefix.
#define KMP_PUSH_PARTITIONED_TIMER(name) \
  __kmp_stats_thread_ptr->getPartitionedTimers()->push(explicitTimer( \
      __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name))
// Calls pop() on the calling thread's partitioned timers. Takes no arguments.
#define KMP_POP_PARTITIONED_TIMER() \
  __kmp_stats_thread_ptr->getPartitionedTimers()->pop()
// Calls exchange() on the calling thread's partitioned timers with an
// explicitTimer built from the TIMER_<name> statistic and its enum value.
//
// name - timer identifier without the TIMER_ prefix.
#define KMP_EXCHANGE_PARTITIONED_TIMER(name) \
  __kmp_stats_thread_ptr->getPartitionedTimers()->exchange(explicitTimer( \
      __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name))
// Calls setState(state_name) on the calling thread's statistics object.
//
// state_name - the stats_state_e value to record.
#define KMP_SET_THREAD_STATE(state_name) \
  __kmp_stats_thread_ptr->setState(state_name)
// Evaluates to getState() of the calling thread's statistics object.
#define KMP_GET_THREAD_STATE() __kmp_stats_thread_ptr->getState()
// Declares a scoped blockThreadState named __BTHREADSTATE__: its constructor
// saves the state stored behind getStatePointer() and replaces it with
// `state_name`; its destructor restores the saved state.
// Expands to a declaration, so it must be used where a declaration is valid
// and at most once per scope.
//
// state_name - the stats_state_e value to hold for the enclosing scope.
#define KMP_SET_THREAD_STATE_BLOCK(state_name) \
  blockThreadState __BTHREADSTATE__(__kmp_stats_thread_ptr->getStatePointer(), \
                                    state_name)
// Forwards to __kmp_reset_stats(). Takes no arguments.
#define KMP_RESET_STATS() __kmp_reset_stats()
// Developer-only variants of the statistics macros. With KMP_DEVELOPER_STATS
// enabled each one forwards to its regular counterpart; otherwise each is a
// no-op expression that does not evaluate its arguments.
#if (KMP_DEVELOPER_STATS)
#define KMP_COUNT_DEVELOPER_VALUE(n, v) KMP_COUNT_VALUE(n, v)
#define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n)
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) KMP_TIME_PARTITIONED_BLOCK(n)
#define KMP_PUSH_DEVELOPER_PARTITIONED_TIMER(n) KMP_PUSH_PARTITIONED_TIMER(n)
// KMP_POP_PARTITIONED_TIMER() is declared with no parameters, so `n` is
// accepted here for symmetry with the other wrappers but must not be
// forwarded: passing it on is an argument-count error whenever `n` is
// non-empty.
#define KMP_POP_DEVELOPER_PARTITIONED_TIMER(n) KMP_POP_PARTITIONED_TIMER()
#define KMP_EXCHANGE_DEVELOPER_PARTITIONED_TIMER(n) \
  KMP_EXCHANGE_PARTITIONED_TIMER(n)
#else
#define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0)
#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
#define KMP_PUSH_DEVELOPER_PARTITIONED_TIMER(n) ((void)0)
#define KMP_POP_DEVELOPER_PARTITIONED_TIMER(n) ((void)0)
#define KMP_EXCHANGE_DEVELOPER_PARTITIONED_TIMER(n) ((void)0)
#endif
// No-op definitions of the statistics macros: each expands to ((void)0) and
// does not evaluate its arguments.
// NOTE(review): the preprocessor conditional that selects between these and
// the functional definitions is not visible in this part of the file -
// confirm these sit in the "statistics disabled" branch.
#define KMP_COUNT_VALUE(n, v) ((void)0)
#define KMP_COUNT_BLOCK(n) ((void)0)

#define KMP_OUTPUT_STATS(heading_string) ((void)0)
#define KMP_RESET_STATS() ((void)0)

#define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0)
#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
#define KMP_PUSH_DEVELOPER_PARTITIONED_TIMER(n) ((void)0)
#define KMP_POP_DEVELOPER_PARTITIONED_TIMER(n) ((void)0)
#define KMP_EXCHANGE_DEVELOPER_PARTITIONED_TIMER(n) ((void)0)
#define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0)
#define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0)
#define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0)
#define KMP_POP_PARTITIONED_TIMER() ((void)0)
// Counterpart of the functional KMP_EXCHANGE_PARTITIONED_TIMER, so code that
// uses it still compiles with the no-op set.
#define KMP_EXCHANGE_PARTITIONED_TIMER(name) ((void)0)
#define KMP_SET_THREAD_STATE(state_name) ((void)0)
#define KMP_GET_THREAD_STATE() ((void)0)
#define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0)
stats_flags_e
Flags describing a statistic (timer or counter).
#define KMP_FOREACH_COUNTER(macro, arg)
Add new counters under the KMP_FOREACH_COUNTER() macro in kmp_stats.h.
#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg)
Add new explicit timers under the KMP_FOREACH_EXPLICIT_TIMER() macro.
stats_state_e
The states a thread can be in.
@ notInMaster
statistic is valid only for non-primary threads
@ noUnits
statistic doesn't need units printed next to it
@ noTotal
do not show a TOTAL_aggregation for this statistic
@ onlyInMaster
statistic is valid only for primary thread