1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4 
5 #include "precomp.hpp"
6 
7 #include <opencv2/core/utils/trace.hpp>
8 #include <opencv2/core/utils/trace.private.hpp>
9 #include <opencv2/core/utils/configuration.private.hpp>
10 
11 #include <opencv2/core/opencl/ocl_defs.hpp>
12 
13 #include <cstdarg> // va_start
14 
15 #include <sstream>
16 #include <ostream>
17 #include <fstream>
18 
19 #if 0
20 #define CV_LOG(...) CV_LOG_INFO(NULL, __VA_ARGS__)
21 #else
22 #define CV_LOG(...) {}
23 #endif
24 
25 #if 0
26 #define CV_LOG_ITT(...) CV_LOG_INFO(NULL, __VA_ARGS__)
27 #else
28 #define CV_LOG_ITT(...) {}
29 #endif
30 
31 #if 1
32 #define CV_LOG_TRACE_BAILOUT(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__)
33 #else
34 #define CV_LOG_TRACE_BAILOUT(...) {}
35 #endif
36 
37 #if 0
38 #define CV_LOG_PARALLEL(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__)
39 #else
40 #define CV_LOG_PARALLEL(...) {}
41 #endif
42 
43 #if 0
44 #define CV_LOG_CTX_STAT(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__)
45 #else
46 #define CV_LOG_CTX_STAT(...) {}
47 #endif
48 
49 #if 0
50 #define CV_LOG_SKIP(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__)
51 #else
52 #define CV_LOG_SKIP(...) {}
53 #endif
54 
55 namespace cv {
56 namespace utils {
57 namespace trace {
58 namespace details {
59 
60 #ifdef OPENCV_TRACE
61 
62 #ifdef _MSC_VER
63 #pragma warning(disable:4065) // switch statement contains 'default' but no 'case' labels
64 #endif
65 
66 static int64 g_zero_timestamp = 0;
67 
getTimestamp()68 static int64 getTimestamp()
69 {
70     int64 t = getTickCount();
71     static double tick_to_ns = 1e9 / getTickFrequency();
72     return (int64)((t - g_zero_timestamp) * tick_to_ns);
73 }
74 
getParameterTraceEnable()75 static bool getParameterTraceEnable()
76 {
77     static bool param_traceEnable = utils::getConfigurationParameterBool("OPENCV_TRACE", false);
78     return param_traceEnable;
79 }
80 
81 // TODO lazy configuration flags
82 static int param_maxRegionDepthOpenCV = (int)utils::getConfigurationParameterSizeT("OPENCV_TRACE_DEPTH_OPENCV", 1);
83 static int param_maxRegionChildrenOpenCV = (int)utils::getConfigurationParameterSizeT("OPENCV_TRACE_MAX_CHILDREN_OPENCV", 1000);
84 static int param_maxRegionChildren = (int)utils::getConfigurationParameterSizeT("OPENCV_TRACE_MAX_CHILDREN", 10000);
85 
getParameterTraceLocation()86 static const cv::String& getParameterTraceLocation()
87 {
88     static cv::String param_traceLocation = utils::getConfigurationParameterString("OPENCV_TRACE_LOCATION", "OpenCVTrace");
89     return param_traceLocation;
90 }
91 
92 #ifdef HAVE_OPENCL
93 static bool param_synchronizeOpenCL = utils::getConfigurationParameterBool("OPENCV_TRACE_SYNC_OPENCL", false);
94 #endif
95 
96 #ifdef OPENCV_WITH_ITT
97 static bool param_ITT_registerParentScope = utils::getConfigurationParameterBool("OPENCV_TRACE_ITT_PARENT", false);
98 #endif
99 
_spaces(int count)100 static const char* _spaces(int count)
101 {
102     static const char buf[64] =
103 "                                                               ";
104     return &buf[63 - (count & 63)];
105 }
106 
107 /**
108  * Text-based trace messages
109  */
110 class TraceMessage
111 {
112 public:
113     char buffer[1024];
114     size_t len;
115     bool hasError;
116 
TraceMessage()117     TraceMessage() :
118         len(0),
119         hasError(false)
120     {}
121 
printf(const char * format,...)122     bool printf(const char* format, ...)
123     {
124         char* buf = &buffer[len];
125         size_t sz = sizeof(buffer) - len;
126         va_list ap;
127         va_start(ap, format);
128         int n = cv_vsnprintf(buf, (int)sz, format, ap);
129         va_end(ap);
130         if (n < 0 || (size_t)n > sz)
131         {
132             hasError = true;
133             return false;
134         }
135         len += n;
136         return true;
137     }
138 
formatlocation(const Region::LocationStaticStorage & location)139     bool formatlocation(const Region::LocationStaticStorage& location)
140     {
141         return this->printf("l,%lld,\"%s\",%d,\"%s\",0x%llX\n",
142                 (long long int)(*location.ppExtra)->global_location_id,
143                 location.filename,
144                 location.line,
145                 location.name,
146                 (long long int)(location.flags & ~0xF0000000));
147     }
formatRegionEnter(const Region & region)148     bool formatRegionEnter(const Region& region)
149     {
150         bool ok = this->printf("b,%d,%lld,%lld,%lld",
151                 (int)region.pImpl->threadID,
152                 (long long int)region.pImpl->beginTimestamp,
153                 (long long int)((*region.pImpl->location.ppExtra)->global_location_id),
154                 (long long int)region.pImpl->global_region_id);
155         if (region.pImpl->parentRegion && region.pImpl->parentRegion->pImpl)
156         {
157             if (region.pImpl->parentRegion->pImpl->threadID != region.pImpl->threadID)
158                 ok &= this->printf(",parentThread=%d,parent=%lld",
159                         (int)region.pImpl->parentRegion->pImpl->threadID,
160                         (long long int)region.pImpl->parentRegion->pImpl->global_region_id);
161         }
162         ok &= this->printf("\n");
163         return ok;
164     }
formatRegionLeave(const Region & region,const RegionStatistics & result)165     bool formatRegionLeave(const Region& region, const RegionStatistics& result)
166     {
167         CV_DbgAssert(region.pImpl->endTimestamp - region.pImpl->beginTimestamp == result.duration);
168         bool ok = this->printf("e,%d,%lld,%lld,%lld,%lld",
169                 (int)region.pImpl->threadID,
170                 (long long int)region.pImpl->endTimestamp,
171                 (long long int)(*region.pImpl->location.ppExtra)->global_location_id,
172                 (long long int)region.pImpl->global_region_id,
173                 (long long int)result.duration);
174         if (result.currentSkippedRegions)
175             ok &= this->printf(",skip=%d", (int)result.currentSkippedRegions);
176 #ifdef HAVE_IPP
177         if (result.durationImplIPP)
178             ok &= this->printf(",tIPP=%lld", (long long int)result.durationImplIPP);
179 #endif
180 #ifdef HAVE_OPENCL
181         if (result.durationImplOpenCL)
182             ok &= this->printf(",tOCL=%lld", (long long int)result.durationImplOpenCL);
183 #endif
184 #ifdef HAVE_OPENVX
185         if (result.durationImplOpenVX)
186             ok &= this->printf(",tOVX=%lld", (long long int)result.durationImplOpenVX);
187 #endif
188         ok &= this->printf("\n");
189         return ok;
190     }
recordRegionArg(const Region & region,const TraceArg & arg,const char * value)191     bool recordRegionArg(const Region& region, const TraceArg& arg, const char* value)
192     {
193         return this->printf("a,%d,%lld,%lld,\"%s\",\"%s\"\n",
194                 region.pImpl->threadID,
195                 (long long int)region.pImpl->beginTimestamp,
196                 (long long int)region.pImpl->global_region_id,
197                 arg.name,
198                 value);
199     }
200 };
201 
202 
203 #ifdef OPENCV_WITH_ITT
204 static __itt_domain* domain = NULL;
205 
isITTEnabled()206 static bool isITTEnabled()
207 {
208     static volatile bool isInitialized = false;
209     static bool isEnabled = false;
210     if (!isInitialized)
211     {
212         cv::AutoLock lock(cv::getInitializationMutex());
213         if (!isInitialized)
214         {
215             bool param_traceITTEnable = utils::getConfigurationParameterBool("OPENCV_TRACE_ITT_ENABLE", true);
216             if (param_traceITTEnable)
217             {
218                 isEnabled = !!(__itt_api_version());
219                 CV_LOG_ITT("ITT is " << (isEnabled ? "enabled" : "disabled"));
220                 domain = __itt_domain_create("OpenCVTrace");
221             }
222             else
223             {
224                 CV_LOG_ITT("ITT is disabled through OpenCV parameter");
225                 isEnabled = false;
226             }
227             isInitialized = true;
228         }
229     }
230     return isEnabled;
231 }
232 #endif
233 
234 
LocationExtraData(const LocationStaticStorage & location)235 Region::LocationExtraData::LocationExtraData(const LocationStaticStorage& location)
236 {
237     CV_UNUSED(location);
238     static int g_location_id_counter = 0;
239     global_location_id = CV_XADD(&g_location_id_counter, 1) + 1;
240     CV_LOG("Register location: " << global_location_id << " (" << (void*)&location << ")"
241             << std::endl << "    file: " << location.filename
242             << std::endl << "    line: " << location.line
243             << std::endl << "    name: " << location.name);
244 #ifdef OPENCV_WITH_ITT
245     if (isITTEnabled())
246     {
247         // Caching is not required here, because there is builtin cache.
248         // https://software.intel.com/en-us/node/544203:
249         //     Consecutive calls to __itt_string_handle_create with the same name return the same value.
250         ittHandle_name = __itt_string_handle_create(location.name);
251         ittHandle_filename = __itt_string_handle_create(location.filename);
252     }
253     else
254     {
255         ittHandle_name = 0;
256         ittHandle_filename = 0;
257     }
258 #endif
259 }
260 
init(const Region::LocationStaticStorage & location)261 /*static*/ Region::LocationExtraData* Region::LocationExtraData::init(const Region::LocationStaticStorage& location)
262 {
263     LocationExtraData** pLocationExtra = location.ppExtra;
264     CV_DbgAssert(pLocationExtra);
265     if (*pLocationExtra == NULL)
266     {
267         cv::AutoLock lock(cv::getInitializationMutex());
268         if (*pLocationExtra == NULL)
269         {
270             *pLocationExtra = new Region::LocationExtraData(location);
271             TraceStorage* s = getTraceManager().trace_storage.get();
272             if (s)
273             {
274                 TraceMessage msg;
275                 msg.formatlocation(location);
276                 s->put(msg);
277             }
278         }
279     }
280     return *pLocationExtra;
281 }
282 
283 
Impl(TraceManagerThreadLocal & ctx,Region * parentRegion_,Region & region_,const LocationStaticStorage & location_,int64 beginTimestamp_)284 Region::Impl::Impl(TraceManagerThreadLocal& ctx, Region* parentRegion_, Region& region_, const LocationStaticStorage& location_, int64 beginTimestamp_) :
285     location(location_),
286     region(region_),
287     parentRegion(parentRegion_),
288     threadID(ctx.threadID),
289     global_region_id(++ctx.region_counter),
290     beginTimestamp(beginTimestamp_),
291     endTimestamp(0),
292     directChildrenCount(0)
293 #ifdef OPENCV_WITH_ITT
294     ,itt_id_registered(false)
295     ,itt_id(__itt_null)
296 #endif
297 {
298     CV_DbgAssert(ctx.currentActiveRegion == parentRegion);
299     region.pImpl = this;
300 
301     registerRegion(ctx);
302 
303     enterRegion(ctx);
304 }
305 
~Impl()306 Region::Impl::~Impl()
307 {
308 #ifdef OPENCV_WITH_ITT
309     if (itt_id_registered)
310     {
311         CV_LOG_ITT(" Destroy ITT region: I=" << (void*)this);
312         __itt_id_destroy(domain, itt_id);
313         itt_id_registered = false;
314     }
315 #endif
316     region.pImpl = NULL;
317 }
318 
enterRegion(TraceManagerThreadLocal & ctx)319 void Region::Impl::enterRegion(TraceManagerThreadLocal& ctx)
320 {
321     ctx.currentActiveRegion = &region;
322 
323     if (location.flags & REGION_FLAG_FUNCTION)
324     {
325         if ((location.flags & REGION_FLAG_APP_CODE) == 0)
326         {
327             ctx.regionDepthOpenCV++;
328         }
329         ctx.regionDepth++;
330     }
331 
332     TraceStorage* s = ctx.getStorage();
333     if (s)
334     {
335         TraceMessage msg;
336         msg.formatRegionEnter(region);
337         s->put(msg);
338     }
339 #ifdef OPENCV_WITH_ITT
340     if (isITTEnabled())
341     {
342         __itt_id parentID = __itt_null;
343         if (param_ITT_registerParentScope && parentRegion && parentRegion->pImpl && parentRegion->pImpl->itt_id_registered && (location.flags & REGION_FLAG_REGION_FORCE) == 0)
344             parentID = parentRegion->pImpl->itt_id;
345         __itt_task_begin(domain, itt_id, parentID, (*location.ppExtra)->ittHandle_name);
346     }
347 #endif
348 }
349 
leaveRegion(TraceManagerThreadLocal & ctx)350 void Region::Impl::leaveRegion(TraceManagerThreadLocal& ctx)
351 {
352     int64 duration = endTimestamp - beginTimestamp; CV_UNUSED(duration);
353     RegionStatistics result;
354     ctx.stat.grab(result);
355     ctx.totalSkippedEvents += result.currentSkippedRegions;
356     CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "leaveRegion(): " << (void*)this << " " << result);
357 #ifdef OPENCV_WITH_ITT
358     if (isITTEnabled())
359     {
360         if (result.currentSkippedRegions)
361         {
362             __itt_metadata_add(domain, itt_id, __itt_string_handle_create("skipped trace entries"), __itt_metadata_u32, 1, &result.currentSkippedRegions);
363         }
364 #ifdef HAVE_IPP
365         if (result.durationImplIPP)
366             __itt_metadata_add(domain, itt_id, __itt_string_handle_create("tIPP"), __itt_metadata_u64, 1, &result.durationImplIPP);
367 #endif
368 #ifdef HAVE_OPENCL
369         if (result.durationImplOpenCL)
370             __itt_metadata_add(domain, itt_id, __itt_string_handle_create("tOpenCL"), __itt_metadata_u64, 1, &result.durationImplOpenCL);
371 #endif
372 #ifdef HAVE_OPENVX
373         if (result.durationImplOpenVX)
374             __itt_metadata_add(domain, itt_id, __itt_string_handle_create("tOpenVX"), __itt_metadata_u64, 1, &result.durationImplOpenVX);
375 #endif
376         __itt_task_end(domain);
377     }
378 #endif
379     TraceStorage* s = ctx.getStorage();
380     if (s)
381     {
382         TraceMessage msg;
383         msg.formatRegionLeave(region, result);
384         s->put(msg);
385     }
386 
387     if (location.flags & REGION_FLAG_FUNCTION)
388     {
389         if ((location.flags & REGION_FLAG_APP_CODE) == 0)
390         {
391             ctx.regionDepthOpenCV--;
392         }
393         ctx.regionDepth--;
394     }
395 
396     ctx.currentActiveRegion = parentRegion;
397 }
398 
release()399 void Region::Impl::release()
400 {
401     delete this;
402 }
403 
registerRegion(TraceManagerThreadLocal & ctx)404 void Region::Impl::registerRegion(TraceManagerThreadLocal& ctx)
405 {
406 #ifdef OPENCV_WITH_ITT
407     if (isITTEnabled())
408     {
409         if (!itt_id_registered)
410         {
411             CV_LOG_ITT(" Register ITT region: I=" << (void*)this << " " << ctx.threadID << "-" << global_region_id);
412 #if 1 // workaround for some ITT backends
413             itt_id = __itt_id_make((void*)(intptr_t)(((int64)(ctx.threadID + 1) << 32) | global_region_id), global_region_id);
414 #else
415             itt_id = __itt_id_make((void*)(intptr_t)(ctx.threadID + 1), global_region_id);
416 #endif
417             __itt_id_create(domain, itt_id);
418             itt_id_registered = true;
419         }
420     }
421 #else
422     CV_UNUSED(ctx);
423 #endif
424 }
425 
enableSkipMode(int depth)426 void RegionStatisticsStatus::enableSkipMode(int depth)
427 {
428     CV_DbgAssert(_skipDepth < 0);
429     CV_LOG_SKIP(NULL, "SKIP-ENABLE: depth=" << depth);
430     _skipDepth = depth;
431 }
checkResetSkipMode(int leaveDepth)432 void RegionStatisticsStatus::checkResetSkipMode(int leaveDepth)
433 {
434     if (leaveDepth <= _skipDepth)
435     {
436         CV_LOG_SKIP(NULL, "SKIP-RESET: leaveDepth=" << leaveDepth << " skipDepth=" << _skipDepth);
437         _skipDepth = -1;
438     }
439 }
440 
Region(const LocationStaticStorage & location)441 Region::Region(const LocationStaticStorage& location) :
442     pImpl(NULL),
443     implFlags(0)
444 {
445     // Checks:
446     // - global enable flag
447     // - parent region is disabled
448     // - children count threshold
449     // - region location
450     // - depth (opencv nested calls)
451     if (!TraceManager::isActivated())
452     {
453         CV_LOG("Trace is disabled. Bailout");
454         return;
455     }
456 
457     TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
458     CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "Region(): " << (void*)this << ": " << location.name);
459 
460     Region* parentRegion = ctx.stackTopRegion();
461     const Region::LocationStaticStorage* parentLocation = ctx.stackTopLocation();
462 
463     if (location.flags & REGION_FLAG_REGION_NEXT)
464     {
465         if (parentRegion && parentRegion->pImpl)
466         {
467             CV_DbgAssert((parentRegion->pImpl->location.flags & REGION_FLAG_FUNCTION) == 0);
468             parentRegion->destroy(); parentRegion->implFlags = 0;
469             parentRegion = ctx.stackTopRegion();
470             parentLocation = ctx.stackTopLocation();
471         }
472     }
473 
474     int parentChildren = 0;
475     if (parentRegion && parentRegion->pImpl)
476     {
477         if (parentLocation == NULL)
478         {
479             // parallel_for_body code path
480             parentChildren = CV_XADD(&parentRegion->pImpl->directChildrenCount, 1) + 1;
481         }
482         else
483         {
484             parentChildren = ++parentRegion->pImpl->directChildrenCount;
485         }
486     }
487 
488     int64 beginTimestamp = getTimestamp();
489 
490     int currentDepth = ctx.getCurrentDepth() + 1;
491     switch (location.flags & REGION_FLAG_IMPL_MASK)
492     {
493 #ifdef HAVE_IPP
494     case REGION_FLAG_IMPL_IPP:
495         if (!ctx.stat_status.ignoreDepthImplIPP)
496             ctx.stat_status.ignoreDepthImplIPP = currentDepth;
497         break;
498 #endif
499 #ifdef HAVE_OPENCL
500     case REGION_FLAG_IMPL_OPENCL:
501         if (!ctx.stat_status.ignoreDepthImplOpenCL)
502             ctx.stat_status.ignoreDepthImplOpenCL = currentDepth;
503         break;
504 #endif
505 #ifdef HAVE_OPENVX
506     case REGION_FLAG_IMPL_OPENVX:
507         if (!ctx.stat_status.ignoreDepthImplOpenVX)
508             ctx.stat_status.ignoreDepthImplOpenVX = currentDepth;
509         break;
510 #endif
511     default:
512         break;
513     }
514 
515     ctx.stackPush(this, &location, beginTimestamp);
516     implFlags |= REGION_FLAG__NEED_STACK_POP;
517 
518     if ((location.flags & REGION_FLAG_REGION_FORCE) == 0)
519     {
520         if (ctx.stat_status._skipDepth >= 0 && currentDepth > ctx.stat_status._skipDepth)
521         {
522             CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "Parent region is disabled. Bailout");
523             ctx.stat.currentSkippedRegions++;
524             return;
525         }
526 
527         if (param_maxRegionChildrenOpenCV > 0 && (location.flags & REGION_FLAG_APP_CODE) == 0 && parentLocation && (parentLocation->flags & REGION_FLAG_APP_CODE) == 0)
528         {
529             if (parentChildren >= param_maxRegionChildrenOpenCV)
530             {
531                 CV_LOG_TRACE_BAILOUT(NULL, _spaces(ctx.getCurrentDepth()*4) << "OpenCV parent region exceeds children count. Bailout");
532                 ctx.stat_status.enableSkipMode(currentDepth - 1);
533                 ctx.stat.currentSkippedRegions++;
534                 DEBUG_ONLY(ctx.dumpStack(std::cout, false));
535                 return;
536             }
537         }
538         if (param_maxRegionChildren > 0 && parentChildren >= param_maxRegionChildren)
539         {
540             CV_LOG_TRACE_BAILOUT(NULL, _spaces(ctx.getCurrentDepth()*4) << "Parent region exceeds children count. Bailout");
541             ctx.stat_status.enableSkipMode(currentDepth - 1);
542             ctx.stat.currentSkippedRegions++;
543             DEBUG_ONLY(ctx.dumpStack(std::cout, false));
544             return;
545         }
546     }
547 
548     LocationExtraData::init(location);
549 
550     if ((*location.ppExtra)->global_location_id == 0)
551     {
552         CV_LOG_TRACE_BAILOUT(NULL, _spaces(ctx.getCurrentDepth()*4) << "Region location is disabled. Bailout");
553         ctx.stat_status.enableSkipMode(currentDepth);
554         ctx.stat.currentSkippedRegions++;
555         return;
556     }
557 
558     if (parentLocation && (parentLocation->flags & REGION_FLAG_SKIP_NESTED))
559     {
560         CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "Parent region disables inner regions. Bailout");
561         ctx.stat_status.enableSkipMode(currentDepth);
562         ctx.stat.currentSkippedRegions++;
563         return;
564     }
565 
566     if (param_maxRegionDepthOpenCV)
567     {
568         if ((location.flags & REGION_FLAG_APP_CODE) == 0)
569         {
570             if (ctx.regionDepthOpenCV >= param_maxRegionDepthOpenCV)
571             {
572                 CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "OpenCV region depth is exceed = " << ctx.regionDepthOpenCV << ". Bailout");
573                 if (ctx.stat.currentSkippedRegions == 0)
574                 {
575                     DEBUG_ONLY(ctx.dumpStack(std::cout, false));
576                 }
577                 ctx.stat_status.enableSkipMode(currentDepth);
578                 ctx.stat.currentSkippedRegions++;
579                 return;
580             }
581         }
582     }
583 
584     new Impl(ctx, parentRegion, *this, location, beginTimestamp);
585     CV_DbgAssert(pImpl != NULL);
586     implFlags |= REGION_FLAG__ACTIVE;
587 
588     // parallel_for path
589     if (parentRegion && parentRegion->pImpl)
590     {
591         if (parentLocation == NULL)
592         {
593             pImpl->directChildrenCount = parentChildren;
594         }
595     }
596 }
597 
destroy()598 void Region::destroy()
599 {
600     CV_DbgAssert(implFlags != 0);
601 
602     TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
603     CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "Region::destruct(): " << (void*)this << " pImpl=" << pImpl << " implFlags=" << implFlags << ' ' << (ctx.stackTopLocation() ? ctx.stackTopLocation()->name : "<unknown>"));
604 
605     CV_DbgAssert(implFlags & REGION_FLAG__NEED_STACK_POP);
606     const int currentDepth = ctx.getCurrentDepth(); CV_UNUSED(currentDepth);
607 
608     CV_LOG_CTX_STAT(NULL, _spaces(currentDepth*4) << ctx.stat << ' ' << ctx.stat_status);
609 
610     const Region::LocationStaticStorage* location = ctx.stackTopLocation();
611     Impl::OptimizationPath myCodePath = Impl::CODE_PATH_PLAIN;
612     if (location)
613     {
614         switch (location->flags & REGION_FLAG_IMPL_MASK)
615         {
616 #ifdef HAVE_IPP
617         case REGION_FLAG_IMPL_IPP:
618             myCodePath = Impl::CODE_PATH_IPP;
619             break;
620 #endif
621 #ifdef HAVE_OPENCL
622         case REGION_FLAG_IMPL_OPENCL:
623             if (param_synchronizeOpenCL && cv::ocl::isOpenCLActivated())
624                 cv::ocl::finish();
625             myCodePath = Impl::CODE_PATH_OPENCL;
626             break;
627 #endif
628 #ifdef HAVE_OPENVX
629         case REGION_FLAG_IMPL_OPENVX:
630             myCodePath = Impl::CODE_PATH_OPENVX;
631             break;
632 #endif
633         default:
634             break;
635         }
636     }
637 
638     int64 endTimestamp = getTimestamp();
639     int64 duration = endTimestamp - ctx.stackTopBeginTimestamp();
640 
641     bool active = isActive();
642 
643     if (active)
644         ctx.stat.duration = duration;
645     else if (ctx.stack.size() == ctx.parallel_for_stack_size + 1)
646         ctx.stat.duration += duration;
647 
648     switch (myCodePath) {
649         case Impl::CODE_PATH_PLAIN:
650             // nothing
651             break;
652 #ifdef HAVE_IPP
653         case Impl::CODE_PATH_IPP:
654             if (ctx.stat_status.ignoreDepthImplIPP == currentDepth)
655             {
656                 ctx.stat.durationImplIPP += duration;
657                 ctx.stat_status.ignoreDepthImplIPP = 0;
658             }
659             else if (active)
660             {
661                 ctx.stat.durationImplIPP = duration;
662             }
663             break;
664 #endif
665 #ifdef HAVE_OPENCL
666         case Impl::CODE_PATH_OPENCL:
667             if (ctx.stat_status.ignoreDepthImplOpenCL == currentDepth)
668             {
669                 ctx.stat.durationImplOpenCL += duration;
670                 ctx.stat_status.ignoreDepthImplOpenCL = 0;
671             }
672             else if (active)
673             {
674                 ctx.stat.durationImplOpenCL = duration;
675             }
676             break;
677 #endif
678 #ifdef HAVE_OPENVX
679         case Impl::CODE_PATH_OPENVX:
680             if (ctx.stat_status.ignoreDepthImplOpenVX == currentDepth)
681             {
682                 ctx.stat.durationImplOpenVX += duration;
683                 ctx.stat_status.ignoreDepthImplOpenVX = 0;
684             }
685             else if (active)
686             {
687                 ctx.stat.durationImplOpenVX = duration;
688             }
689             break;
690 #endif
691         default:
692             break;
693     }
694 
695     if (pImpl)
696     {
697         CV_DbgAssert((implFlags & (REGION_FLAG__ACTIVE | REGION_FLAG__NEED_STACK_POP)) == (REGION_FLAG__ACTIVE | REGION_FLAG__NEED_STACK_POP));
698         CV_DbgAssert(ctx.stackTopRegion() == this);
699         pImpl->endTimestamp = endTimestamp;
700         pImpl->leaveRegion(ctx);
701         pImpl->release();
702         pImpl = NULL;
703         DEBUG_ONLY(implFlags &= ~REGION_FLAG__ACTIVE);
704     }
705     else
706     {
707         CV_DbgAssert(ctx.stat_status._skipDepth <= currentDepth);
708     }
709 
710     if (implFlags & REGION_FLAG__NEED_STACK_POP)
711     {
712         CV_DbgAssert(ctx.stackTopRegion() == this);
713         ctx.stackPop();
714         ctx.stat_status.checkResetSkipMode(currentDepth);
715         DEBUG_ONLY(implFlags &= ~REGION_FLAG__NEED_STACK_POP);
716     }
717     CV_LOG_CTX_STAT(NULL, _spaces(currentDepth*4) << "===> " << ctx.stat << ' ' << ctx.stat_status);
718 }
719 
720 
~TraceManagerThreadLocal()721 TraceManagerThreadLocal::~TraceManagerThreadLocal()
722 {
723 }
724 
dumpStack(std::ostream & out,bool onlyFunctions) const725 void TraceManagerThreadLocal::dumpStack(std::ostream& out, bool onlyFunctions) const
726 {
727     std::stringstream ss;
728     std::deque<StackEntry>::const_iterator it = stack.begin();
729     std::deque<StackEntry>::const_iterator end = stack.end();
730     int depth = 0;
731     for (; it != end; ++it)
732     {
733         const Region::LocationStaticStorage* location = it->location;
734         if (location)
735         {
736             if (!onlyFunctions || (location->flags & REGION_FLAG_FUNCTION))
737             {
738                 ss << _spaces(4*depth) << location->name << std::endl;
739                 depth++;
740             }
741         }
742         else
743         {
744             ss << _spaces(4*depth) << "<unknown>" << std::endl;
745             depth++;
746         }
747     }
748     out << ss.str();
749 }
750 
751 class AsyncTraceStorage CV_FINAL : public TraceStorage
752 {
753     mutable std::ofstream out;
754 public:
755     const std::string name;
756 
AsyncTraceStorage(const std::string & filename)757     AsyncTraceStorage(const std::string& filename) :
758         out(filename.c_str(), std::ios::trunc),
759         name(filename)
760     {
761         out << "#description: OpenCV trace file" << std::endl;
762         out << "#version: 1.0" << std::endl;
763     }
~AsyncTraceStorage()764     ~AsyncTraceStorage()
765     {
766         out.close();
767     }
768 
put(const TraceMessage & msg) const769     bool put(const TraceMessage& msg) const CV_OVERRIDE
770     {
771         if (msg.hasError)
772             return false;
773         out << msg.buffer;
774         //DEBUG_ONLY(std::flush(out)); // TODO configure flag
775         return true;
776     }
777 };
778 
779 class SyncTraceStorage CV_FINAL : public TraceStorage
780 {
781     mutable std::ofstream out;
782     mutable cv::Mutex mutex;
783 public:
784     const std::string name;
785 
SyncTraceStorage(const std::string & filename)786     SyncTraceStorage(const std::string& filename) :
787         out(filename.c_str(), std::ios::trunc),
788         name(filename)
789     {
790         out << "#description: OpenCV trace file" << std::endl;
791         out << "#version: 1.0" << std::endl;
792     }
~SyncTraceStorage()793     ~SyncTraceStorage()
794     {
795         cv::AutoLock l(mutex);
796         out.close();
797     }
798 
put(const TraceMessage & msg) const799     bool put(const TraceMessage& msg) const CV_OVERRIDE
800     {
801         if (msg.hasError)
802             return false;
803         {
804             cv::AutoLock l(mutex);
805             out << msg.buffer;
806             std::flush(out); // TODO configure flag
807         }
808         return true;
809     }
810 };
811 
812 
getStorage() const813 TraceStorage* TraceManagerThreadLocal::getStorage() const
814 {
815     // TODO configuration option for stdout/single trace file
816     if (storage.empty())
817     {
818         TraceStorage* global = getTraceManager().trace_storage.get();
819         if (global)
820         {
821             const std::string filepath = cv::format("%s-%03d.txt", getParameterTraceLocation().c_str(), threadID).c_str();
822             TraceMessage msg;
823             const char* pos = strrchr(filepath.c_str(), '/'); // extract filename
824 #ifdef _WIN32
825             if (!pos)
826                 pos = strrchr(filepath.c_str(), '\\');
827 #endif
828             if (!pos)
829                 pos = filepath.c_str();
830             else
831                 pos += 1; // fix to skip extra slash in filename beginning
832             msg.printf("#thread file: %s\n", pos);
833             global->put(msg);
834             storage.reset(new AsyncTraceStorage(filepath));
835         }
836     }
837     return storage.get();
838 }
839 
840 
841 
842 static bool activated = false;
843 static bool isInitialized = false;
844 
TraceManager()845 TraceManager::TraceManager()
846 {
847     g_zero_timestamp = cv::getTickCount();
848 
849     isInitialized = true;
850     CV_LOG("TraceManager ctor: " << (void*)this);
851 
852     CV_LOG("TraceManager configure()");
853     activated = getParameterTraceEnable();
854 
855     if (activated)
856         trace_storage.reset(new SyncTraceStorage(std::string(getParameterTraceLocation()) + ".txt"));
857 
858 #ifdef OPENCV_WITH_ITT
859     if (isITTEnabled())
860     {
861         activated = true; // force trace pipeline activation (without OpenCV storage)
862         __itt_region_begin(domain, __itt_null, __itt_null, __itt_string_handle_create("OpenCVTrace"));
863     }
864 #endif
865 }
~TraceManager()866 TraceManager::~TraceManager()
867 {
868     CV_LOG("TraceManager dtor: " << (void*)this);
869 
870 #ifdef OPENCV_WITH_ITT
871     if (isITTEnabled())
872     {
873         __itt_region_end(domain, __itt_null);
874     }
875 #endif
876 
877     std::vector<TraceManagerThreadLocal*> threads_ctx;
878     tls.gather(threads_ctx);
879     size_t totalEvents = 0, totalSkippedEvents = 0;
880     for (size_t i = 0; i < threads_ctx.size(); i++)
881     {
882         TraceManagerThreadLocal* ctx = threads_ctx[i];
883         if (ctx)
884         {
885             totalEvents += ctx->region_counter;
886             totalSkippedEvents += ctx->totalSkippedEvents;
887         }
888     }
889     if (totalEvents || activated)
890     {
891         CV_LOG_INFO(NULL, "Trace: Total events: " << totalEvents);
892     }
893     if (totalSkippedEvents)
894     {
895         CV_LOG_WARNING(NULL, "Trace: Total skipped events: " << totalSkippedEvents);
896     }
897 
898     // This is a global static object, so process starts shutdown here
899     // Turn off trace
900     cv::__termination = true; // also set in DllMain() notifications handler for DLL_PROCESS_DETACH
901     activated = false;
902 }
903 
isActivated()904 bool TraceManager::isActivated()
905 {
906     // Check if process starts shutdown, and set earlyExit to true
907     // to prevent further instrumentation processing earlier.
908     if (cv::__termination)
909     {
910         activated = false;
911         return false;
912     }
913 
914     if (!isInitialized)
915     {
916         TraceManager& m = getTraceManager();
917         CV_UNUSED(m); // TODO
918     }
919 
920     return activated;
921 }
922 
923 
getTraceManagerCallOnce()924 static TraceManager* getTraceManagerCallOnce()
925 {
926     static TraceManager globalInstance;
927     return &globalInstance;
928 }
getTraceManager()929 TraceManager& getTraceManager()
930 {
931     CV_SINGLETON_LAZY_INIT_REF(TraceManager, getTraceManagerCallOnce())
932 }
933 
parallelForSetRootRegion(const Region & rootRegion,const TraceManagerThreadLocal & root_ctx)934 void parallelForSetRootRegion(const Region& rootRegion, const TraceManagerThreadLocal& root_ctx)
935 {
936     TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
937 
938     if (ctx.dummy_stack_top.region == &rootRegion) // already attached
939         return;
940 
941     CV_Assert(ctx.dummy_stack_top.region == NULL);
942     ctx.dummy_stack_top = TraceManagerThreadLocal::StackEntry(const_cast<Region*>(&rootRegion), NULL, -1);
943 
944     if (&ctx == &root_ctx)
945     {
946         ctx.stat.grab(ctx.parallel_for_stat);
947         ctx.parallel_for_stat_status = ctx.stat_status;
948         ctx.parallel_for_stack_size = ctx.stack.size();
949         return;
950     }
951 
952     CV_Assert(ctx.stack.empty());
953 
954     ctx.currentActiveRegion = const_cast<Region*>(&rootRegion);
955 
956     ctx.regionDepth = root_ctx.regionDepth;
957     ctx.regionDepthOpenCV = root_ctx.regionDepthOpenCV;
958 
959     ctx.parallel_for_stack_size = 0;
960 
961     ctx.stat_status.propagateFrom(root_ctx.stat_status);
962 }
963 
parallelForAttachNestedRegion(const Region & rootRegion)964 void parallelForAttachNestedRegion(const Region& rootRegion)
965 {
966     CV_UNUSED(rootRegion);
967     TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
968 
969     CV_DbgAssert(ctx.dummy_stack_top.region == &rootRegion);
970 
971     Region* region = ctx.getCurrentActiveRegion();
972     CV_LOG_PARALLEL(NULL, " PARALLEL_FOR: " << (void*)region << " ==> " << &rootRegion);
973     if (!region)
974         return;
975 
976 #ifdef OPENCV_WITH_ITT
977     if (!rootRegion.pImpl || !rootRegion.pImpl->itt_id_registered)
978         return;
979 
980     if (!region->pImpl)
981         return;
982 
983     CV_LOG_PARALLEL(NULL, " PARALLEL_FOR ITT: " << (void*)rootRegion.pImpl->itt_id.d1 << ":" << rootRegion.pImpl->itt_id.d2 << ":" << (void*)rootRegion.pImpl->itt_id.d3 << " => "
984                                  << (void*)region->pImpl->itt_id.d1 << ":" << region->pImpl->itt_id.d2 << ":" << (void*)region->pImpl->itt_id.d3);
985     __itt_relation_add(domain, region->pImpl->itt_id, __itt_relation_is_child_of, rootRegion.pImpl->itt_id);
986 #endif
987 }
988 
parallelForFinalize(const Region & rootRegion)989 void parallelForFinalize(const Region& rootRegion)
990 {
991     TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
992 
993     int64 endTimestamp = getTimestamp();
994     int64 duration = endTimestamp - ctx.stackTopBeginTimestamp();
995     CV_LOG_PARALLEL(NULL, "parallel_for duration: " << duration << " " << &rootRegion);
996 
997     std::vector<TraceManagerThreadLocal*> threads_ctx;
998     getTraceManager().tls.gather(threads_ctx);
999     RegionStatistics parallel_for_stat;
1000     int threads = 0;
1001     for (size_t i = 0; i < threads_ctx.size(); i++)
1002     {
1003         TraceManagerThreadLocal* child_ctx = threads_ctx[i];
1004 
1005         if (child_ctx && child_ctx->stackTopRegion() == &rootRegion)
1006         {
1007             CV_LOG_PARALLEL(NULL, "Thread=" << child_ctx->threadID << " " << child_ctx->stat);
1008             threads++;
1009             RegionStatistics child_stat;
1010             child_ctx->stat.grab(child_stat);
1011             parallel_for_stat.append(child_stat);
1012             if (child_ctx != &ctx)
1013             {
1014                 child_ctx->dummy_stack_top = TraceManagerThreadLocal::StackEntry();
1015             }
1016             else
1017             {
1018                 ctx.parallel_for_stat.grab(ctx.stat);
1019                 ctx.stat_status = ctx.parallel_for_stat_status;
1020                 child_ctx->dummy_stack_top = TraceManagerThreadLocal::StackEntry();
1021             }
1022         }
1023     }
1024     float parallel_coeff = std::min(1.0f, duration / (float)(parallel_for_stat.duration));
1025     CV_LOG_PARALLEL(NULL, "parallel_coeff=" << 1.0f / parallel_coeff);
1026     CV_LOG_PARALLEL(NULL, parallel_for_stat);
1027     if (parallel_coeff != 1.0f)
1028     {
1029         parallel_for_stat.multiply(parallel_coeff);
1030         CV_LOG_PARALLEL(NULL, parallel_for_stat);
1031     }
1032     parallel_for_stat.duration = 0;
1033     ctx.stat.append(parallel_for_stat);
1034     CV_LOG_PARALLEL(NULL, ctx.stat);
1035 }
1036 
1037 struct TraceArg::ExtraData
1038 {
1039 #ifdef OPENCV_WITH_ITT
1040     // Special fields for ITT
1041     __itt_string_handle* volatile ittHandle_name;
1042 #endif
ExtraDatacv::utils::trace::details::TraceArg::ExtraData1043     ExtraData(TraceManagerThreadLocal& ctx, const TraceArg& arg)
1044     {
1045         CV_UNUSED(ctx); CV_UNUSED(arg);
1046 #ifdef OPENCV_WITH_ITT
1047         if (isITTEnabled())
1048         {
1049             // Caching is not required here, because there is builtin cache.
1050             // https://software.intel.com/en-us/node/544203:
1051             //     Consecutive calls to __itt_string_handle_create with the same name return the same value.
1052             ittHandle_name = __itt_string_handle_create(arg.name);
1053         }
1054         else
1055         {
1056             ittHandle_name = 0;
1057         }
1058 #endif
1059     }
1060 };
1061 
initTraceArg(TraceManagerThreadLocal & ctx,const TraceArg & arg)1062 static void initTraceArg(TraceManagerThreadLocal& ctx, const TraceArg& arg)
1063 {
1064     TraceArg::ExtraData** pExtra = arg.ppExtra;
1065     if (*pExtra == NULL)
1066     {
1067         cv::AutoLock lock(cv::getInitializationMutex());
1068         if (*pExtra == NULL)
1069         {
1070             *pExtra = new TraceArg::ExtraData(ctx, arg);
1071         }
1072     }
1073 }
traceArg(const TraceArg & arg,const char * value)1074 void traceArg(const TraceArg& arg, const char* value)
1075 {
1076     TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
1077     Region* region = ctx.getCurrentActiveRegion();
1078     if (!region)
1079         return;
1080     CV_Assert(region->pImpl);
1081     initTraceArg(ctx, arg);
1082     if (!value)
1083         value = "<null>";
1084 #ifdef OPENCV_WITH_ITT
1085     if (isITTEnabled())
1086     {
1087         __itt_metadata_str_add(domain, region->pImpl->itt_id, (*arg.ppExtra)->ittHandle_name, value, strlen(value));
1088     }
1089 #endif
1090 }
traceArg(const TraceArg & arg,int value)1091 void traceArg(const TraceArg& arg, int value)
1092 {
1093     TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
1094     Region* region = ctx.getCurrentActiveRegion();
1095     if (!region)
1096         return;
1097     CV_Assert(region->pImpl);
1098     initTraceArg(ctx, arg);
1099 #ifdef OPENCV_WITH_ITT
1100     if (isITTEnabled())
1101     {
1102         __itt_metadata_add(domain, region->pImpl->itt_id, (*arg.ppExtra)->ittHandle_name, sizeof(int) == 4 ? __itt_metadata_s32 : __itt_metadata_s64, 1, &value);
1103     }
1104 #else
1105     CV_UNUSED(value);
1106 #endif
1107 }
traceArg(const TraceArg & arg,int64 value)1108 void traceArg(const TraceArg& arg, int64 value)
1109 {
1110     TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
1111     Region* region = ctx.getCurrentActiveRegion();
1112     if (!region)
1113         return;
1114     CV_Assert(region->pImpl);
1115     initTraceArg(ctx, arg);
1116 #ifdef OPENCV_WITH_ITT
1117     if (isITTEnabled())
1118     {
1119         __itt_metadata_add(domain, region->pImpl->itt_id, (*arg.ppExtra)->ittHandle_name, __itt_metadata_s64, 1, &value);
1120     }
1121 #else
1122     CV_UNUSED(value);
1123 #endif
1124 }
traceArg(const TraceArg & arg,double value)1125 void traceArg(const TraceArg& arg, double value)
1126 {
1127     TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
1128     Region* region = ctx.getCurrentActiveRegion();
1129     if (!region)
1130         return;
1131     CV_Assert(region->pImpl);
1132     initTraceArg(ctx, arg);
1133 #ifdef OPENCV_WITH_ITT
1134     if (isITTEnabled())
1135     {
1136         __itt_metadata_add(domain, region->pImpl->itt_id, (*arg.ppExtra)->ittHandle_name, __itt_metadata_double, 1, &value);
1137     }
1138 #else
1139     CV_UNUSED(value);
1140 #endif
1141 }
1142 
1143 #else
1144 
1145 Region::Region(const LocationStaticStorage&) : pImpl(NULL), implFlags(0) {}
1146 void Region::destroy() {}
1147 
1148 void traceArg(const TraceArg&, const char*) {}
1149 void traceArg(const TraceArg&, int) {};
1150 void traceArg(const TraceArg&, int64) {};
1151 void traceArg(const TraceArg&, double) {};
1152 
1153 #endif
1154 
1155 }}}} // namespace
1156