1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4
5 #include "precomp.hpp"
6
7 #include <opencv2/core/utils/trace.hpp>
8 #include <opencv2/core/utils/trace.private.hpp>
9 #include <opencv2/core/utils/configuration.private.hpp>
10
11 #include <opencv2/core/opencl/ocl_defs.hpp>
12
13 #include <cstdarg> // va_start
14
15 #include <sstream>
16 #include <ostream>
17 #include <fstream>
18
19 #if 0
20 #define CV_LOG(...) CV_LOG_INFO(NULL, __VA_ARGS__)
21 #else
22 #define CV_LOG(...) {}
23 #endif
24
25 #if 0
26 #define CV_LOG_ITT(...) CV_LOG_INFO(NULL, __VA_ARGS__)
27 #else
28 #define CV_LOG_ITT(...) {}
29 #endif
30
31 #if 1
32 #define CV_LOG_TRACE_BAILOUT(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__)
33 #else
34 #define CV_LOG_TRACE_BAILOUT(...) {}
35 #endif
36
37 #if 0
38 #define CV_LOG_PARALLEL(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__)
39 #else
40 #define CV_LOG_PARALLEL(...) {}
41 #endif
42
43 #if 0
44 #define CV_LOG_CTX_STAT(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__)
45 #else
46 #define CV_LOG_CTX_STAT(...) {}
47 #endif
48
49 #if 0
50 #define CV_LOG_SKIP(tag, ...) CV_LOG_INFO(tag, __VA_ARGS__)
51 #else
52 #define CV_LOG_SKIP(...) {}
53 #endif
54
55 namespace cv {
56 namespace utils {
57 namespace trace {
58 namespace details {
59
60 #ifdef OPENCV_TRACE
61
62 #ifdef _MSC_VER
63 #pragma warning(disable:4065) // switch statement contains 'default' but no 'case' labels
64 #endif
65
66 static int64 g_zero_timestamp = 0;
67
getTimestamp()68 static int64 getTimestamp()
69 {
70 int64 t = getTickCount();
71 static double tick_to_ns = 1e9 / getTickFrequency();
72 return (int64)((t - g_zero_timestamp) * tick_to_ns);
73 }
74
getParameterTraceEnable()75 static bool getParameterTraceEnable()
76 {
77 static bool param_traceEnable = utils::getConfigurationParameterBool("OPENCV_TRACE", false);
78 return param_traceEnable;
79 }
80
81 // TODO lazy configuration flags
82 static int param_maxRegionDepthOpenCV = (int)utils::getConfigurationParameterSizeT("OPENCV_TRACE_DEPTH_OPENCV", 1);
83 static int param_maxRegionChildrenOpenCV = (int)utils::getConfigurationParameterSizeT("OPENCV_TRACE_MAX_CHILDREN_OPENCV", 1000);
84 static int param_maxRegionChildren = (int)utils::getConfigurationParameterSizeT("OPENCV_TRACE_MAX_CHILDREN", 10000);
85
getParameterTraceLocation()86 static const cv::String& getParameterTraceLocation()
87 {
88 static cv::String param_traceLocation = utils::getConfigurationParameterString("OPENCV_TRACE_LOCATION", "OpenCVTrace");
89 return param_traceLocation;
90 }
91
92 #ifdef HAVE_OPENCL
93 static bool param_synchronizeOpenCL = utils::getConfigurationParameterBool("OPENCV_TRACE_SYNC_OPENCL", false);
94 #endif
95
96 #ifdef OPENCV_WITH_ITT
97 static bool param_ITT_registerParentScope = utils::getConfigurationParameterBool("OPENCV_TRACE_ITT_PARENT", false);
98 #endif
99
_spaces(int count)100 static const char* _spaces(int count)
101 {
102 static const char buf[64] =
103 " ";
104 return &buf[63 - (count & 63)];
105 }
106
107 /**
108 * Text-based trace messages
109 */
110 class TraceMessage
111 {
112 public:
113 char buffer[1024];
114 size_t len;
115 bool hasError;
116
TraceMessage()117 TraceMessage() :
118 len(0),
119 hasError(false)
120 {}
121
printf(const char * format,...)122 bool printf(const char* format, ...)
123 {
124 char* buf = &buffer[len];
125 size_t sz = sizeof(buffer) - len;
126 va_list ap;
127 va_start(ap, format);
128 int n = cv_vsnprintf(buf, (int)sz, format, ap);
129 va_end(ap);
130 if (n < 0 || (size_t)n > sz)
131 {
132 hasError = true;
133 return false;
134 }
135 len += n;
136 return true;
137 }
138
formatlocation(const Region::LocationStaticStorage & location)139 bool formatlocation(const Region::LocationStaticStorage& location)
140 {
141 return this->printf("l,%lld,\"%s\",%d,\"%s\",0x%llX\n",
142 (long long int)(*location.ppExtra)->global_location_id,
143 location.filename,
144 location.line,
145 location.name,
146 (long long int)(location.flags & ~0xF0000000));
147 }
formatRegionEnter(const Region & region)148 bool formatRegionEnter(const Region& region)
149 {
150 bool ok = this->printf("b,%d,%lld,%lld,%lld",
151 (int)region.pImpl->threadID,
152 (long long int)region.pImpl->beginTimestamp,
153 (long long int)((*region.pImpl->location.ppExtra)->global_location_id),
154 (long long int)region.pImpl->global_region_id);
155 if (region.pImpl->parentRegion && region.pImpl->parentRegion->pImpl)
156 {
157 if (region.pImpl->parentRegion->pImpl->threadID != region.pImpl->threadID)
158 ok &= this->printf(",parentThread=%d,parent=%lld",
159 (int)region.pImpl->parentRegion->pImpl->threadID,
160 (long long int)region.pImpl->parentRegion->pImpl->global_region_id);
161 }
162 ok &= this->printf("\n");
163 return ok;
164 }
formatRegionLeave(const Region & region,const RegionStatistics & result)165 bool formatRegionLeave(const Region& region, const RegionStatistics& result)
166 {
167 CV_DbgAssert(region.pImpl->endTimestamp - region.pImpl->beginTimestamp == result.duration);
168 bool ok = this->printf("e,%d,%lld,%lld,%lld,%lld",
169 (int)region.pImpl->threadID,
170 (long long int)region.pImpl->endTimestamp,
171 (long long int)(*region.pImpl->location.ppExtra)->global_location_id,
172 (long long int)region.pImpl->global_region_id,
173 (long long int)result.duration);
174 if (result.currentSkippedRegions)
175 ok &= this->printf(",skip=%d", (int)result.currentSkippedRegions);
176 #ifdef HAVE_IPP
177 if (result.durationImplIPP)
178 ok &= this->printf(",tIPP=%lld", (long long int)result.durationImplIPP);
179 #endif
180 #ifdef HAVE_OPENCL
181 if (result.durationImplOpenCL)
182 ok &= this->printf(",tOCL=%lld", (long long int)result.durationImplOpenCL);
183 #endif
184 #ifdef HAVE_OPENVX
185 if (result.durationImplOpenVX)
186 ok &= this->printf(",tOVX=%lld", (long long int)result.durationImplOpenVX);
187 #endif
188 ok &= this->printf("\n");
189 return ok;
190 }
recordRegionArg(const Region & region,const TraceArg & arg,const char * value)191 bool recordRegionArg(const Region& region, const TraceArg& arg, const char* value)
192 {
193 return this->printf("a,%d,%lld,%lld,\"%s\",\"%s\"\n",
194 region.pImpl->threadID,
195 (long long int)region.pImpl->beginTimestamp,
196 (long long int)region.pImpl->global_region_id,
197 arg.name,
198 value);
199 }
200 };
201
202
203 #ifdef OPENCV_WITH_ITT
204 static __itt_domain* domain = NULL;
205
isITTEnabled()206 static bool isITTEnabled()
207 {
208 static volatile bool isInitialized = false;
209 static bool isEnabled = false;
210 if (!isInitialized)
211 {
212 cv::AutoLock lock(cv::getInitializationMutex());
213 if (!isInitialized)
214 {
215 bool param_traceITTEnable = utils::getConfigurationParameterBool("OPENCV_TRACE_ITT_ENABLE", true);
216 if (param_traceITTEnable)
217 {
218 isEnabled = !!(__itt_api_version());
219 CV_LOG_ITT("ITT is " << (isEnabled ? "enabled" : "disabled"));
220 domain = __itt_domain_create("OpenCVTrace");
221 }
222 else
223 {
224 CV_LOG_ITT("ITT is disabled through OpenCV parameter");
225 isEnabled = false;
226 }
227 isInitialized = true;
228 }
229 }
230 return isEnabled;
231 }
232 #endif
233
234
LocationExtraData(const LocationStaticStorage & location)235 Region::LocationExtraData::LocationExtraData(const LocationStaticStorage& location)
236 {
237 CV_UNUSED(location);
238 static int g_location_id_counter = 0;
239 global_location_id = CV_XADD(&g_location_id_counter, 1) + 1;
240 CV_LOG("Register location: " << global_location_id << " (" << (void*)&location << ")"
241 << std::endl << " file: " << location.filename
242 << std::endl << " line: " << location.line
243 << std::endl << " name: " << location.name);
244 #ifdef OPENCV_WITH_ITT
245 if (isITTEnabled())
246 {
247 // Caching is not required here, because there is builtin cache.
248 // https://software.intel.com/en-us/node/544203:
249 // Consecutive calls to __itt_string_handle_create with the same name return the same value.
250 ittHandle_name = __itt_string_handle_create(location.name);
251 ittHandle_filename = __itt_string_handle_create(location.filename);
252 }
253 else
254 {
255 ittHandle_name = 0;
256 ittHandle_filename = 0;
257 }
258 #endif
259 }
260
init(const Region::LocationStaticStorage & location)261 /*static*/ Region::LocationExtraData* Region::LocationExtraData::init(const Region::LocationStaticStorage& location)
262 {
263 LocationExtraData** pLocationExtra = location.ppExtra;
264 CV_DbgAssert(pLocationExtra);
265 if (*pLocationExtra == NULL)
266 {
267 cv::AutoLock lock(cv::getInitializationMutex());
268 if (*pLocationExtra == NULL)
269 {
270 *pLocationExtra = new Region::LocationExtraData(location);
271 TraceStorage* s = getTraceManager().trace_storage.get();
272 if (s)
273 {
274 TraceMessage msg;
275 msg.formatlocation(location);
276 s->put(msg);
277 }
278 }
279 }
280 return *pLocationExtra;
281 }
282
283
Impl(TraceManagerThreadLocal & ctx,Region * parentRegion_,Region & region_,const LocationStaticStorage & location_,int64 beginTimestamp_)284 Region::Impl::Impl(TraceManagerThreadLocal& ctx, Region* parentRegion_, Region& region_, const LocationStaticStorage& location_, int64 beginTimestamp_) :
285 location(location_),
286 region(region_),
287 parentRegion(parentRegion_),
288 threadID(ctx.threadID),
289 global_region_id(++ctx.region_counter),
290 beginTimestamp(beginTimestamp_),
291 endTimestamp(0),
292 directChildrenCount(0)
293 #ifdef OPENCV_WITH_ITT
294 ,itt_id_registered(false)
295 ,itt_id(__itt_null)
296 #endif
297 {
298 CV_DbgAssert(ctx.currentActiveRegion == parentRegion);
299 region.pImpl = this;
300
301 registerRegion(ctx);
302
303 enterRegion(ctx);
304 }
305
~Impl()306 Region::Impl::~Impl()
307 {
308 #ifdef OPENCV_WITH_ITT
309 if (itt_id_registered)
310 {
311 CV_LOG_ITT(" Destroy ITT region: I=" << (void*)this);
312 __itt_id_destroy(domain, itt_id);
313 itt_id_registered = false;
314 }
315 #endif
316 region.pImpl = NULL;
317 }
318
enterRegion(TraceManagerThreadLocal & ctx)319 void Region::Impl::enterRegion(TraceManagerThreadLocal& ctx)
320 {
321 ctx.currentActiveRegion = ®ion;
322
323 if (location.flags & REGION_FLAG_FUNCTION)
324 {
325 if ((location.flags & REGION_FLAG_APP_CODE) == 0)
326 {
327 ctx.regionDepthOpenCV++;
328 }
329 ctx.regionDepth++;
330 }
331
332 TraceStorage* s = ctx.getStorage();
333 if (s)
334 {
335 TraceMessage msg;
336 msg.formatRegionEnter(region);
337 s->put(msg);
338 }
339 #ifdef OPENCV_WITH_ITT
340 if (isITTEnabled())
341 {
342 __itt_id parentID = __itt_null;
343 if (param_ITT_registerParentScope && parentRegion && parentRegion->pImpl && parentRegion->pImpl->itt_id_registered && (location.flags & REGION_FLAG_REGION_FORCE) == 0)
344 parentID = parentRegion->pImpl->itt_id;
345 __itt_task_begin(domain, itt_id, parentID, (*location.ppExtra)->ittHandle_name);
346 }
347 #endif
348 }
349
leaveRegion(TraceManagerThreadLocal & ctx)350 void Region::Impl::leaveRegion(TraceManagerThreadLocal& ctx)
351 {
352 int64 duration = endTimestamp - beginTimestamp; CV_UNUSED(duration);
353 RegionStatistics result;
354 ctx.stat.grab(result);
355 ctx.totalSkippedEvents += result.currentSkippedRegions;
356 CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "leaveRegion(): " << (void*)this << " " << result);
357 #ifdef OPENCV_WITH_ITT
358 if (isITTEnabled())
359 {
360 if (result.currentSkippedRegions)
361 {
362 __itt_metadata_add(domain, itt_id, __itt_string_handle_create("skipped trace entries"), __itt_metadata_u32, 1, &result.currentSkippedRegions);
363 }
364 #ifdef HAVE_IPP
365 if (result.durationImplIPP)
366 __itt_metadata_add(domain, itt_id, __itt_string_handle_create("tIPP"), __itt_metadata_u64, 1, &result.durationImplIPP);
367 #endif
368 #ifdef HAVE_OPENCL
369 if (result.durationImplOpenCL)
370 __itt_metadata_add(domain, itt_id, __itt_string_handle_create("tOpenCL"), __itt_metadata_u64, 1, &result.durationImplOpenCL);
371 #endif
372 #ifdef HAVE_OPENVX
373 if (result.durationImplOpenVX)
374 __itt_metadata_add(domain, itt_id, __itt_string_handle_create("tOpenVX"), __itt_metadata_u64, 1, &result.durationImplOpenVX);
375 #endif
376 __itt_task_end(domain);
377 }
378 #endif
379 TraceStorage* s = ctx.getStorage();
380 if (s)
381 {
382 TraceMessage msg;
383 msg.formatRegionLeave(region, result);
384 s->put(msg);
385 }
386
387 if (location.flags & REGION_FLAG_FUNCTION)
388 {
389 if ((location.flags & REGION_FLAG_APP_CODE) == 0)
390 {
391 ctx.regionDepthOpenCV--;
392 }
393 ctx.regionDepth--;
394 }
395
396 ctx.currentActiveRegion = parentRegion;
397 }
398
release()399 void Region::Impl::release()
400 {
401 delete this;
402 }
403
registerRegion(TraceManagerThreadLocal & ctx)404 void Region::Impl::registerRegion(TraceManagerThreadLocal& ctx)
405 {
406 #ifdef OPENCV_WITH_ITT
407 if (isITTEnabled())
408 {
409 if (!itt_id_registered)
410 {
411 CV_LOG_ITT(" Register ITT region: I=" << (void*)this << " " << ctx.threadID << "-" << global_region_id);
412 #if 1 // workaround for some ITT backends
413 itt_id = __itt_id_make((void*)(intptr_t)(((int64)(ctx.threadID + 1) << 32) | global_region_id), global_region_id);
414 #else
415 itt_id = __itt_id_make((void*)(intptr_t)(ctx.threadID + 1), global_region_id);
416 #endif
417 __itt_id_create(domain, itt_id);
418 itt_id_registered = true;
419 }
420 }
421 #else
422 CV_UNUSED(ctx);
423 #endif
424 }
425
enableSkipMode(int depth)426 void RegionStatisticsStatus::enableSkipMode(int depth)
427 {
428 CV_DbgAssert(_skipDepth < 0);
429 CV_LOG_SKIP(NULL, "SKIP-ENABLE: depth=" << depth);
430 _skipDepth = depth;
431 }
checkResetSkipMode(int leaveDepth)432 void RegionStatisticsStatus::checkResetSkipMode(int leaveDepth)
433 {
434 if (leaveDepth <= _skipDepth)
435 {
436 CV_LOG_SKIP(NULL, "SKIP-RESET: leaveDepth=" << leaveDepth << " skipDepth=" << _skipDepth);
437 _skipDepth = -1;
438 }
439 }
440
Region(const LocationStaticStorage & location)441 Region::Region(const LocationStaticStorage& location) :
442 pImpl(NULL),
443 implFlags(0)
444 {
445 // Checks:
446 // - global enable flag
447 // - parent region is disabled
448 // - children count threshold
449 // - region location
450 // - depth (opencv nested calls)
451 if (!TraceManager::isActivated())
452 {
453 CV_LOG("Trace is disabled. Bailout");
454 return;
455 }
456
457 TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
458 CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "Region(): " << (void*)this << ": " << location.name);
459
460 Region* parentRegion = ctx.stackTopRegion();
461 const Region::LocationStaticStorage* parentLocation = ctx.stackTopLocation();
462
463 if (location.flags & REGION_FLAG_REGION_NEXT)
464 {
465 if (parentRegion && parentRegion->pImpl)
466 {
467 CV_DbgAssert((parentRegion->pImpl->location.flags & REGION_FLAG_FUNCTION) == 0);
468 parentRegion->destroy(); parentRegion->implFlags = 0;
469 parentRegion = ctx.stackTopRegion();
470 parentLocation = ctx.stackTopLocation();
471 }
472 }
473
474 int parentChildren = 0;
475 if (parentRegion && parentRegion->pImpl)
476 {
477 if (parentLocation == NULL)
478 {
479 // parallel_for_body code path
480 parentChildren = CV_XADD(&parentRegion->pImpl->directChildrenCount, 1) + 1;
481 }
482 else
483 {
484 parentChildren = ++parentRegion->pImpl->directChildrenCount;
485 }
486 }
487
488 int64 beginTimestamp = getTimestamp();
489
490 int currentDepth = ctx.getCurrentDepth() + 1;
491 switch (location.flags & REGION_FLAG_IMPL_MASK)
492 {
493 #ifdef HAVE_IPP
494 case REGION_FLAG_IMPL_IPP:
495 if (!ctx.stat_status.ignoreDepthImplIPP)
496 ctx.stat_status.ignoreDepthImplIPP = currentDepth;
497 break;
498 #endif
499 #ifdef HAVE_OPENCL
500 case REGION_FLAG_IMPL_OPENCL:
501 if (!ctx.stat_status.ignoreDepthImplOpenCL)
502 ctx.stat_status.ignoreDepthImplOpenCL = currentDepth;
503 break;
504 #endif
505 #ifdef HAVE_OPENVX
506 case REGION_FLAG_IMPL_OPENVX:
507 if (!ctx.stat_status.ignoreDepthImplOpenVX)
508 ctx.stat_status.ignoreDepthImplOpenVX = currentDepth;
509 break;
510 #endif
511 default:
512 break;
513 }
514
515 ctx.stackPush(this, &location, beginTimestamp);
516 implFlags |= REGION_FLAG__NEED_STACK_POP;
517
518 if ((location.flags & REGION_FLAG_REGION_FORCE) == 0)
519 {
520 if (ctx.stat_status._skipDepth >= 0 && currentDepth > ctx.stat_status._skipDepth)
521 {
522 CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "Parent region is disabled. Bailout");
523 ctx.stat.currentSkippedRegions++;
524 return;
525 }
526
527 if (param_maxRegionChildrenOpenCV > 0 && (location.flags & REGION_FLAG_APP_CODE) == 0 && parentLocation && (parentLocation->flags & REGION_FLAG_APP_CODE) == 0)
528 {
529 if (parentChildren >= param_maxRegionChildrenOpenCV)
530 {
531 CV_LOG_TRACE_BAILOUT(NULL, _spaces(ctx.getCurrentDepth()*4) << "OpenCV parent region exceeds children count. Bailout");
532 ctx.stat_status.enableSkipMode(currentDepth - 1);
533 ctx.stat.currentSkippedRegions++;
534 DEBUG_ONLY(ctx.dumpStack(std::cout, false));
535 return;
536 }
537 }
538 if (param_maxRegionChildren > 0 && parentChildren >= param_maxRegionChildren)
539 {
540 CV_LOG_TRACE_BAILOUT(NULL, _spaces(ctx.getCurrentDepth()*4) << "Parent region exceeds children count. Bailout");
541 ctx.stat_status.enableSkipMode(currentDepth - 1);
542 ctx.stat.currentSkippedRegions++;
543 DEBUG_ONLY(ctx.dumpStack(std::cout, false));
544 return;
545 }
546 }
547
548 LocationExtraData::init(location);
549
550 if ((*location.ppExtra)->global_location_id == 0)
551 {
552 CV_LOG_TRACE_BAILOUT(NULL, _spaces(ctx.getCurrentDepth()*4) << "Region location is disabled. Bailout");
553 ctx.stat_status.enableSkipMode(currentDepth);
554 ctx.stat.currentSkippedRegions++;
555 return;
556 }
557
558 if (parentLocation && (parentLocation->flags & REGION_FLAG_SKIP_NESTED))
559 {
560 CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "Parent region disables inner regions. Bailout");
561 ctx.stat_status.enableSkipMode(currentDepth);
562 ctx.stat.currentSkippedRegions++;
563 return;
564 }
565
566 if (param_maxRegionDepthOpenCV)
567 {
568 if ((location.flags & REGION_FLAG_APP_CODE) == 0)
569 {
570 if (ctx.regionDepthOpenCV >= param_maxRegionDepthOpenCV)
571 {
572 CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "OpenCV region depth is exceed = " << ctx.regionDepthOpenCV << ". Bailout");
573 if (ctx.stat.currentSkippedRegions == 0)
574 {
575 DEBUG_ONLY(ctx.dumpStack(std::cout, false));
576 }
577 ctx.stat_status.enableSkipMode(currentDepth);
578 ctx.stat.currentSkippedRegions++;
579 return;
580 }
581 }
582 }
583
584 new Impl(ctx, parentRegion, *this, location, beginTimestamp);
585 CV_DbgAssert(pImpl != NULL);
586 implFlags |= REGION_FLAG__ACTIVE;
587
588 // parallel_for path
589 if (parentRegion && parentRegion->pImpl)
590 {
591 if (parentLocation == NULL)
592 {
593 pImpl->directChildrenCount = parentChildren;
594 }
595 }
596 }
597
destroy()598 void Region::destroy()
599 {
600 CV_DbgAssert(implFlags != 0);
601
602 TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
603 CV_LOG(_spaces(ctx.getCurrentDepth()*4) << "Region::destruct(): " << (void*)this << " pImpl=" << pImpl << " implFlags=" << implFlags << ' ' << (ctx.stackTopLocation() ? ctx.stackTopLocation()->name : "<unknown>"));
604
605 CV_DbgAssert(implFlags & REGION_FLAG__NEED_STACK_POP);
606 const int currentDepth = ctx.getCurrentDepth(); CV_UNUSED(currentDepth);
607
608 CV_LOG_CTX_STAT(NULL, _spaces(currentDepth*4) << ctx.stat << ' ' << ctx.stat_status);
609
610 const Region::LocationStaticStorage* location = ctx.stackTopLocation();
611 Impl::OptimizationPath myCodePath = Impl::CODE_PATH_PLAIN;
612 if (location)
613 {
614 switch (location->flags & REGION_FLAG_IMPL_MASK)
615 {
616 #ifdef HAVE_IPP
617 case REGION_FLAG_IMPL_IPP:
618 myCodePath = Impl::CODE_PATH_IPP;
619 break;
620 #endif
621 #ifdef HAVE_OPENCL
622 case REGION_FLAG_IMPL_OPENCL:
623 if (param_synchronizeOpenCL && cv::ocl::isOpenCLActivated())
624 cv::ocl::finish();
625 myCodePath = Impl::CODE_PATH_OPENCL;
626 break;
627 #endif
628 #ifdef HAVE_OPENVX
629 case REGION_FLAG_IMPL_OPENVX:
630 myCodePath = Impl::CODE_PATH_OPENVX;
631 break;
632 #endif
633 default:
634 break;
635 }
636 }
637
638 int64 endTimestamp = getTimestamp();
639 int64 duration = endTimestamp - ctx.stackTopBeginTimestamp();
640
641 bool active = isActive();
642
643 if (active)
644 ctx.stat.duration = duration;
645 else if (ctx.stack.size() == ctx.parallel_for_stack_size + 1)
646 ctx.stat.duration += duration;
647
648 switch (myCodePath) {
649 case Impl::CODE_PATH_PLAIN:
650 // nothing
651 break;
652 #ifdef HAVE_IPP
653 case Impl::CODE_PATH_IPP:
654 if (ctx.stat_status.ignoreDepthImplIPP == currentDepth)
655 {
656 ctx.stat.durationImplIPP += duration;
657 ctx.stat_status.ignoreDepthImplIPP = 0;
658 }
659 else if (active)
660 {
661 ctx.stat.durationImplIPP = duration;
662 }
663 break;
664 #endif
665 #ifdef HAVE_OPENCL
666 case Impl::CODE_PATH_OPENCL:
667 if (ctx.stat_status.ignoreDepthImplOpenCL == currentDepth)
668 {
669 ctx.stat.durationImplOpenCL += duration;
670 ctx.stat_status.ignoreDepthImplOpenCL = 0;
671 }
672 else if (active)
673 {
674 ctx.stat.durationImplOpenCL = duration;
675 }
676 break;
677 #endif
678 #ifdef HAVE_OPENVX
679 case Impl::CODE_PATH_OPENVX:
680 if (ctx.stat_status.ignoreDepthImplOpenVX == currentDepth)
681 {
682 ctx.stat.durationImplOpenVX += duration;
683 ctx.stat_status.ignoreDepthImplOpenVX = 0;
684 }
685 else if (active)
686 {
687 ctx.stat.durationImplOpenVX = duration;
688 }
689 break;
690 #endif
691 default:
692 break;
693 }
694
695 if (pImpl)
696 {
697 CV_DbgAssert((implFlags & (REGION_FLAG__ACTIVE | REGION_FLAG__NEED_STACK_POP)) == (REGION_FLAG__ACTIVE | REGION_FLAG__NEED_STACK_POP));
698 CV_DbgAssert(ctx.stackTopRegion() == this);
699 pImpl->endTimestamp = endTimestamp;
700 pImpl->leaveRegion(ctx);
701 pImpl->release();
702 pImpl = NULL;
703 DEBUG_ONLY(implFlags &= ~REGION_FLAG__ACTIVE);
704 }
705 else
706 {
707 CV_DbgAssert(ctx.stat_status._skipDepth <= currentDepth);
708 }
709
710 if (implFlags & REGION_FLAG__NEED_STACK_POP)
711 {
712 CV_DbgAssert(ctx.stackTopRegion() == this);
713 ctx.stackPop();
714 ctx.stat_status.checkResetSkipMode(currentDepth);
715 DEBUG_ONLY(implFlags &= ~REGION_FLAG__NEED_STACK_POP);
716 }
717 CV_LOG_CTX_STAT(NULL, _spaces(currentDepth*4) << "===> " << ctx.stat << ' ' << ctx.stat_status);
718 }
719
720
~TraceManagerThreadLocal()721 TraceManagerThreadLocal::~TraceManagerThreadLocal()
722 {
723 }
724
dumpStack(std::ostream & out,bool onlyFunctions) const725 void TraceManagerThreadLocal::dumpStack(std::ostream& out, bool onlyFunctions) const
726 {
727 std::stringstream ss;
728 std::deque<StackEntry>::const_iterator it = stack.begin();
729 std::deque<StackEntry>::const_iterator end = stack.end();
730 int depth = 0;
731 for (; it != end; ++it)
732 {
733 const Region::LocationStaticStorage* location = it->location;
734 if (location)
735 {
736 if (!onlyFunctions || (location->flags & REGION_FLAG_FUNCTION))
737 {
738 ss << _spaces(4*depth) << location->name << std::endl;
739 depth++;
740 }
741 }
742 else
743 {
744 ss << _spaces(4*depth) << "<unknown>" << std::endl;
745 depth++;
746 }
747 }
748 out << ss.str();
749 }
750
751 class AsyncTraceStorage CV_FINAL : public TraceStorage
752 {
753 mutable std::ofstream out;
754 public:
755 const std::string name;
756
AsyncTraceStorage(const std::string & filename)757 AsyncTraceStorage(const std::string& filename) :
758 out(filename.c_str(), std::ios::trunc),
759 name(filename)
760 {
761 out << "#description: OpenCV trace file" << std::endl;
762 out << "#version: 1.0" << std::endl;
763 }
~AsyncTraceStorage()764 ~AsyncTraceStorage()
765 {
766 out.close();
767 }
768
put(const TraceMessage & msg) const769 bool put(const TraceMessage& msg) const CV_OVERRIDE
770 {
771 if (msg.hasError)
772 return false;
773 out << msg.buffer;
774 //DEBUG_ONLY(std::flush(out)); // TODO configure flag
775 return true;
776 }
777 };
778
779 class SyncTraceStorage CV_FINAL : public TraceStorage
780 {
781 mutable std::ofstream out;
782 mutable cv::Mutex mutex;
783 public:
784 const std::string name;
785
SyncTraceStorage(const std::string & filename)786 SyncTraceStorage(const std::string& filename) :
787 out(filename.c_str(), std::ios::trunc),
788 name(filename)
789 {
790 out << "#description: OpenCV trace file" << std::endl;
791 out << "#version: 1.0" << std::endl;
792 }
~SyncTraceStorage()793 ~SyncTraceStorage()
794 {
795 cv::AutoLock l(mutex);
796 out.close();
797 }
798
put(const TraceMessage & msg) const799 bool put(const TraceMessage& msg) const CV_OVERRIDE
800 {
801 if (msg.hasError)
802 return false;
803 {
804 cv::AutoLock l(mutex);
805 out << msg.buffer;
806 std::flush(out); // TODO configure flag
807 }
808 return true;
809 }
810 };
811
812
getStorage() const813 TraceStorage* TraceManagerThreadLocal::getStorage() const
814 {
815 // TODO configuration option for stdout/single trace file
816 if (storage.empty())
817 {
818 TraceStorage* global = getTraceManager().trace_storage.get();
819 if (global)
820 {
821 const std::string filepath = cv::format("%s-%03d.txt", getParameterTraceLocation().c_str(), threadID).c_str();
822 TraceMessage msg;
823 const char* pos = strrchr(filepath.c_str(), '/'); // extract filename
824 #ifdef _WIN32
825 if (!pos)
826 pos = strrchr(filepath.c_str(), '\\');
827 #endif
828 if (!pos)
829 pos = filepath.c_str();
830 else
831 pos += 1; // fix to skip extra slash in filename beginning
832 msg.printf("#thread file: %s\n", pos);
833 global->put(msg);
834 storage.reset(new AsyncTraceStorage(filepath));
835 }
836 }
837 return storage.get();
838 }
839
840
841
842 static bool activated = false;
843 static bool isInitialized = false;
844
TraceManager()845 TraceManager::TraceManager()
846 {
847 g_zero_timestamp = cv::getTickCount();
848
849 isInitialized = true;
850 CV_LOG("TraceManager ctor: " << (void*)this);
851
852 CV_LOG("TraceManager configure()");
853 activated = getParameterTraceEnable();
854
855 if (activated)
856 trace_storage.reset(new SyncTraceStorage(std::string(getParameterTraceLocation()) + ".txt"));
857
858 #ifdef OPENCV_WITH_ITT
859 if (isITTEnabled())
860 {
861 activated = true; // force trace pipeline activation (without OpenCV storage)
862 __itt_region_begin(domain, __itt_null, __itt_null, __itt_string_handle_create("OpenCVTrace"));
863 }
864 #endif
865 }
~TraceManager()866 TraceManager::~TraceManager()
867 {
868 CV_LOG("TraceManager dtor: " << (void*)this);
869
870 #ifdef OPENCV_WITH_ITT
871 if (isITTEnabled())
872 {
873 __itt_region_end(domain, __itt_null);
874 }
875 #endif
876
877 std::vector<TraceManagerThreadLocal*> threads_ctx;
878 tls.gather(threads_ctx);
879 size_t totalEvents = 0, totalSkippedEvents = 0;
880 for (size_t i = 0; i < threads_ctx.size(); i++)
881 {
882 TraceManagerThreadLocal* ctx = threads_ctx[i];
883 if (ctx)
884 {
885 totalEvents += ctx->region_counter;
886 totalSkippedEvents += ctx->totalSkippedEvents;
887 }
888 }
889 if (totalEvents || activated)
890 {
891 CV_LOG_INFO(NULL, "Trace: Total events: " << totalEvents);
892 }
893 if (totalSkippedEvents)
894 {
895 CV_LOG_WARNING(NULL, "Trace: Total skipped events: " << totalSkippedEvents);
896 }
897
898 // This is a global static object, so process starts shutdown here
899 // Turn off trace
900 cv::__termination = true; // also set in DllMain() notifications handler for DLL_PROCESS_DETACH
901 activated = false;
902 }
903
isActivated()904 bool TraceManager::isActivated()
905 {
906 // Check if process starts shutdown, and set earlyExit to true
907 // to prevent further instrumentation processing earlier.
908 if (cv::__termination)
909 {
910 activated = false;
911 return false;
912 }
913
914 if (!isInitialized)
915 {
916 TraceManager& m = getTraceManager();
917 CV_UNUSED(m); // TODO
918 }
919
920 return activated;
921 }
922
923
getTraceManagerCallOnce()924 static TraceManager* getTraceManagerCallOnce()
925 {
926 static TraceManager globalInstance;
927 return &globalInstance;
928 }
getTraceManager()929 TraceManager& getTraceManager()
930 {
931 CV_SINGLETON_LAZY_INIT_REF(TraceManager, getTraceManagerCallOnce())
932 }
933
parallelForSetRootRegion(const Region & rootRegion,const TraceManagerThreadLocal & root_ctx)934 void parallelForSetRootRegion(const Region& rootRegion, const TraceManagerThreadLocal& root_ctx)
935 {
936 TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
937
938 if (ctx.dummy_stack_top.region == &rootRegion) // already attached
939 return;
940
941 CV_Assert(ctx.dummy_stack_top.region == NULL);
942 ctx.dummy_stack_top = TraceManagerThreadLocal::StackEntry(const_cast<Region*>(&rootRegion), NULL, -1);
943
944 if (&ctx == &root_ctx)
945 {
946 ctx.stat.grab(ctx.parallel_for_stat);
947 ctx.parallel_for_stat_status = ctx.stat_status;
948 ctx.parallel_for_stack_size = ctx.stack.size();
949 return;
950 }
951
952 CV_Assert(ctx.stack.empty());
953
954 ctx.currentActiveRegion = const_cast<Region*>(&rootRegion);
955
956 ctx.regionDepth = root_ctx.regionDepth;
957 ctx.regionDepthOpenCV = root_ctx.regionDepthOpenCV;
958
959 ctx.parallel_for_stack_size = 0;
960
961 ctx.stat_status.propagateFrom(root_ctx.stat_status);
962 }
963
parallelForAttachNestedRegion(const Region & rootRegion)964 void parallelForAttachNestedRegion(const Region& rootRegion)
965 {
966 CV_UNUSED(rootRegion);
967 TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
968
969 CV_DbgAssert(ctx.dummy_stack_top.region == &rootRegion);
970
971 Region* region = ctx.getCurrentActiveRegion();
972 CV_LOG_PARALLEL(NULL, " PARALLEL_FOR: " << (void*)region << " ==> " << &rootRegion);
973 if (!region)
974 return;
975
976 #ifdef OPENCV_WITH_ITT
977 if (!rootRegion.pImpl || !rootRegion.pImpl->itt_id_registered)
978 return;
979
980 if (!region->pImpl)
981 return;
982
983 CV_LOG_PARALLEL(NULL, " PARALLEL_FOR ITT: " << (void*)rootRegion.pImpl->itt_id.d1 << ":" << rootRegion.pImpl->itt_id.d2 << ":" << (void*)rootRegion.pImpl->itt_id.d3 << " => "
984 << (void*)region->pImpl->itt_id.d1 << ":" << region->pImpl->itt_id.d2 << ":" << (void*)region->pImpl->itt_id.d3);
985 __itt_relation_add(domain, region->pImpl->itt_id, __itt_relation_is_child_of, rootRegion.pImpl->itt_id);
986 #endif
987 }
988
parallelForFinalize(const Region & rootRegion)989 void parallelForFinalize(const Region& rootRegion)
990 {
991 TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
992
993 int64 endTimestamp = getTimestamp();
994 int64 duration = endTimestamp - ctx.stackTopBeginTimestamp();
995 CV_LOG_PARALLEL(NULL, "parallel_for duration: " << duration << " " << &rootRegion);
996
997 std::vector<TraceManagerThreadLocal*> threads_ctx;
998 getTraceManager().tls.gather(threads_ctx);
999 RegionStatistics parallel_for_stat;
1000 int threads = 0;
1001 for (size_t i = 0; i < threads_ctx.size(); i++)
1002 {
1003 TraceManagerThreadLocal* child_ctx = threads_ctx[i];
1004
1005 if (child_ctx && child_ctx->stackTopRegion() == &rootRegion)
1006 {
1007 CV_LOG_PARALLEL(NULL, "Thread=" << child_ctx->threadID << " " << child_ctx->stat);
1008 threads++;
1009 RegionStatistics child_stat;
1010 child_ctx->stat.grab(child_stat);
1011 parallel_for_stat.append(child_stat);
1012 if (child_ctx != &ctx)
1013 {
1014 child_ctx->dummy_stack_top = TraceManagerThreadLocal::StackEntry();
1015 }
1016 else
1017 {
1018 ctx.parallel_for_stat.grab(ctx.stat);
1019 ctx.stat_status = ctx.parallel_for_stat_status;
1020 child_ctx->dummy_stack_top = TraceManagerThreadLocal::StackEntry();
1021 }
1022 }
1023 }
1024 float parallel_coeff = std::min(1.0f, duration / (float)(parallel_for_stat.duration));
1025 CV_LOG_PARALLEL(NULL, "parallel_coeff=" << 1.0f / parallel_coeff);
1026 CV_LOG_PARALLEL(NULL, parallel_for_stat);
1027 if (parallel_coeff != 1.0f)
1028 {
1029 parallel_for_stat.multiply(parallel_coeff);
1030 CV_LOG_PARALLEL(NULL, parallel_for_stat);
1031 }
1032 parallel_for_stat.duration = 0;
1033 ctx.stat.append(parallel_for_stat);
1034 CV_LOG_PARALLEL(NULL, ctx.stat);
1035 }
1036
1037 struct TraceArg::ExtraData
1038 {
1039 #ifdef OPENCV_WITH_ITT
1040 // Special fields for ITT
1041 __itt_string_handle* volatile ittHandle_name;
1042 #endif
ExtraDatacv::utils::trace::details::TraceArg::ExtraData1043 ExtraData(TraceManagerThreadLocal& ctx, const TraceArg& arg)
1044 {
1045 CV_UNUSED(ctx); CV_UNUSED(arg);
1046 #ifdef OPENCV_WITH_ITT
1047 if (isITTEnabled())
1048 {
1049 // Caching is not required here, because there is builtin cache.
1050 // https://software.intel.com/en-us/node/544203:
1051 // Consecutive calls to __itt_string_handle_create with the same name return the same value.
1052 ittHandle_name = __itt_string_handle_create(arg.name);
1053 }
1054 else
1055 {
1056 ittHandle_name = 0;
1057 }
1058 #endif
1059 }
1060 };
1061
initTraceArg(TraceManagerThreadLocal & ctx,const TraceArg & arg)1062 static void initTraceArg(TraceManagerThreadLocal& ctx, const TraceArg& arg)
1063 {
1064 TraceArg::ExtraData** pExtra = arg.ppExtra;
1065 if (*pExtra == NULL)
1066 {
1067 cv::AutoLock lock(cv::getInitializationMutex());
1068 if (*pExtra == NULL)
1069 {
1070 *pExtra = new TraceArg::ExtraData(ctx, arg);
1071 }
1072 }
1073 }
traceArg(const TraceArg & arg,const char * value)1074 void traceArg(const TraceArg& arg, const char* value)
1075 {
1076 TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
1077 Region* region = ctx.getCurrentActiveRegion();
1078 if (!region)
1079 return;
1080 CV_Assert(region->pImpl);
1081 initTraceArg(ctx, arg);
1082 if (!value)
1083 value = "<null>";
1084 #ifdef OPENCV_WITH_ITT
1085 if (isITTEnabled())
1086 {
1087 __itt_metadata_str_add(domain, region->pImpl->itt_id, (*arg.ppExtra)->ittHandle_name, value, strlen(value));
1088 }
1089 #endif
1090 }
traceArg(const TraceArg & arg,int value)1091 void traceArg(const TraceArg& arg, int value)
1092 {
1093 TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
1094 Region* region = ctx.getCurrentActiveRegion();
1095 if (!region)
1096 return;
1097 CV_Assert(region->pImpl);
1098 initTraceArg(ctx, arg);
1099 #ifdef OPENCV_WITH_ITT
1100 if (isITTEnabled())
1101 {
1102 __itt_metadata_add(domain, region->pImpl->itt_id, (*arg.ppExtra)->ittHandle_name, sizeof(int) == 4 ? __itt_metadata_s32 : __itt_metadata_s64, 1, &value);
1103 }
1104 #else
1105 CV_UNUSED(value);
1106 #endif
1107 }
traceArg(const TraceArg & arg,int64 value)1108 void traceArg(const TraceArg& arg, int64 value)
1109 {
1110 TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
1111 Region* region = ctx.getCurrentActiveRegion();
1112 if (!region)
1113 return;
1114 CV_Assert(region->pImpl);
1115 initTraceArg(ctx, arg);
1116 #ifdef OPENCV_WITH_ITT
1117 if (isITTEnabled())
1118 {
1119 __itt_metadata_add(domain, region->pImpl->itt_id, (*arg.ppExtra)->ittHandle_name, __itt_metadata_s64, 1, &value);
1120 }
1121 #else
1122 CV_UNUSED(value);
1123 #endif
1124 }
traceArg(const TraceArg & arg,double value)1125 void traceArg(const TraceArg& arg, double value)
1126 {
1127 TraceManagerThreadLocal& ctx = getTraceManager().tls.getRef();
1128 Region* region = ctx.getCurrentActiveRegion();
1129 if (!region)
1130 return;
1131 CV_Assert(region->pImpl);
1132 initTraceArg(ctx, arg);
1133 #ifdef OPENCV_WITH_ITT
1134 if (isITTEnabled())
1135 {
1136 __itt_metadata_add(domain, region->pImpl->itt_id, (*arg.ppExtra)->ittHandle_name, __itt_metadata_double, 1, &value);
1137 }
1138 #else
1139 CV_UNUSED(value);
1140 #endif
1141 }
1142
1143 #else
1144
1145 Region::Region(const LocationStaticStorage&) : pImpl(NULL), implFlags(0) {}
1146 void Region::destroy() {}
1147
1148 void traceArg(const TraceArg&, const char*) {}
1149 void traceArg(const TraceArg&, int) {};
1150 void traceArg(const TraceArg&, int64) {};
1151 void traceArg(const TraceArg&, double) {};
1152
1153 #endif
1154
1155 }}}} // namespace
1156