1 // 2 // Copyright 2016 Pixar 3 // 4 // Licensed under the Apache License, Version 2.0 (the "Apache License") 5 // with the following modification; you may not use this file except in 6 // compliance with the Apache License and the following modification to it: 7 // Section 6. Trademarks. is deleted and replaced with: 8 // 9 // 6. Trademarks. This License does not grant permission to use the trade 10 // names, trademarks, service marks, or product names of the Licensor 11 // and its affiliates, except as required to comply with Section 4(c) of 12 // the License and to reproduce the content of the NOTICE file. 13 // 14 // You may obtain a copy of the Apache License at 15 // 16 // http://www.apache.org/licenses/LICENSE-2.0 17 // 18 // Unless required by applicable law or agreed to in writing, software 19 // distributed under the Apache License with the above modification is 20 // distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 21 // KIND, either express or implied. See the Apache License for the specific 22 // language governing permissions and limitations under the Apache License. 23 // 24 #ifndef PXR_BASE_TF_MALLOC_TAG_H 25 #define PXR_BASE_TF_MALLOC_TAG_H 26 27 #include "pxr/pxr.h" 28 #include "pxr/base/tf/api.h" 29 30 #include <cstdlib> 31 #include <iosfwd> 32 #include <stdint.h> 33 #include <string> 34 #include <vector> 35 36 PXR_NAMESPACE_OPEN_SCOPE 37 38 /// \file tf/mallocTag.h 39 /// \ingroup group_tf_MallocTag 40 41 struct Tf_MallocPathNode; 42 43 /// \class TfMallocTag 44 /// \ingroup group_tf_MallocTag 45 /// 46 /// Top-down memory tagging system. 47 /// 48 /// See \ref page_tf_MallocTag for a detailed description. 49 class TfMallocTag { 50 public: 51 struct CallStackInfo; 52 53 /// \struct CallTree 54 /// Summary data structure for \c malloc statistics. 55 /// 56 /// The \c CallTree structure is used to deliver a snapshot of the current 57 /// malloc usage. It is accessible as publicly modifiable data because it 58 /// is simply a returned snapshot of the current memory state. 59 struct CallTree { 60 /// \struct PathNode 61 /// Node in the call tree structure. 62 /// 63 /// A \c PathNode captures the hierarchy of active \c TfAutoMallocTag 64 /// objects that are pushed and popped during program execution. Each 65 /// \c PathNode thus describes a sequence of call-sites (i.e. a path 66 /// down the call tree). Repeated call sites (in the case of 67 /// co-recursive function calls) can be skipped, e.g. pushing tags 68 /// "A", "B", "C", "B", "C" leads to only three path-nodes, 69 /// representing the paths "A", "AB", and "ABC". Allocations done at 70 /// the bottom (i.e. when tags "A", "B", "C", "B", "C" are all active) 71 /// are billed to the longest path node in the sequence, which 72 /// corresponds to the path "ABC"). 73 /// 74 /// Path nodes track both the memory they incur directly (\c 75 /// nBytesDirect) but more importantly, the total memory allocated by 76 /// themselves and any of their children (\c nBytes). The name of a 77 /// node (\c siteName) corresponds to the tag name of the final tag in 78 /// the path. 79 struct PathNode { 80 size_t nBytes, ///< Allocated bytes by this or descendant nodes. 81 nBytesDirect; ///< Allocated bytes (only for this node). 82 size_t nAllocations; ///< The number of allocations for this node. 83 std::string siteName; ///< Tag name. 84 std::vector<PathNode> 85 children; ///< Children nodes. 86 }; 87 88 /// \struct CallSite 89 /// Record of the bytes allocated under each different tag. 90 /// 91 /// Each construction of a \c TfAutoMallocTag object with a different 92 /// argument produces a distinct \c CallSite record. The total bytes 93 /// outstanding for all memory allocations made under a given 94 /// call-site are recorded in \c nBytes, while the name of the call 95 /// site is available as \c name. 96 struct CallSite { 97 std::string name; ///< Tag name. 98 size_t nBytes; ///< Allocated bytes. 99 }; 100 101 // Note: enum below must be kept in sync with tfmodule/mallocCallTree.h 102 103 /// Specify which parts of the report to print. 104 enum PrintSetting { 105 TREE = 0, ///< Print the full call tree 106 CALLSITES, ///< Print just the call sites > 0.1% 107 BOTH ///< Print both tree and call sites 108 }; 109 110 /// Return the malloc report string. 111 /// 112 /// Get a malloc report of the tree and/or callsites. 113 /// 114 /// The columns in the report are abbreviated. Here are the definitions. 115 /// 116 /// \b TAGNAME : The name of the tag being tracked. This matches the 117 /// string argument to TfAutoMallocTag constructor. 118 /// 119 /// \b BytesIncl : Bytes Inclusive. This includes all bytes allocated by 120 /// this tag and any bytes of its children. 121 /// 122 /// \b BytesExcl : Bytes Exclusive. Only bytes allocated exclusively by 123 /// this tag, not including any bytes of its children. 124 /// 125 /// \b %%Prnt : (%% Parent). me.BytesIncl / parent.BytesIncl * 100 126 /// 127 /// \b %%Exc : BytesExcl / BytesIncl * 100 128 /// 129 /// \b %%Totl : (%% Total). BytesExcl / TotalBytes * 100 130 TF_API 131 std::string GetPrettyPrintString(PrintSetting setting = BOTH, 132 size_t maxPrintedNodes = 100000) const; 133 134 /// Generates a report to the ostream \p out. 135 /// 136 /// This report is printed in a way that is intended to be used by 137 /// xxtracediff. If \p rootName is non-empty it will replace the name 138 /// of the tree root in the report. 139 TF_API 140 void Report( 141 std::ostream &out, 142 const std::string &rootName) const; 143 144 /// \overload 145 TF_API 146 void Report( 147 std::ostream &out) const; 148 149 /// All call sites. 150 std::vector<CallSite> callSites; 151 152 /// Root node of the call-site hierarchy. 153 PathNode root; 154 155 /// The captured malloc stacks. 156 std::vector<CallStackInfo> capturedCallStacks; 157 }; 158 159 /// \struct CallStackInfo 160 /// This struct is used to represent a call stack taken for an allocation 161 /// that was billed under a specific malloc tag. 162 struct CallStackInfo 163 { 164 /// The stack frame pointers. 165 std::vector<uintptr_t> stack; 166 167 /// The amount of allocated memory (accumulated over all allocations 168 /// sharing this call stack). 169 size_t size; 170 171 /// The number of allocations (always one unless stack frames have 172 /// been combined to create unique stacks). 173 size_t numAllocations; 174 }; 175 176 /// Initialize the memory tagging system. 177 /// 178 /// This function returns \c true if the memory tagging system can be 179 /// successfully initialized or it has already been initialized. Otherwise, 180 /// \p *errMsg is set with an explanation for the failure. 181 /// 182 /// Until the system is initialized, the various memory reporting calls 183 /// will indicate that no memory has been allocated. Note also that 184 /// memory allocated prior to calling \c Initialize() is not tracked i.e. 185 /// all data refers to allocations that happen subsequent to calling \c 186 /// Initialize(). 187 TF_API static bool Initialize(std::string* errMsg); 188 189 /// Return true if the tagging system is active. 190 /// 191 /// If \c Initialize() has been successfully called, this function returns 192 /// \c true. IsInitialized()193 static bool IsInitialized() { 194 return TfMallocTag::_doTagging; 195 } 196 197 /// Return total number of allocated bytes. 198 /// 199 /// The current total memory that has been allocated and not freed is 200 /// returned. Memory allocated before calling \c Initialize() is not 201 /// accounted for. 202 TF_API static size_t GetTotalBytes(); 203 204 /// Return the maximum total number of bytes that have ever been allocated 205 /// at one time. 206 /// 207 /// This is simply the maximum value of GetTotalBytes() since Initialize() 208 /// was called. 209 TF_API static size_t GetMaxTotalBytes(); 210 211 /// Return a snapshot of memory usage. 212 /// 213 /// Returns a snapshot by writing into \c *tree. See the \c C *tree 214 /// structure for documentation. If \c Initialize() has not been called, 215 /// \ *tree is set to a rather blank structure (empty vectors, empty 216 /// strings, zero in all integral fields) and \c false is returned; 217 /// otherwise, \p *tree is set with the contents of the current memory 218 /// snapshot and \c true is returned. It is fine to call this function on 219 /// the same \p *tree instance; each call simply overwrites the data from 220 /// the last call. If /p skipRepeated is \c true, then any repeated 221 /// callsite is skipped. See the \c CallTree documentation for more 222 /// details. 223 TF_API static bool GetCallTree(CallTree* tree, bool skipRepeated = true); 224 225 private: 226 // Enum describing whether allocations are being tagged in an associated 227 // thread. 228 enum _Tagging { 229 _TaggingEnabled, // Allocations are being tagged 230 _TaggingDisabled, // Allocations are not being tagged 231 232 _TaggingDormant // Tagging has not been initialized in this 233 // thread as no malloc tags have been pushed onto 234 // the stack. 235 }; 236 237 struct _ThreadData; 238 239 public: 240 241 /// \class Auto 242 /// \ingroup group_tf_MallocTag 243 /// 244 /// Scoped (i.e. local) object for creating/destroying memory tags. 245 /// 246 /// Note: \c TfAutoMallocTag is a typedef to \c TfMallocTag::Auto; the 247 /// convention is to use \c TfAutoMallocTag to make it clear that the 248 /// local object exists only because its constructor and destructor modify 249 /// program state. 250 /// 251 /// A \c TfAutoMallocTag object is used to push a memory tag onto the 252 /// current call stack; destruction of the object pops the call stack. 253 /// Note that each thread has its own call-stack. 254 /// 255 /// There is no (measurable) cost to creating or destroying memory tags if 256 /// \c TfMallocTag::Initialize() has not been called; if it has, then 257 /// there is a small (but measurable) cost associated with pushing and 258 /// popping memory tags on the local call stack. Most of the cost is 259 /// simply locking a mutex; typically, pushing or popping the call stack 260 /// does not actually cause any memory allocation unless this is the first 261 /// time that the given named tag has been encountered. 262 class Auto { 263 public: 264 Auto(const Auto &) = delete; 265 Auto& operator=(const Auto &) = delete; 266 267 Auto(Auto &&) = delete; 268 Auto& operator=(Auto &&) = delete; 269 270 /// Push a memory tag onto the local-call stack with name \p name. 271 /// 272 /// If \c TfMallocTag::Initialize() has not been called, this 273 /// constructor does essentially no (measurable) work, assuming \p 274 /// name is a string literal or just a pointer to an existing string. 275 /// 276 /// Objects of this class should only be created as local variables; 277 /// never as member variables, global variables, or via \c new. If 278 /// you can't create your object as a local variable, you can make 279 /// manual calls to \c TfMallocTag::Push() and \c TfMallocTag::Pop(), 280 /// though you should do this only as a last resort. Auto(const char * name)281 Auto(const char* name) : _threadData(0) { 282 if (TfMallocTag::_doTagging) 283 _Begin(name); 284 } 285 286 /// Push a memory tag onto the local-call stack with name \p name. 287 /// 288 /// If \c TfMallocTag::Initialize() has not been called, this 289 /// constructor does essentially no (measurable) work. However, any 290 /// work done in constructing the \c std::string object \p name will 291 /// be incurred even if tagging is not active. If this is an issue, 292 /// you can query \c TfMallocTag::IsInitialized() to avoid unneeded 293 /// work when tagging is inactive. Note that the case when \p name is 294 /// a string literal does not apply here: instead, the constructor that 295 /// takes a \c const \c char* (above) will be called. 296 /// 297 /// Objects of this class should only be created as local variables; 298 /// never as member variables, global variables, or via \c new. If 299 /// you can't create your object as a local variable, you can make 300 /// manual calls to \c TfMallocTag::Push() and \c TfMallocTag::Pop(), 301 /// though you should do this only as a last resort. Auto(const std::string & name)302 Auto(const std::string& name) : _threadData(0) { 303 if (TfMallocTag::_doTagging) 304 _Begin(name); 305 } 306 307 /// Pop the tag from the stack before it is destructed. 308 /// 309 /// Normally you should not use this. The normal destructor is 310 /// preferable because it insures proper release order. If you call 311 /// \c Release(), make sure all tags are released in the opposite 312 /// order they were declared in. It is better to use sub-scopes to 313 /// control the life span of tags, but if that won't work, \c 314 /// Release() is still preferable to \c TfMallocTag::Push() and \c 315 /// TfMallocTag::Pop() because it isn't vulnerable to early returns or 316 /// exceptions. Release()317 inline void Release() { 318 if (_threadData) { 319 _End(); 320 _threadData = NULL; 321 } 322 } 323 324 /// Pop a memory tag from the local-call stack. 325 /// 326 /// If \c TfMallocTag::Initialize() was not called when this tag was 327 /// pushed onto the stack, popping the tag from the stack does 328 /// essentially no (measurable) work. ~Auto()329 inline ~Auto() { 330 Release(); 331 } 332 333 private: 334 TF_API void _Begin(const char* name); 335 TF_API void _Begin(const std::string& name); 336 TF_API void _End(); 337 338 _ThreadData* _threadData; 339 340 friend class TfMallocTag; 341 }; 342 343 /// \class Auto2 344 /// \ingroup group_tf_MallocTag 345 /// 346 /// Scoped (i.e. local) object for creating/destroying memory tags. 347 /// 348 /// Auto2 is just like Auto, except it pushes two tags onto the stack. 349 class Auto2 { 350 public: 351 /// Push two memory tags onto the local-call stack. 352 /// 353 /// \see TfMallocTag::Auto(const char* name). Auto2(const char * name1,const char * name2)354 Auto2(const char* name1, const char* name2) : 355 _tag1(name1), 356 _tag2(name2) 357 { 358 } 359 360 /// Push two memory tags onto the local-call stack. 361 /// 362 /// \see TfMallocTag::Auto(const std::string& name). Auto2(const std::string & name1,const std::string & name2)363 Auto2(const std::string& name1, const std::string& name2) : 364 _tag1(name1), 365 _tag2(name2) 366 { 367 } 368 369 /// Pop two memory tags from the local-call stack. 370 /// 371 /// \see TfMallocTag::Auto(const char* name). Release()372 void Release() { 373 _tag2.Release(); 374 _tag1.Release(); 375 } 376 377 private: 378 Auto _tag1; 379 Auto _tag2; 380 }; 381 382 /// Manually push a tag onto the stack. 383 /// 384 /// This call has the same effect as the constructor for \c 385 /// TfMallocTag::Auto (aka \c TfAutoMallocTag), however a matching call to 386 /// \c Pop() is required. 387 /// 388 /// Note that initializing the tagging system between matching calls to \c 389 /// Push() and \c Pop() is ill-advised, which is yet another reason to 390 /// prefer using \c TfAutoMallocTag whenever possible. Push(const std::string & name)391 static void Push(const std::string& name) { 392 TfMallocTag::Auto noname(name); 393 noname._threadData = NULL; // disable destructor 394 } 395 396 /// \overload Push(const char * name)397 static void Push(const char* name) { 398 TfMallocTag::Auto noname(name); 399 noname._threadData = NULL; // disable destructor 400 } 401 402 /// Manually pop a tag from the stack. 403 /// 404 /// This call has the same effect as the destructor for \c 405 /// TfMallocTag::Auto; it must properly nest with a matching call to \c 406 /// Push(), of course. 407 /// 408 /// If \c name is supplied and does not match the tag at the top of the 409 /// stack, a warning message is issued. 410 TF_API static void Pop(const char* name = NULL); 411 412 /// \overload Pop(const std::string & name)413 static void Pop(const std::string& name) { 414 Pop(name.c_str()); 415 } 416 417 /// Sets the tags to trap in the debugger. 418 /// 419 /// When memory is allocated or freed for any tag that matches \p 420 /// matchList the debugger trap is invoked. If a debugger is attached the 421 /// program will stop in the debugger, otherwise the program will continue 422 /// to run. See \c ArchDebuggerTrap() and \c ArchDebuggerWait(). 423 /// 424 /// \p matchList is a comma, tab or newline separated list of malloc tag 425 /// names. The names can have internal spaces but leading and trailing 426 /// spaces are stripped. If a name ends in '*' then the suffix is 427 /// wildcarded. A name can have a leading '-' or '+' to prevent or allow a 428 /// match. Each name is considered in order and later matches override 429 /// earlier matches. For example, 'Csd*, -CsdScene::_Populate*, 430 /// +CsdScene::_PopulatePrimCacheLocal' matches any malloc tag starting 431 /// with 'Csd' but nothing starting with 'CsdScene::_Populate' except 432 /// 'CsdScene::_PopulatePrimCacheLocal'. Use the empty string to disable 433 /// debugging traps. 434 TF_API static void SetDebugMatchList(const std::string& matchList); 435 436 /// Sets the tags to trace. 437 /// 438 /// When memory is allocated for any tag that matches \p matchList a stack 439 /// trace is recorded. When that memory is released the stack trace is 440 /// discarded. Clients can call \c GetCapturedMallocStacks() to get a 441 /// list of all recorded stack traces. This is useful for finding leaks. 442 /// 443 /// Traces recorded for any tag that will no longer be matched are 444 /// discarded by this call. Traces recorded for tags that continue to be 445 /// matched are retained. 446 /// 447 /// \p matchList is a comma, tab or newline separated list of malloc tag 448 /// names. The names can have internal spaces but leading and trailing 449 /// spaces are stripped. If a name ends in '*' then the suffix is 450 /// wildcarded. A name can have a leading '-' or '+' to prevent or allow 451 /// a match. Each name is considered in order and later matches override 452 /// earlier matches. For example, 'Csd*, -CsdScene::_Populate*, 453 /// +CsdScene::_PopulatePrimCacheLocal' matches any malloc tag starting 454 /// with 'Csd' but nothing starting with 'CsdScene::_Populate' except 455 /// 'CsdScene::_PopulatePrimCacheLocal'. Use the empty string to disable 456 /// stack capturing. 457 TF_API static void SetCapturedMallocStacksMatchList(const std::string& matchList); 458 459 /// Returns the captured malloc stack traces for allocations billed to the 460 /// malloc tags passed to SetCapturedMallocStacksMatchList(). 461 /// 462 /// \note This method also clears the internally held set of captured 463 /// stacks. 464 TF_API static std::vector<std::vector<uintptr_t> > GetCapturedMallocStacks(); 465 466 private: 467 friend struct Tf_MallocGlobalData; 468 469 class _TemporaryTaggingState { 470 public: 471 explicit _TemporaryTaggingState(_Tagging state); 472 ~_TemporaryTaggingState(); 473 474 _TemporaryTaggingState(const _TemporaryTaggingState &); 475 _TemporaryTaggingState& operator=(const _TemporaryTaggingState &); 476 477 _TemporaryTaggingState(_TemporaryTaggingState &&); 478 _TemporaryTaggingState& operator=(_TemporaryTaggingState &&); 479 480 private: 481 _Tagging _oldState; 482 }; 483 484 static void _SetTagging(_Tagging state); 485 static _Tagging _GetTagging(); 486 487 static bool _Initialize(std::string* errMsg); 488 489 static inline bool _ShouldNotTag(_ThreadData**, _Tagging* t = NULL); 490 static inline Tf_MallocPathNode* _GetCurrentPathNodeNoLock( 491 const _ThreadData* threadData); 492 493 static void* _MallocWrapper_ptmalloc(size_t, const void*); 494 static void* _ReallocWrapper_ptmalloc(void*, size_t, const void*); 495 static void* _MemalignWrapper_ptmalloc(size_t, size_t, const void*); 496 static void _FreeWrapper_ptmalloc(void*, const void*); 497 498 static void* _MallocWrapper(size_t, const void*); 499 static void* _ReallocWrapper(void*, size_t, const void*); 500 static void* _MemalignWrapper(size_t, size_t, const void*); 501 static void _FreeWrapper(void*, const void*); 502 503 friend class TfMallocTag::Auto; 504 class Tls; 505 friend class TfMallocTag::Tls; 506 TF_API static bool _doTagging; 507 }; 508 509 /// Top-down memory tagging system. 510 typedef TfMallocTag::Auto TfAutoMallocTag; 511 512 /// Top-down memory tagging system. 513 typedef TfMallocTag::Auto2 TfAutoMallocTag2; 514 515 /// Enable lib/tf memory management. 516 /// 517 /// Invoking this macro inside a class body causes the class operator \c new to push 518 /// two \c TfAutoMallocTag objects onto the stack before actually allocating memory for the 519 /// class. The names passed into the tag are used for the two tags; pass NULL if you 520 /// don't need the second tag. For example, 521 /// \code 522 /// class MyBigMeshVertex { 523 /// public: 524 /// TF_MALLOC_TAG_NEW("MyBigMesh", "Vertex"); 525 /// ... 526 /// } 527 /// \endcode 528 /// will cause dynamic allocations of \c MyBigMeshVertex to be grouped under 529 /// the tag \c Vertex which is in turn grouped under \c MyBigMesh. However, 530 /// \code 531 /// class MyBigMesh { 532 /// public: 533 /// TF_MALLOC_TAG_NEW("MyBigMesh", NULL); 534 /// ... 535 /// } 536 /// \endcode 537 /// specifies \c NULL for the second tag because the first tag is sufficient. 538 /// 539 /// Normally, this macro should be placed in the public section of a class. 540 /// Note that you cannot specify both this and \c TF_FIXED_SIZE_ALLOCATOR() 541 /// for the same class. 542 /// 543 /// Also, note that allocations of a class inside an STL datastructure will 544 /// not be grouped under the indicated tags. 545 /// \remark Placed in .h files. 546 /// 547 /// \hideinitializer 548 // 549 PXR_NAMESPACE_CLOSE_SCOPE 550 551 #define TF_MALLOC_TAG_NEW(name1, name2) \ 552 /* this is for STL purposes */ \ 553 inline void* operator new(::std::size_t, void* ptr) { \ 554 return ptr; \ 555 } \ 556 \ 557 inline void* operator new(::std::size_t s) { \ 558 PXR_NS::TfAutoMallocTag tag1(name1); \ 559 PXR_NS::TfAutoMallocTag tag2(name2); \ 560 return malloc(s); \ 561 } \ 562 \ 563 inline void* operator new[](::std::size_t s) { \ 564 PXR_NS::TfAutoMallocTag tag1(name1); \ 565 PXR_NS::TfAutoMallocTag tag2(name2); \ 566 return malloc(s); \ 567 } \ 568 \ 569 /* Required due to the placement-new override above. */ \ 570 inline void operator delete(void* ptr, void* place) {} \ 571 \ 572 inline void operator delete(void* ptr, size_t) { \ 573 free(ptr); \ 574 } \ 575 \ 576 inline void operator delete[] (void* ptr, size_t) { \ 577 free(ptr); \ 578 } \ 579 580 #endif 581