1 //
2 // Copyright 2016 Pixar
3 //
4 // Licensed under the Apache License, Version 2.0 (the "Apache License")
5 // with the following modification; you may not use this file except in
6 // compliance with the Apache License and the following modification to it:
7 // Section 6. Trademarks. is deleted and replaced with:
8 //
9 // 6. Trademarks. This License does not grant permission to use the trade
10 //    names, trademarks, service marks, or product names of the Licensor
11 //    and its affiliates, except as required to comply with Section 4(c) of
12 //    the License and to reproduce the content of the NOTICE file.
13 //
14 // You may obtain a copy of the Apache License at
15 //
16 //     http://www.apache.org/licenses/LICENSE-2.0
17 //
18 // Unless required by applicable law or agreed to in writing, software
19 // distributed under the Apache License with the above modification is
20 // distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21 // KIND, either express or implied. See the Apache License for the specific
22 // language governing permissions and limitations under the Apache License.
23 //
24 #ifndef PXR_BASE_TF_MALLOC_TAG_H
25 #define PXR_BASE_TF_MALLOC_TAG_H
26 
27 #include "pxr/pxr.h"
28 #include "pxr/base/tf/api.h"
29 
#include <cstdlib>
#include <iosfwd>
#include <new>
#include <stdint.h>
#include <string>
#include <vector>
35 
36 PXR_NAMESPACE_OPEN_SCOPE
37 
38 /// \file tf/mallocTag.h
39 /// \ingroup group_tf_MallocTag
40 
41 struct Tf_MallocPathNode;
42 
43 /// \class TfMallocTag
44 /// \ingroup group_tf_MallocTag
45 ///
46 /// Top-down memory tagging system.
47 ///
48 /// See \ref page_tf_MallocTag for a detailed description.
49 class TfMallocTag {
50 public:
51     struct CallStackInfo;
52 
53     /// \struct CallTree
54     /// Summary data structure for \c malloc statistics.
55     ///
56     /// The \c CallTree structure is used to deliver a snapshot of the current
57     /// malloc usage.  It is accessible as publicly modifiable data because it
58     /// is simply a returned snapshot of the current memory state.
59     struct CallTree {
60         /// \struct PathNode
61         /// Node in the call tree structure.
62         ///
63         /// A \c PathNode captures the hierarchy of active \c TfAutoMallocTag
64         /// objects that are pushed and popped during program execution.  Each
65         /// \c PathNode thus describes a sequence of call-sites (i.e. a path
66         /// down the call tree).  Repeated call sites (in the case of
67         /// co-recursive function calls) can be skipped, e.g. pushing tags
68         /// "A", "B", "C", "B", "C" leads to only three path-nodes,
69         /// representing the paths "A", "AB", and "ABC".  Allocations done at
70         /// the bottom (i.e. when tags "A", "B", "C", "B", "C" are all active)
71         /// are billed to the longest path node in the sequence, which
72         /// corresponds to the path "ABC").
73         ///
74         /// Path nodes track both the memory they incur directly (\c
75         /// nBytesDirect) but more importantly, the total memory allocated by
76         /// themselves and any of their children (\c nBytes).  The name of a
77         /// node (\c siteName) corresponds to the tag name of the final tag in
78         /// the path.
79         struct PathNode {
80             size_t nBytes,          ///< Allocated bytes by this or descendant nodes.
81                    nBytesDirect;    ///< Allocated bytes (only for this node).
82             size_t nAllocations;    ///< The number of allocations for this node.
83             std::string siteName;   ///< Tag name.
84             std::vector<PathNode>
85                         children;   ///< Children nodes.
86         };
87 
88         /// \struct CallSite
89         /// Record of the bytes allocated under each different tag.
90         ///
91         /// Each construction of a \c TfAutoMallocTag object with a different
92         /// argument produces a distinct \c CallSite record.  The total bytes
93         /// outstanding for all memory allocations made under a given
94         /// call-site are recorded in \c nBytes, while the name of the call
95         /// site is available as \c name.
96         struct CallSite {
97             std::string name;       ///< Tag name.
98             size_t nBytes;          ///< Allocated bytes.
99         };
100 
101         // Note: enum below must be kept in sync with tfmodule/mallocCallTree.h
102 
103         /// Specify which parts of the report to print.
104         enum PrintSetting {
105             TREE = 0,                   ///< Print the full call tree
106             CALLSITES,                  ///< Print just the call sites > 0.1%
107             BOTH                        ///< Print both tree and call sites
108         };
109 
110         /// Return the malloc report string.
111         ///
112         /// Get a malloc report of the tree and/or callsites.
113         ///
114         /// The columns in the report are abbreviated. Here are the definitions.
115         ///
116         /// \b TAGNAME : The name of the tag being tracked. This matches the
117         /// string argument to TfAutoMallocTag constructor.
118         ///
119         /// \b BytesIncl : Bytes Inclusive. This includes all bytes allocated by
120         /// this tag and any bytes of its children.
121         ///
122         /// \b BytesExcl : Bytes Exclusive. Only bytes allocated exclusively by
123         /// this tag, not including any bytes of its children.
124         ///
125         /// \b %%Prnt : (%% Parent).  me.BytesIncl / parent.BytesIncl * 100
126         ///
127         /// \b %%Exc : BytesExcl / BytesIncl * 100
128         ///
129         /// \b %%Totl : (%% Total). BytesExcl / TotalBytes * 100
130         TF_API
131         std::string GetPrettyPrintString(PrintSetting setting = BOTH,
132                                          size_t maxPrintedNodes = 100000) const;
133 
134         /// Generates a report to the ostream \p out.
135         ///
136         /// This report is printed in a way that is intended to be used by
137         /// xxtracediff.  If \p rootName is non-empty it will replace the name
138         /// of the tree root in the report.
139         TF_API
140         void Report(
141             std::ostream &out,
142             const std::string &rootName) const;
143 
144         /// \overload
145         TF_API
146         void Report(
147             std::ostream &out) const;
148 
149         /// All call sites.
150         std::vector<CallSite> callSites;
151 
152         /// Root node of the call-site hierarchy.
153         PathNode root;
154 
155         /// The captured malloc stacks.
156         std::vector<CallStackInfo> capturedCallStacks;
157     };
158 
159     /// \struct CallStackInfo
160     /// This struct is used to represent a call stack taken for an allocation
161     /// that was  billed under a specific malloc tag.
162     struct CallStackInfo
163     {
164         /// The stack frame pointers.
165         std::vector<uintptr_t> stack;
166 
167         /// The amount of allocated memory (accumulated over all allocations
168         /// sharing this call stack).
169         size_t size;
170 
171         /// The number of allocations (always one unless stack frames have
172         /// been combined to create unique stacks).
173         size_t numAllocations;
174     };
175 
176     /// Initialize the memory tagging system.
177     ///
178     /// This function returns \c true if the memory tagging system can be
179     /// successfully initialized or it has already been initialized. Otherwise,
180     /// \p *errMsg is set with an explanation for the failure.
181     ///
182     /// Until the system is initialized, the various memory reporting calls
183     /// will indicate that no memory has been allocated.  Note also that
184     /// memory allocated prior to calling \c Initialize() is not tracked i.e.
185     /// all data refers to allocations that happen subsequent to calling \c
186     /// Initialize().
187     TF_API static bool Initialize(std::string* errMsg);
188 
189     /// Return true if the tagging system is active.
190     ///
191     /// If \c Initialize() has been successfully called, this function returns
192     /// \c true.
IsInitialized()193     static bool IsInitialized() {
194         return TfMallocTag::_doTagging;
195     }
196 
197     /// Return total number of allocated bytes.
198     ///
199     /// The current total memory that has been allocated and not freed is
200     /// returned. Memory allocated before calling \c Initialize() is not
201     /// accounted for.
202     TF_API static size_t GetTotalBytes();
203 
204     /// Return the maximum total number of bytes that have ever been allocated
205     /// at one time.
206     ///
207     /// This is simply the maximum value of GetTotalBytes() since Initialize()
208     /// was called.
209     TF_API static size_t GetMaxTotalBytes();
210 
211     /// Return a snapshot of memory usage.
212     ///
213     /// Returns a snapshot by writing into \c *tree.  See the \c C *tree
214     /// structure for documentation.  If \c Initialize() has not been called,
215     /// \ *tree is set to a rather blank structure (empty vectors, empty
216     /// strings, zero in all integral fields) and \c false is returned;
217     /// otherwise, \p *tree is set with the contents of the current memory
218     /// snapshot and \c true is returned. It is fine to call this function on
219     /// the same \p *tree instance; each call simply overwrites the data from
220     /// the last call. If /p skipRepeated is \c true, then any repeated
221     /// callsite is skipped. See the \c CallTree documentation for more
222     /// details.
223     TF_API static bool GetCallTree(CallTree* tree, bool skipRepeated = true);
224 
225 private:
226     // Enum describing whether allocations are being tagged in an associated
227     // thread.
228     enum _Tagging {
229         _TaggingEnabled,   // Allocations are being tagged
230         _TaggingDisabled,  // Allocations are not being tagged
231 
232         _TaggingDormant    // Tagging has not been initialized in this
233                            // thread as no malloc tags have been pushed onto
234                            // the stack.
235     };
236 
237     struct _ThreadData;
238 
239 public:
240 
241     /// \class Auto
242     /// \ingroup group_tf_MallocTag
243     ///
244     /// Scoped (i.e. local) object for creating/destroying memory tags.
245     ///
246     /// Note: \c TfAutoMallocTag is a typedef to \c TfMallocTag::Auto; the
247     /// convention is to use \c TfAutoMallocTag to make it clear that the
248     /// local object exists only because its constructor and destructor modify
249     /// program state.
250     ///
251     /// A \c TfAutoMallocTag object is used to push a memory tag onto the
252     /// current call stack; destruction of the object pops the call stack.
253     /// Note that each thread has its own call-stack.
254     ///
255     /// There is no (measurable) cost to creating or destroying memory tags if
256     /// \c TfMallocTag::Initialize() has not been called; if it has, then
257     /// there is a small (but measurable) cost associated with pushing and
258     /// popping memory tags on the local call stack.  Most of the cost is
259     /// simply locking a mutex; typically, pushing or popping the call stack
260     /// does not actually cause any memory allocation unless this is the first
261     /// time that the given named tag has been encountered.
262     class Auto {
263     public:
264         Auto(const Auto &) = delete;
265         Auto& operator=(const Auto &) = delete;
266 
267         Auto(Auto &&) = delete;
268         Auto& operator=(Auto &&) = delete;
269 
270         /// Push a memory tag onto the local-call stack with name \p name.
271         ///
272         /// If \c TfMallocTag::Initialize() has not been called, this
273         /// constructor does essentially no (measurable) work, assuming \p
274         /// name is a string literal or just a pointer to an existing string.
275         ///
276         /// Objects of this class should only be created as local variables;
277         /// never as member variables, global variables, or via \c new.  If
278         /// you can't create your object as a local variable, you can make
279         /// manual calls to \c TfMallocTag::Push() and \c TfMallocTag::Pop(),
280         /// though you should do this only as a last resort.
Auto(const char * name)281         Auto(const char* name) : _threadData(0) {
282             if (TfMallocTag::_doTagging)
283                 _Begin(name);
284         }
285 
286         /// Push a memory tag onto the local-call stack with name \p name.
287         ///
288         /// If \c TfMallocTag::Initialize() has not been called, this
289         /// constructor does essentially no (measurable) work.  However, any
290         /// work done in constructing the \c std::string object \p name will
291         /// be incurred even if tagging is not active.  If this is an issue,
292         /// you can query \c TfMallocTag::IsInitialized() to avoid unneeded
293         /// work when tagging is inactive.  Note that the case when \p name is
294         /// a string literal does not apply here: instead, the constructor that
295         /// takes a \c const \c char* (above) will be called.
296         ///
297         /// Objects of this class should only be created as local variables;
298         /// never as member variables, global variables, or via \c new.  If
299         /// you can't create your object as a local variable, you can make
300         /// manual calls to \c TfMallocTag::Push() and \c TfMallocTag::Pop(),
301         /// though you should do this only as a last resort.
Auto(const std::string & name)302         Auto(const std::string& name) : _threadData(0) {
303             if (TfMallocTag::_doTagging)
304                 _Begin(name);
305         }
306 
307         /// Pop the tag from the stack before it is destructed.
308         ///
309         /// Normally you should not use this.  The normal destructor is
310         /// preferable because it insures proper release order.  If you call
311         /// \c Release(), make sure all tags are released in the opposite
312         /// order they were declared in.  It is better to use sub-scopes to
313         /// control the life span of tags, but if that won't work, \c
314         /// Release() is still preferable to \c TfMallocTag::Push() and \c
315         /// TfMallocTag::Pop() because it isn't vulnerable to early returns or
316         /// exceptions.
Release()317         inline void Release() {
318             if (_threadData) {
319                 _End();
320                 _threadData = NULL;
321             }
322         }
323 
324         /// Pop a memory tag from the local-call stack.
325         ///
326         /// If \c TfMallocTag::Initialize() was not called when this tag was
327         /// pushed onto the stack, popping the tag from the stack does
328         /// essentially no (measurable) work.
~Auto()329         inline ~Auto() {
330             Release();
331         }
332 
333     private:
334         TF_API void _Begin(const char* name);
335         TF_API void _Begin(const std::string& name);
336         TF_API void _End();
337 
338         _ThreadData* _threadData;
339 
340         friend class TfMallocTag;
341     };
342 
343     /// \class Auto2
344     /// \ingroup group_tf_MallocTag
345     ///
346     /// Scoped (i.e. local) object for creating/destroying memory tags.
347     ///
348     /// Auto2 is just like Auto, except it pushes two tags onto the stack.
349     class Auto2 {
350     public:
351         /// Push two memory tags onto the local-call stack.
352         ///
353         /// \see TfMallocTag::Auto(const char* name).
Auto2(const char * name1,const char * name2)354         Auto2(const char* name1, const char* name2) :
355             _tag1(name1),
356             _tag2(name2)
357         {
358         }
359 
360         /// Push two memory tags onto the local-call stack.
361         ///
362         /// \see TfMallocTag::Auto(const std::string& name).
Auto2(const std::string & name1,const std::string & name2)363         Auto2(const std::string& name1, const std::string& name2) :
364             _tag1(name1),
365             _tag2(name2)
366         {
367         }
368 
369         /// Pop two memory tags from the local-call stack.
370         ///
371         /// \see TfMallocTag::Auto(const char* name).
Release()372         void Release() {
373             _tag2.Release();
374             _tag1.Release();
375         }
376 
377     private:
378         Auto _tag1;
379         Auto _tag2;
380     };
381 
382     /// Manually push a tag onto the stack.
383     ///
384     /// This call has the same effect as the constructor for \c
385     /// TfMallocTag::Auto (aka \c TfAutoMallocTag), however a matching call to
386     /// \c Pop() is required.
387     ///
388     /// Note that initializing the tagging system between matching calls to \c
389     /// Push() and \c Pop() is ill-advised, which is yet another reason to
390     /// prefer using \c TfAutoMallocTag whenever possible.
Push(const std::string & name)391     static void Push(const std::string& name) {
392         TfMallocTag::Auto noname(name);
393         noname._threadData = NULL;  // disable destructor
394     }
395 
396     /// \overload
Push(const char * name)397     static void Push(const char* name) {
398         TfMallocTag::Auto noname(name);
399         noname._threadData = NULL;  // disable destructor
400     }
401 
402     /// Manually pop a tag from the stack.
403     ///
404     /// This call has the same effect as the destructor for \c
405     /// TfMallocTag::Auto; it must properly nest with a matching call to \c
406     /// Push(), of course.
407     ///
408     /// If \c name is supplied and does not match the tag at the top of the
409     /// stack, a warning message is issued.
410     TF_API static void Pop(const char* name = NULL);
411 
412     /// \overload
Pop(const std::string & name)413     static void Pop(const std::string& name) {
414         Pop(name.c_str());
415     }
416 
417     /// Sets the tags to trap in the debugger.
418     ///
419     /// When memory is allocated or freed for any tag that matches \p
420     /// matchList the debugger trap is invoked. If a debugger is attached the
421     /// program will stop in the debugger, otherwise the program will continue
422     /// to run. See \c ArchDebuggerTrap() and \c ArchDebuggerWait().
423     ///
424     /// \p matchList is a comma, tab or newline separated list of malloc tag
425     /// names. The names can have internal spaces but leading and trailing
426     /// spaces are stripped. If a name ends in '*' then the suffix is
427     /// wildcarded. A name can have a leading '-' or '+' to prevent or allow a
428     /// match. Each name is considered in order and later matches override
429     /// earlier matches. For example, 'Csd*, -CsdScene::_Populate*,
430     /// +CsdScene::_PopulatePrimCacheLocal' matches any malloc tag starting
431     /// with 'Csd' but nothing starting with 'CsdScene::_Populate' except
432     /// 'CsdScene::_PopulatePrimCacheLocal'. Use the empty string to disable
433     /// debugging traps.
434     TF_API static void SetDebugMatchList(const std::string& matchList);
435 
436     /// Sets the tags to trace.
437     ///
438     /// When memory is allocated for any tag that matches \p matchList a stack
439     /// trace is recorded.  When that memory is released the stack trace is
440     /// discarded.  Clients can call \c GetCapturedMallocStacks() to get a
441     /// list of all recorded stack traces.  This is useful for finding leaks.
442     ///
443     /// Traces recorded for any tag that will no longer be matched are
444     /// discarded by this call.  Traces recorded for tags that continue to be
445     /// matched are retained.
446     ///
447     /// \p matchList is a comma, tab or newline separated list of malloc tag
448     /// names.  The names can have internal spaces but leading and trailing
449     /// spaces are stripped.  If a name ends in '*' then the suffix is
450     /// wildcarded.  A name can have a leading '-' or '+' to prevent or allow
451     /// a match.  Each name is considered in order and later matches override
452     /// earlier matches.  For example, 'Csd*, -CsdScene::_Populate*,
453     /// +CsdScene::_PopulatePrimCacheLocal' matches any malloc tag starting
454     /// with 'Csd' but nothing starting with 'CsdScene::_Populate' except
455     /// 'CsdScene::_PopulatePrimCacheLocal'.  Use the empty string to disable
456     /// stack capturing.
457     TF_API static void SetCapturedMallocStacksMatchList(const std::string& matchList);
458 
459     /// Returns the captured malloc stack traces for allocations billed to the
460     /// malloc tags passed to SetCapturedMallocStacksMatchList().
461     ///
462     /// \note This method also clears the internally held set of captured
463     /// stacks.
464     TF_API static std::vector<std::vector<uintptr_t> > GetCapturedMallocStacks();
465 
466 private:
467     friend struct Tf_MallocGlobalData;
468 
469     class _TemporaryTaggingState {
470     public:
471         explicit _TemporaryTaggingState(_Tagging state);
472         ~_TemporaryTaggingState();
473 
474         _TemporaryTaggingState(const _TemporaryTaggingState &);
475         _TemporaryTaggingState& operator=(const _TemporaryTaggingState &);
476 
477         _TemporaryTaggingState(_TemporaryTaggingState &&);
478         _TemporaryTaggingState& operator=(_TemporaryTaggingState &&);
479 
480     private:
481         _Tagging _oldState;
482     };
483 
484     static void _SetTagging(_Tagging state);
485     static _Tagging _GetTagging();
486 
487     static bool _Initialize(std::string* errMsg);
488 
489     static inline bool _ShouldNotTag(_ThreadData**, _Tagging* t = NULL);
490     static inline Tf_MallocPathNode* _GetCurrentPathNodeNoLock(
491         const _ThreadData* threadData);
492 
493     static void* _MallocWrapper_ptmalloc(size_t, const void*);
494     static void* _ReallocWrapper_ptmalloc(void*, size_t, const void*);
495     static void* _MemalignWrapper_ptmalloc(size_t, size_t, const void*);
496     static void  _FreeWrapper_ptmalloc(void*, const void*);
497 
498     static void* _MallocWrapper(size_t, const void*);
499     static void* _ReallocWrapper(void*, size_t, const void*);
500     static void* _MemalignWrapper(size_t, size_t, const void*);
501     static void  _FreeWrapper(void*, const void*);
502 
503     friend class TfMallocTag::Auto;
504     class Tls;
505     friend class TfMallocTag::Tls;
506     TF_API static bool _doTagging;
507 };
508 
509 /// Top-down memory tagging system.
510 typedef TfMallocTag::Auto TfAutoMallocTag;
511 
512 /// Top-down memory tagging system.
513 typedef TfMallocTag::Auto2 TfAutoMallocTag2;
514 
515 /// Enable lib/tf memory management.
516 ///
517 /// Invoking this macro inside a class body causes the class operator \c new to push
518 /// two \c TfAutoMallocTag objects onto the stack before actually allocating memory for the
519 /// class.  The names passed into the tag are used for the two tags; pass NULL if you
520 /// don't need the second tag.  For example,
521 /// \code
/// class MyBigMeshVertex {
/// public:
///     TF_MALLOC_TAG_NEW("MyBigMesh", "Vertex");
///     ...
/// };
527 /// \endcode
528 /// will cause dynamic allocations of \c MyBigMeshVertex to be grouped under
529 /// the tag \c Vertex which is in turn grouped under \c MyBigMesh.  However,
530 /// \code
/// class MyBigMesh {
/// public:
///     TF_MALLOC_TAG_NEW("MyBigMesh", NULL);
///     ...
/// };
536 /// \endcode
537 /// specifies \c NULL for the second tag because the first tag is sufficient.
538 ///
539 /// Normally, this macro should be placed in the public section of a class.
540 /// Note that you cannot specify both this and \c TF_FIXED_SIZE_ALLOCATOR()
541 /// for the same class.
542 ///
543 /// Also, note that allocations of a class inside an STL datastructure will
544 /// not be grouped under the indicated tags.
545 /// \remark Placed in .h files.
546 ///
547 /// \hideinitializer
548 //
549 PXR_NAMESPACE_CLOSE_SCOPE
550 
// Declares class-scope allocation and deallocation operators.  The plain
// `new` and `new[]` forms construct two TfAutoMallocTag objects (from name1
// and name2) before allocating with malloc, and the matching `delete` forms
// release the memory with free.
//
// A throwing allocation function is not allowed to return a null pointer, so
// allocation failure is reported by throwing std::bad_alloc.  Zero-byte
// requests (possible for `new T[0]`) are rounded up to one byte because
// malloc(0) may legitimately return null.
#define TF_MALLOC_TAG_NEW(name1, name2)                                       \
    /* this is for STL purposes */                                            \
    inline void* operator new(::std::size_t, void* ptr) {                     \
        return ptr;                                                           \
    }                                                                         \
                                                                              \
    inline void* operator new(::std::size_t s) {                              \
        PXR_NS::TfAutoMallocTag tag1(name1);                                  \
        PXR_NS::TfAutoMallocTag tag2(name2);                                  \
        void* const mem = ::std::malloc(s != 0 ? s : 1);                      \
        if (!mem) {                                                           \
            throw ::std::bad_alloc();                                         \
        }                                                                     \
        return mem;                                                           \
    }                                                                         \
                                                                              \
    inline void* operator new[](::std::size_t s) {                            \
        PXR_NS::TfAutoMallocTag tag1(name1);                                  \
        PXR_NS::TfAutoMallocTag tag2(name2);                                  \
        void* const mem = ::std::malloc(s != 0 ? s : 1);                      \
        if (!mem) {                                                           \
            throw ::std::bad_alloc();                                         \
        }                                                                     \
        return mem;                                                           \
    }                                                                         \
                                                                              \
    /* Required due to the placement-new override above. */                   \
    inline void operator delete(void*, void*) {}                              \
                                                                              \
    inline void operator delete(void* ptr, ::std::size_t) {                   \
        ::std::free(ptr);                                                     \
    }                                                                         \
                                                                              \
    inline void operator delete[] (void* ptr, ::std::size_t) {                \
        ::std::free(ptr);                                                     \
    }
579 
580 #endif
581