1 //
2 // Copyright 2016 Pixar
3 //
4 // Licensed under the Apache License, Version 2.0 (the "Apache License")
5 // with the following modification; you may not use this file except in
6 // compliance with the Apache License and the following modification to it:
7 // Section 6. Trademarks. is deleted and replaced with:
8 //
9 // 6. Trademarks. This License does not grant permission to use the trade
10 //    names, trademarks, service marks, or product names of the Licensor
11 //    and its affiliates, except as required to comply with Section 4(c) of
12 //    the License and to reproduce the content of the NOTICE file.
13 //
14 // You may obtain a copy of the Apache License at
15 //
16 //     http://www.apache.org/licenses/LICENSE-2.0
17 //
18 // Unless required by applicable law or agreed to in writing, software
19 // distributed under the Apache License with the above modification is
20 // distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21 // KIND, either express or implied. See the Apache License for the specific
22 // language governing permissions and limitations under the Apache License.
23 //
24 #ifndef PXR_BASE_TF_TOKEN_H
25 #define PXR_BASE_TF_TOKEN_H
26 
27 /// \file tf/token.h
28 ///
29 /// \c TfToken class for efficient string referencing and hashing, plus
30 /// conversions to and from stl string containers.
31 
32 #include "pxr/pxr.h"
33 
34 #include "pxr/base/tf/api.h"
35 #include "pxr/base/tf/diagnosticLite.h"
36 #include "pxr/base/tf/hash.h"
37 #include "pxr/base/tf/hashset.h"
38 #include "pxr/base/tf/pointerAndBits.h"
39 
40 #include <atomic>
41 #include <iosfwd>
42 #include <string>
43 #include <vector>
44 #include <set>
45 
46 PXR_NAMESPACE_OPEN_SCOPE
47 
48 struct TfTokenFastArbitraryLessThan;
49 
50 /// \class TfToken
51 /// \ingroup group_tf_String
52 ///
53 /// Token for efficient comparison, assignment, and hashing of known strings.
54 ///
55 /// A TfToken is a handle for a registered string, and can be compared,
56 /// assigned, and hashed in constant time.  It is useful when a bounded number
57 /// of strings are used as fixed symbols (but never modified).
58 ///
59 /// For example, the set of avar names in a shot is large but bounded, and
60 /// once an avar name is discovered, it is never manipulated.  If these names
61 /// were passed around as strings, every comparison and hash would be linear
62 /// in the number of characters.  (String assignment itself is sometimes a
63 /// constant time operation, but it is sometimes linear in the length of the
64 /// string as well as requiring a memory allocation.)
65 ///
66 /// To use TfToken, simply create an instance from a string or const char*.
67 /// If the string hasn't been seen before, a copy of it is added to a global
68 /// table.  The resulting TfToken is simply a wrapper around an string*,
69 /// pointing that canonical copy of the string.  Thus, operations on the token
70 /// are very fast.  (The string's hash is simply the address of the canonical
71 /// copy, so hashing the string is constant time.)
72 ///
73 /// The free functions \c TfToTokenVector() and \c TfToStringVector() provide
74 /// conversions to and from vectors of \c string.
75 ///
76 /// Note: Access to the global table is protected by a mutex.  This is a good
77 /// idea as long as clients do not construct tokens from strings too
78 /// frequently.  Construct tokens only as often as you must (for example, as
79 /// you read data files), and <i>never</i> in inner loops.  Of course, once
80 /// you have a token, feel free to compare, assign, and hash it as often as
81 /// you like.  (That's what it's for.)  In order to help prevent tokens from
82 /// being re-created over and over, auto type conversion from \c string and \c
83 /// char* to \c TfToken is disabled (you must use the explicit \c TfToken
84 /// constructors).  However, auto conversion from \c TfToken to \c string and
85 /// \c char* is provided.
86 ///
87 class TfToken
88 {
89 public:
90     enum _ImmortalTag { Immortal };
91 
92     /// Create the empty token, containing the empty string.
TfToken()93     constexpr TfToken() noexcept {}
94 
95     /// Copy constructor.
TfToken(TfToken const & rhs)96     TfToken(TfToken const& rhs) noexcept : _rep(rhs._rep) { _AddRef(); }
97 
98     /// Move constructor.
TfToken(TfToken && rhs)99     TfToken(TfToken && rhs) noexcept : _rep(rhs._rep) {
100         rhs._rep = TfPointerAndBits<const _Rep>();
101     }
102 
103     /// Copy assignment.
104     TfToken& operator= (TfToken const& rhs) noexcept {
105         if (&rhs != this) {
106             rhs._AddRef();
107             _RemoveRef();
108             _rep = rhs._rep;
109         }
110         return *this;
111     }
112 
113     /// Move assignment.
114     TfToken& operator= (TfToken && rhs) noexcept {
115         if (&rhs != this) {
116             _RemoveRef();
117             _rep = rhs._rep;
118             rhs._rep = TfPointerAndBits<const _Rep>();
119         }
120         return *this;
121     }
122 
123     /// Destructor.
~TfToken()124     ~TfToken() { _RemoveRef(); }
125 
126     /// Acquire a token for the given string.
127     //
128     // This constructor involves a string hash and a lookup in the global
129     // table, and so should not be done more often than necessary.  When
130     // possible, create a token once and reuse it many times.
131     TF_API explicit TfToken(std::string const& s);
132     /// \overload
133     // Create a token for \p s, and make it immortal.  If \p s exists in the
134     // token table already and is not immortal, make it immortal.  Immortal
135     // tokens are faster to copy than mortal tokens, but they will never expire
136     // and release their memory.
137     TF_API TfToken(std::string const& s, _ImmortalTag);
138 
139     /// Acquire a token for the given string.
140     //
141     // This constructor involves a string hash and a lookup in the global
142     // table, and so should not be done more often than necessary.  When
143     // possible, create a token once and reuse it many times.
144     TF_API explicit TfToken(char const* s);
145     /// \overload
146     // Create a token for \p s, and make it immortal.  If \p s exists in the
147     // token table already and is not immortal, make it immortal.  Immortal
148     // tokens are faster to copy than mortal tokens, but they will never expire
149     // and release their memory.
150     TF_API TfToken(char const* s, _ImmortalTag);
151 
152     /// Find the token for the given string, if one exists.
153     //
154     // If a token has previous been created for the given string, this
155     // will return it.  Otherwise, the empty token will be returned.
156     TF_API static TfToken Find(std::string const& s);
157 
158     /// Return a size_t hash for this token.
159     //
160     // The hash is based on the token's storage identity; this is immutable
161     // as long as the token is in use anywhere in the process.
162     //
163     inline size_t Hash() const;
164 
165     /// Functor to use for hash maps from tokens to other things.
166     struct HashFunctor {
operatorHashFunctor167         size_t operator()(TfToken const& token) const { return token.Hash(); }
168     };
169 
170     /// \typedef TfHashSet<TfToken, TfToken::HashFunctor> HashSet;
171     ///
172     /// Predefined type for TfHashSet of tokens, since it's so awkward to
173     /// manually specify.
174     ///
175     typedef TfHashSet<TfToken, TfToken::HashFunctor> HashSet;
176 
177     /// \typedef std::set<TfToken, TfTokenFastArbitraryLessThan> Set;
178     ///
179     /// Predefined type for set of tokens, for when faster lookup is
180     /// desired, without paying the memory or initialization cost of a
181     /// TfHashSet.
182     ///
183     typedef std::set<TfToken, TfTokenFastArbitraryLessThan> Set;
184 
185     /// Return the size of the string that this token represents.
size()186     size_t size() const {
187         _Rep const *rep = _rep.Get();
188         return rep ? rep->_str.size() : 0;
189     }
190 
191     /// Return the text that this token represents.
192     ///
193     /// \note The returned pointer value is not valid after this TfToken
194     /// object has been destroyed.
195     ///
GetText()196     char const* GetText() const {
197         _Rep const *rep = _rep.Get();
198         return rep ? rep->_str.c_str() : "";
199     }
200 
201     /// Synonym for GetText().
data()202     char const *data() const {
203         return GetText();
204     }
205 
206     /// Return the string that this token represents.
GetString()207     std::string const& GetString() const {
208         _Rep const *rep = _rep.Get();
209         return rep ? rep->_str : _GetEmptyString();
210     }
211 
212     /// Swap this token with another.
Swap(TfToken & other)213     inline void Swap(TfToken &other) {
214         std::swap(_rep, other._rep);
215     }
216 
217     /// Equality operator
218     bool operator==(TfToken const& o) const {
219         // Equal if pointers & bits are equal, or if just pointers are.
220         return _rep.Get() == o._rep.Get();
221     }
222 
223     /// Equality operator
224     bool operator!=(TfToken const& o) const {
225         return !(*this == o);
226     }
227 
228     /// Equality operator for \c char strings.  Not as fast as direct
229     /// token to token equality testing
230     TF_API bool operator==(std::string const& o) const;
231 
232     /// Equality operator for \c char strings.  Not as fast as direct
233     /// token to token equality testing
234     TF_API bool operator==(const char *) const;
235 
236     /// \overload
237     friend bool operator==(std::string const& o, TfToken const& t) {
238         return t == o;
239     }
240 
241     /// \overload
242     friend bool operator==(const char *o, TfToken const& t) {
243         return t == o;
244     }
245 
246     /// Inequality operator for \c string's.  Not as fast as direct
247     /// token to token equality testing
248     bool operator!=(std::string const& o) const {
249         return !(*this == o);
250     }
251 
252     /// \overload
253     friend bool operator!=(std::string const& o, TfToken const& t)  {
254         return !(t == o);
255     }
256 
257     /// Inequality operator for \c char strings.  Not as fast as direct
258     /// token to token equality testing
259     bool operator!=(char const* o) const {
260         return !(*this == o);
261     }
262 
263     /// \overload
264     friend bool operator!=(char const* o, TfToken const& t) {
265         return !(t == o);
266     }
267 
268     /// Less-than operator that compares tokenized strings lexicographically.
269     /// Allows \c TfToken to be used in \c std::set
270     inline bool operator<(TfToken const& r) const {
271         auto ll = _rep.GetLiteral(), rl = r._rep.GetLiteral();
272         if (ll && rl) {
273             auto lrep = _rep.Get(), rrep = r._rep.Get();
274             uint64_t lcc = lrep->_compareCode, rcc = rrep->_compareCode;
275             return lcc < rcc || (lcc == rcc && lrep->_str < rrep->_str);
276         }
277         // One or both are zero -- return true if ll is zero and rl is not.
278         return !ll && rl;
279     }
280 
281     /// Greater-than operator that compares tokenized strings lexicographically.
282     inline bool operator>(TfToken const& o) const {
283         return o < *this;
284     }
285 
286     /// Greater-than-or-equal operator that compares tokenized strings
287     /// lexicographically.
288     inline bool operator>=(TfToken const& o) const {
289         return !(*this < o);
290     }
291 
292     /// Less-than-or-equal operator that compares tokenized strings
293     /// lexicographically.
294     inline bool operator<=(TfToken const& o) const {
295         return !(*this > o);
296     }
297 
298     /// Allow \c TfToken to be auto-converted to \c string
299     operator std::string const& () const { return GetString(); }
300 
301     /// Returns \c true iff this token contains the empty string \c ""
IsEmpty()302     bool IsEmpty() const { return _rep.GetLiteral() == 0; }
303 
304     /// Returns \c true iff this is an immortal token.
IsImmortal()305     bool IsImmortal() const { return !_rep->_isCounted; }
306 
307     /// Stream insertion.
308     friend TF_API std::ostream &operator <<(std::ostream &stream, TfToken const&);
309 
310     /// TfHash support.
311     template <class HashState>
312     friend void
TfHashAppend(HashState & h,TfToken const & token)313     TfHashAppend(HashState &h, TfToken const &token) {
314         h.Append(token._rep.Get());
315     }
316 
317 private:
318     // Add global swap overload.
swap(TfToken & lhs,TfToken & rhs)319     friend void swap(TfToken &lhs, TfToken &rhs) {
320         lhs.Swap(rhs);
321     }
322 
_AddRef()323     void _AddRef() const {
324         if (_rep.BitsAs<bool>()) {
325             // We believe this rep is refCounted.
326             if (!_rep->IncrementIfCounted()) {
327                 // Our belief is wrong, update our cache of countedness.
328                 _rep.SetBits(false);
329             }
330         }
331     }
332 
_RemoveRef()333     void _RemoveRef() const {
334         if (_rep.BitsAs<bool>()) {
335             // We believe this rep is refCounted.
336             if (_rep->_isCounted) {
337                 if (_rep->_refCount.load(std::memory_order_relaxed) == 1) {
338                     _PossiblyDestroyRep();
339                 }
340                 else {
341                     /*
342                      * This is deliberately racy.  It's possible the statement
343                      * below drops our count to zero, and we leak the rep
344                      * (i.e. we leave it in the table).  That's a low
345                      * probability event, in exchange for only grabbing the lock
346                      * (in _PossiblyDestroyRep()) when the odds are we really do
347                      * need to modify the table.
348                      *
349                      * Note that even if we leak the rep, if we look it up
350                      * again, we'll simply repull it from the table and keep
351                      * using it.  So it's not even necessarily a true leak --
352                      * it's just a potential leak.
353                      */
354                     _rep->_refCount.fetch_sub(1, std::memory_order_relaxed);
355                 }
356             } else {
357                 // Our belief is wrong, update our cache of countedness.
358                 _rep.SetBits(false);
359             }
360         }
361     }
362 
363     void TF_API _PossiblyDestroyRep() const;
364 
365     struct _Rep {
_Rep_Rep366         _Rep() {}
_Rep_Rep367         explicit _Rep(char const *s) : _str(s), _cstr(_str.c_str()) {}
_Rep_Rep368         explicit _Rep(std::string const &s) : _str(s), _cstr(_str.c_str()) {}
369 
370         // Make sure we reacquire _cstr from _str on copy and assignment
371         // to avoid holding on to a dangling pointer. However, if rhs'
372         // _cstr member doesn't come from its _str, just copy it directly
373         // over. This is to support lightweight _Rep objects used for
374         // internal lookups.
_Rep_Rep375         _Rep(_Rep const &rhs) : _str(rhs._str),
376                                 _cstr(rhs._str.c_str() != rhs._cstr ?
377                                           rhs._cstr : _str.c_str()),
378                                 _compareCode(rhs._compareCode),
379                                 _refCount(rhs._refCount.load()),
380                                 _isCounted(rhs._isCounted),
381                                 _setNum(rhs._setNum) {}
382         _Rep& operator=(_Rep const &rhs) {
383             _str = rhs._str;
384             _cstr = (rhs._str.c_str() != rhs._cstr ? rhs._cstr : _str.c_str());
385             _compareCode = rhs._compareCode;
386             _refCount = rhs._refCount.load();
387             _isCounted = rhs._isCounted;
388             _setNum = rhs._setNum;
389             return *this;
390         }
391 
IncrementIfCounted_Rep392         inline bool IncrementIfCounted() const {
393             const bool isCounted = _isCounted;
394             if (isCounted) {
395                 _refCount.fetch_add(1, std::memory_order_relaxed);
396             }
397             return isCounted;
398         }
399 
400         std::string _str;
401         char const *_cstr;
402         mutable uint64_t _compareCode;
403         mutable std::atomic_int _refCount;
404         mutable bool _isCounted;
405         mutable unsigned char _setNum;
406     };
407 
408     friend struct TfTokenFastArbitraryLessThan;
409     friend struct Tf_TokenRegistry;
410 
411     TF_API static std::string const& _GetEmptyString();
412 
413     mutable TfPointerAndBits<const _Rep> _rep;
414 };
415 
416 inline size_t
Hash()417 TfToken::Hash() const
418 {
419     return TfHash()(*this);
420 }
421 
422 /// Fast but non-lexicographical (in fact, arbitrary) less-than comparison for
423 /// TfTokens.  Should only be used in performance-critical cases.
424 struct TfTokenFastArbitraryLessThan {
operatorTfTokenFastArbitraryLessThan425     inline bool operator()(TfToken const &lhs, TfToken const &rhs) const {
426         return lhs._rep.Get() < rhs._rep.Get();
427     }
428 };
429 
430 /// Convert the vector of strings \p sv into a vector of \c TfToken
431 TF_API std::vector<TfToken>
432 TfToTokenVector(const std::vector<std::string> &sv);
433 
434 /// Convert the vector of \c TfToken \p tv into a vector of strings
435 TF_API std::vector<std::string>
436 TfToStringVector(const std::vector<TfToken> &tv);
437 
438 /// Overload hash_value for TfToken.
hash_value(const TfToken & x)439 inline size_t hash_value(const TfToken& x) { return x.Hash(); }
440 
441 /// Convenience types.
442 typedef std::vector<TfToken> TfTokenVector;
443 
444 PXR_NAMESPACE_CLOSE_SCOPE
445 
446 #endif // PXR_BASE_TF_TOKEN_H
447