1 //
2 // Copyright 2016 Pixar
3 //
4 // Licensed under the Apache License, Version 2.0 (the "Apache License")
5 // with the following modification; you may not use this file except in
6 // compliance with the Apache License and the following modification to it:
7 // Section 6. Trademarks. is deleted and replaced with:
8 //
9 // 6. Trademarks. This License does not grant permission to use the trade
10 // names, trademarks, service marks, or product names of the Licensor
11 // and its affiliates, except as required to comply with Section 4(c) of
12 // the License and to reproduce the content of the NOTICE file.
13 //
14 // You may obtain a copy of the Apache License at
15 //
16 // http://www.apache.org/licenses/LICENSE-2.0
17 //
18 // Unless required by applicable law or agreed to in writing, software
19 // distributed under the Apache License with the above modification is
20 // distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21 // KIND, either express or implied. See the Apache License for the specific
22 // language governing permissions and limitations under the Apache License.
23 //
24 #ifndef PXR_BASE_TF_TOKEN_H
25 #define PXR_BASE_TF_TOKEN_H
26
27 /// \file tf/token.h
28 ///
29 /// \c TfToken class for efficient string referencing and hashing, plus
30 /// conversions to and from stl string containers.
31
32 #include "pxr/pxr.h"
33
34 #include "pxr/base/tf/api.h"
35 #include "pxr/base/tf/diagnosticLite.h"
36 #include "pxr/base/tf/hash.h"
37 #include "pxr/base/tf/hashset.h"
38 #include "pxr/base/tf/pointerAndBits.h"
39
40 #include <atomic>
41 #include <iosfwd>
42 #include <string>
43 #include <vector>
44 #include <set>
45
46 PXR_NAMESPACE_OPEN_SCOPE
47
48 struct TfTokenFastArbitraryLessThan;
49
50 /// \class TfToken
51 /// \ingroup group_tf_String
52 ///
53 /// Token for efficient comparison, assignment, and hashing of known strings.
54 ///
55 /// A TfToken is a handle for a registered string, and can be compared,
56 /// assigned, and hashed in constant time. It is useful when a bounded number
57 /// of strings are used as fixed symbols (but never modified).
58 ///
59 /// For example, the set of avar names in a shot is large but bounded, and
60 /// once an avar name is discovered, it is never manipulated. If these names
61 /// were passed around as strings, every comparison and hash would be linear
62 /// in the number of characters. (String assignment itself is sometimes a
63 /// constant time operation, but it is sometimes linear in the length of the
64 /// string as well as requiring a memory allocation.)
65 ///
66 /// To use TfToken, simply create an instance from a string or const char*.
67 /// If the string hasn't been seen before, a copy of it is added to a global
68 /// table. The resulting TfToken is simply a wrapper around an string*,
69 /// pointing that canonical copy of the string. Thus, operations on the token
70 /// are very fast. (The string's hash is simply the address of the canonical
71 /// copy, so hashing the string is constant time.)
72 ///
73 /// The free functions \c TfToTokenVector() and \c TfToStringVector() provide
74 /// conversions to and from vectors of \c string.
75 ///
76 /// Note: Access to the global table is protected by a mutex. This is a good
77 /// idea as long as clients do not construct tokens from strings too
78 /// frequently. Construct tokens only as often as you must (for example, as
79 /// you read data files), and <i>never</i> in inner loops. Of course, once
80 /// you have a token, feel free to compare, assign, and hash it as often as
81 /// you like. (That's what it's for.) In order to help prevent tokens from
82 /// being re-created over and over, auto type conversion from \c string and \c
83 /// char* to \c TfToken is disabled (you must use the explicit \c TfToken
84 /// constructors). However, auto conversion from \c TfToken to \c string and
85 /// \c char* is provided.
86 ///
87 class TfToken
88 {
89 public:
90 enum _ImmortalTag { Immortal };
91
92 /// Create the empty token, containing the empty string.
TfToken()93 constexpr TfToken() noexcept {}
94
95 /// Copy constructor.
TfToken(TfToken const & rhs)96 TfToken(TfToken const& rhs) noexcept : _rep(rhs._rep) { _AddRef(); }
97
98 /// Move constructor.
TfToken(TfToken && rhs)99 TfToken(TfToken && rhs) noexcept : _rep(rhs._rep) {
100 rhs._rep = TfPointerAndBits<const _Rep>();
101 }
102
103 /// Copy assignment.
104 TfToken& operator= (TfToken const& rhs) noexcept {
105 if (&rhs != this) {
106 rhs._AddRef();
107 _RemoveRef();
108 _rep = rhs._rep;
109 }
110 return *this;
111 }
112
113 /// Move assignment.
114 TfToken& operator= (TfToken && rhs) noexcept {
115 if (&rhs != this) {
116 _RemoveRef();
117 _rep = rhs._rep;
118 rhs._rep = TfPointerAndBits<const _Rep>();
119 }
120 return *this;
121 }
122
123 /// Destructor.
~TfToken()124 ~TfToken() { _RemoveRef(); }
125
126 /// Acquire a token for the given string.
127 //
128 // This constructor involves a string hash and a lookup in the global
129 // table, and so should not be done more often than necessary. When
130 // possible, create a token once and reuse it many times.
131 TF_API explicit TfToken(std::string const& s);
132 /// \overload
133 // Create a token for \p s, and make it immortal. If \p s exists in the
134 // token table already and is not immortal, make it immortal. Immortal
135 // tokens are faster to copy than mortal tokens, but they will never expire
136 // and release their memory.
137 TF_API TfToken(std::string const& s, _ImmortalTag);
138
139 /// Acquire a token for the given string.
140 //
141 // This constructor involves a string hash and a lookup in the global
142 // table, and so should not be done more often than necessary. When
143 // possible, create a token once and reuse it many times.
144 TF_API explicit TfToken(char const* s);
145 /// \overload
146 // Create a token for \p s, and make it immortal. If \p s exists in the
147 // token table already and is not immortal, make it immortal. Immortal
148 // tokens are faster to copy than mortal tokens, but they will never expire
149 // and release their memory.
150 TF_API TfToken(char const* s, _ImmortalTag);
151
152 /// Find the token for the given string, if one exists.
153 //
154 // If a token has previous been created for the given string, this
155 // will return it. Otherwise, the empty token will be returned.
156 TF_API static TfToken Find(std::string const& s);
157
158 /// Return a size_t hash for this token.
159 //
160 // The hash is based on the token's storage identity; this is immutable
161 // as long as the token is in use anywhere in the process.
162 //
163 inline size_t Hash() const;
164
165 /// Functor to use for hash maps from tokens to other things.
166 struct HashFunctor {
operatorHashFunctor167 size_t operator()(TfToken const& token) const { return token.Hash(); }
168 };
169
170 /// \typedef TfHashSet<TfToken, TfToken::HashFunctor> HashSet;
171 ///
172 /// Predefined type for TfHashSet of tokens, since it's so awkward to
173 /// manually specify.
174 ///
175 typedef TfHashSet<TfToken, TfToken::HashFunctor> HashSet;
176
177 /// \typedef std::set<TfToken, TfTokenFastArbitraryLessThan> Set;
178 ///
179 /// Predefined type for set of tokens, for when faster lookup is
180 /// desired, without paying the memory or initialization cost of a
181 /// TfHashSet.
182 ///
183 typedef std::set<TfToken, TfTokenFastArbitraryLessThan> Set;
184
185 /// Return the size of the string that this token represents.
size()186 size_t size() const {
187 _Rep const *rep = _rep.Get();
188 return rep ? rep->_str.size() : 0;
189 }
190
191 /// Return the text that this token represents.
192 ///
193 /// \note The returned pointer value is not valid after this TfToken
194 /// object has been destroyed.
195 ///
GetText()196 char const* GetText() const {
197 _Rep const *rep = _rep.Get();
198 return rep ? rep->_str.c_str() : "";
199 }
200
201 /// Synonym for GetText().
data()202 char const *data() const {
203 return GetText();
204 }
205
206 /// Return the string that this token represents.
GetString()207 std::string const& GetString() const {
208 _Rep const *rep = _rep.Get();
209 return rep ? rep->_str : _GetEmptyString();
210 }
211
212 /// Swap this token with another.
Swap(TfToken & other)213 inline void Swap(TfToken &other) {
214 std::swap(_rep, other._rep);
215 }
216
217 /// Equality operator
218 bool operator==(TfToken const& o) const {
219 // Equal if pointers & bits are equal, or if just pointers are.
220 return _rep.Get() == o._rep.Get();
221 }
222
223 /// Equality operator
224 bool operator!=(TfToken const& o) const {
225 return !(*this == o);
226 }
227
228 /// Equality operator for \c char strings. Not as fast as direct
229 /// token to token equality testing
230 TF_API bool operator==(std::string const& o) const;
231
232 /// Equality operator for \c char strings. Not as fast as direct
233 /// token to token equality testing
234 TF_API bool operator==(const char *) const;
235
236 /// \overload
237 friend bool operator==(std::string const& o, TfToken const& t) {
238 return t == o;
239 }
240
241 /// \overload
242 friend bool operator==(const char *o, TfToken const& t) {
243 return t == o;
244 }
245
246 /// Inequality operator for \c string's. Not as fast as direct
247 /// token to token equality testing
248 bool operator!=(std::string const& o) const {
249 return !(*this == o);
250 }
251
252 /// \overload
253 friend bool operator!=(std::string const& o, TfToken const& t) {
254 return !(t == o);
255 }
256
257 /// Inequality operator for \c char strings. Not as fast as direct
258 /// token to token equality testing
259 bool operator!=(char const* o) const {
260 return !(*this == o);
261 }
262
263 /// \overload
264 friend bool operator!=(char const* o, TfToken const& t) {
265 return !(t == o);
266 }
267
268 /// Less-than operator that compares tokenized strings lexicographically.
269 /// Allows \c TfToken to be used in \c std::set
270 inline bool operator<(TfToken const& r) const {
271 auto ll = _rep.GetLiteral(), rl = r._rep.GetLiteral();
272 if (ll && rl) {
273 auto lrep = _rep.Get(), rrep = r._rep.Get();
274 uint64_t lcc = lrep->_compareCode, rcc = rrep->_compareCode;
275 return lcc < rcc || (lcc == rcc && lrep->_str < rrep->_str);
276 }
277 // One or both are zero -- return true if ll is zero and rl is not.
278 return !ll && rl;
279 }
280
281 /// Greater-than operator that compares tokenized strings lexicographically.
282 inline bool operator>(TfToken const& o) const {
283 return o < *this;
284 }
285
286 /// Greater-than-or-equal operator that compares tokenized strings
287 /// lexicographically.
288 inline bool operator>=(TfToken const& o) const {
289 return !(*this < o);
290 }
291
292 /// Less-than-or-equal operator that compares tokenized strings
293 /// lexicographically.
294 inline bool operator<=(TfToken const& o) const {
295 return !(*this > o);
296 }
297
298 /// Allow \c TfToken to be auto-converted to \c string
299 operator std::string const& () const { return GetString(); }
300
301 /// Returns \c true iff this token contains the empty string \c ""
IsEmpty()302 bool IsEmpty() const { return _rep.GetLiteral() == 0; }
303
304 /// Returns \c true iff this is an immortal token.
IsImmortal()305 bool IsImmortal() const { return !_rep->_isCounted; }
306
307 /// Stream insertion.
308 friend TF_API std::ostream &operator <<(std::ostream &stream, TfToken const&);
309
310 /// TfHash support.
311 template <class HashState>
312 friend void
TfHashAppend(HashState & h,TfToken const & token)313 TfHashAppend(HashState &h, TfToken const &token) {
314 h.Append(token._rep.Get());
315 }
316
317 private:
318 // Add global swap overload.
swap(TfToken & lhs,TfToken & rhs)319 friend void swap(TfToken &lhs, TfToken &rhs) {
320 lhs.Swap(rhs);
321 }
322
_AddRef()323 void _AddRef() const {
324 if (_rep.BitsAs<bool>()) {
325 // We believe this rep is refCounted.
326 if (!_rep->IncrementIfCounted()) {
327 // Our belief is wrong, update our cache of countedness.
328 _rep.SetBits(false);
329 }
330 }
331 }
332
_RemoveRef()333 void _RemoveRef() const {
334 if (_rep.BitsAs<bool>()) {
335 // We believe this rep is refCounted.
336 if (_rep->_isCounted) {
337 if (_rep->_refCount.load(std::memory_order_relaxed) == 1) {
338 _PossiblyDestroyRep();
339 }
340 else {
341 /*
342 * This is deliberately racy. It's possible the statement
343 * below drops our count to zero, and we leak the rep
344 * (i.e. we leave it in the table). That's a low
345 * probability event, in exchange for only grabbing the lock
346 * (in _PossiblyDestroyRep()) when the odds are we really do
347 * need to modify the table.
348 *
349 * Note that even if we leak the rep, if we look it up
350 * again, we'll simply repull it from the table and keep
351 * using it. So it's not even necessarily a true leak --
352 * it's just a potential leak.
353 */
354 _rep->_refCount.fetch_sub(1, std::memory_order_relaxed);
355 }
356 } else {
357 // Our belief is wrong, update our cache of countedness.
358 _rep.SetBits(false);
359 }
360 }
361 }
362
363 void TF_API _PossiblyDestroyRep() const;
364
365 struct _Rep {
_Rep_Rep366 _Rep() {}
_Rep_Rep367 explicit _Rep(char const *s) : _str(s), _cstr(_str.c_str()) {}
_Rep_Rep368 explicit _Rep(std::string const &s) : _str(s), _cstr(_str.c_str()) {}
369
370 // Make sure we reacquire _cstr from _str on copy and assignment
371 // to avoid holding on to a dangling pointer. However, if rhs'
372 // _cstr member doesn't come from its _str, just copy it directly
373 // over. This is to support lightweight _Rep objects used for
374 // internal lookups.
_Rep_Rep375 _Rep(_Rep const &rhs) : _str(rhs._str),
376 _cstr(rhs._str.c_str() != rhs._cstr ?
377 rhs._cstr : _str.c_str()),
378 _compareCode(rhs._compareCode),
379 _refCount(rhs._refCount.load()),
380 _isCounted(rhs._isCounted),
381 _setNum(rhs._setNum) {}
382 _Rep& operator=(_Rep const &rhs) {
383 _str = rhs._str;
384 _cstr = (rhs._str.c_str() != rhs._cstr ? rhs._cstr : _str.c_str());
385 _compareCode = rhs._compareCode;
386 _refCount = rhs._refCount.load();
387 _isCounted = rhs._isCounted;
388 _setNum = rhs._setNum;
389 return *this;
390 }
391
IncrementIfCounted_Rep392 inline bool IncrementIfCounted() const {
393 const bool isCounted = _isCounted;
394 if (isCounted) {
395 _refCount.fetch_add(1, std::memory_order_relaxed);
396 }
397 return isCounted;
398 }
399
400 std::string _str;
401 char const *_cstr;
402 mutable uint64_t _compareCode;
403 mutable std::atomic_int _refCount;
404 mutable bool _isCounted;
405 mutable unsigned char _setNum;
406 };
407
408 friend struct TfTokenFastArbitraryLessThan;
409 friend struct Tf_TokenRegistry;
410
411 TF_API static std::string const& _GetEmptyString();
412
413 mutable TfPointerAndBits<const _Rep> _rep;
414 };
415
416 inline size_t
Hash()417 TfToken::Hash() const
418 {
419 return TfHash()(*this);
420 }
421
422 /// Fast but non-lexicographical (in fact, arbitrary) less-than comparison for
423 /// TfTokens. Should only be used in performance-critical cases.
424 struct TfTokenFastArbitraryLessThan {
operatorTfTokenFastArbitraryLessThan425 inline bool operator()(TfToken const &lhs, TfToken const &rhs) const {
426 return lhs._rep.Get() < rhs._rep.Get();
427 }
428 };
429
430 /// Convert the vector of strings \p sv into a vector of \c TfToken
431 TF_API std::vector<TfToken>
432 TfToTokenVector(const std::vector<std::string> &sv);
433
434 /// Convert the vector of \c TfToken \p tv into a vector of strings
435 TF_API std::vector<std::string>
436 TfToStringVector(const std::vector<TfToken> &tv);
437
438 /// Overload hash_value for TfToken.
hash_value(const TfToken & x)439 inline size_t hash_value(const TfToken& x) { return x.Hash(); }
440
441 /// Convenience types.
442 typedef std::vector<TfToken> TfTokenVector;
443
444 PXR_NAMESPACE_CLOSE_SCOPE
445
446 #endif // PXR_BASE_TF_TOKEN_H
447