1 /*
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <folly/lang/SafeAssert.h>
20 
21 #include <stdint.h>
22 
23 namespace folly {
24 
25 /**
26  * StampedPtr packs both a pointer to T and a uint16_t into a 64-bit value,
27  * exploiting the fact that current addresses are limited to 48 bits on
28  * all current x86-64 and ARM64 processors.
29  *
30  * For both x86-64 and ARM64, 64-bit pointers have a canonical
31  * form in which the upper 16 bits are equal to bit 47.  Intel has
32  * announced a 57-bit addressing mode (see https://software.intel.com/
33  * sites/default/files/managed/2b/80/5-level_paging_white_paper.pdf),
34  * but it is not yet available.  The first problematic platform will
35  * probably be ARMv8.2, which supports 52-bit virtual addresses.
36  *
37  * This code works on all of the platforms I have available for test,
38  * and probably on all currently-shipping platforms that have a hope of
39  * compiling folly.  Rather than enumerating the supported platforms via
40  * ifdef, this code dynamically validates its packing assumption in debug
41  * builds on each call to a mutating function.  Presumably by the time we
42  * are running this process in an operating system image that can address
43  * more than 256TB of memory, RAM cost and the latency of 128-bit CAS
44  * will have improved enough that this optimization is no longer impactful.
45  *
46  * A common approach to this kind of packing seems to be to just assume
47  * the top 16 bits are zero, but https://github.com/LuaJIT/LuaJIT/issues/49
48  * indicates that ARM64 platforms in the wild are actually setting bit 47
49  * in their stack addresses.  That means that we need to extend bit 47 to
50  * do the right thing (it's not expensive, it compiles to one instruction
51  * on x86-64 and arm64).
52  *
53  * Compare to PackedSyncPtr and DiscriminatedPtr, which perform similar
54  * packing but add additional functionality.  The name is taken from
55  * Java's AtomicStampedReference.  Unlike PackedSyncPtr, which tries to
56  * act pointer-like, this class acts more like a pair whose elements are
57  * named ptr and stamp.  It also allows direct access to the internal
58  * raw field: since we're already at the metal you might want to play
59  * additional games.  It is guaranteed that a zero raw value gets decoded
60  * as a (ptr,stamp) of (nullptr,0).
61  */
62 template <typename T>
63 struct StampedPtr {
64   /**
65    * The packing is not guaranteed, except that it is guaranteed that
66    * raw == 0 iff ptr() == nullptr && stamp() == 0.
67    */
68   uint64_t raw;
69 
70   /* IMPORTANT: default initialization doesn't result in a sane state */
71 
ptrStampedPtr72   T* ptr() const { return unpackPtr(raw); }
73 
stampStampedPtr74   uint16_t stamp() const { return unpackStamp(raw); }
75 
setStampedPtr76   void set(T* ptr, uint16_t stamp) { raw = pack(ptr, stamp); }
77 
setPtrStampedPtr78   void setPtr(T* ptr) { raw = pack(ptr, unpackStamp(raw)); }
79 
setStampStampedPtr80   void setStamp(uint16_t stamp) { raw = pack(unpackPtr(raw), stamp); }
81 
unpackPtrStampedPtr82   static T* unpackPtr(uint64_t raw) {
83     // Canonical form means we need to extend bit 47 of the pointer to
84     // bits 48..63 (unless the operating system never hands those pointers
85     // to us, which is difficult to prove).  Signed right-shift of a
86     // negative number is implementation-defined in C++ (not undefined!),
87     // but actually does the right thing on all the platforms I can find.
88     auto extended = static_cast<int64_t>(raw) >> kInternalStampBits;
89     return reinterpret_cast<T*>(static_cast<intptr_t>(extended));
90   }
91 
unpackStampStampedPtr92   static uint16_t unpackStamp(uint64_t raw) {
93     return static_cast<uint16_t>(raw);
94   }
95 
packStampedPtr96   static uint64_t pack(T* ptr, uint16_t stamp) {
97     auto shifted = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(ptr))
98         << kInternalStampBits;
99     uint64_t raw = shifted | stamp;
100     FOLLY_SAFE_DCHECK(unpackPtr(raw) == ptr, "ptr mismatch.");
101     FOLLY_SAFE_DCHECK(unpackStamp(raw) == stamp, "stamp mismatch.");
102     return raw;
103   }
104 
105  private:
106   // On 32-bit platforms it works okay to store a ptr in the top 48
107   // bits of a 64-bit value, but it will result in unnecessary work.
108   // If we align the pointer part at word granularity when we have the
109   // space then no shifting will ever be needed.
110   static constexpr unsigned kInternalStampBits = sizeof(void*) == 4 ? 32 : 16;
111 };
112 
113 template <typename T>
makeStampedPtr(T * ptr,uint16_t stamp)114 StampedPtr<T> makeStampedPtr(T* ptr, uint16_t stamp) {
115   return StampedPtr<T>{StampedPtr<T>::pack(ptr, stamp)};
116 }
117 
118 } // namespace folly
119