1 // This module implements Identifier, a short-optimized string allowed to
2 // contain only the ASCII characters hyphen, dot, 0-9, A-Z, a-z.
3 //
4 // As of mid-2021, the distribution of pre-release lengths on crates.io is:
5 //
6 // length count length count length count
7 // 0 355929 11 81 24 2
8 // 1 208 12 48 25 6
9 // 2 236 13 55 26 10
10 // 3 1909 14 25 27 4
11 // 4 1284 15 15 28 1
12 // 5 1742 16 35 30 1
13 // 6 3440 17 9 31 5
14 // 7 5624 18 6 32 1
15 // 8 1321 19 12 36 2
16 // 9 179 20 2 37 379
17 // 10 65 23 11
18 //
19 // and the distribution of build metadata lengths is:
20 //
21 // length count length count length count
22 // 0 364445 8 7725 18 1
23 // 1 72 9 16 19 1
24 // 2 7 10 85 20 1
25 // 3 28 11 17 22 4
26 // 4 9 12 10 26 1
27 // 5 68 13 9 27 1
28 // 6 73 14 10 40 5
29 // 7 53 15 6
30 //
31 // Therefore it really behooves us to be able to use the entire 8 bytes of a
32 // pointer for inline storage. For both pre-release and build metadata there are
33 // vastly more strings with length exactly 8 bytes than the sum over all lengths
34 // longer than 8 bytes.
35 //
36 // To differentiate the inline representation from the heap allocated long
37 // representation, we'll allocate heap pointers with 2-byte alignment so that
38 // they are guaranteed to have an unset least significant bit. Then in the repr
39 // we store for pointers, we rotate a 1 into the most significant bit of the
40 // most significant byte, which is never set for an ASCII byte.
41 //
42 // Inline repr:
43 //
44 // 0xxxxxxx 0xxxxxxx 0xxxxxxx 0xxxxxxx 0xxxxxxx 0xxxxxxx 0xxxxxxx 0xxxxxxx
45 //
46 // Heap allocated repr:
47 //
48 // 1ppppppp pppppppp pppppppp pppppppp pppppppp pppppppp pppppppp pppppppp 0
49 // ^ most significant bit least significant bit of orig ptr, rotated out ^
50 //
51 // Since the most significant bit doubles as a sign bit for the similarly sized
52 // signed integer type, the CPU has an efficient instruction for inspecting it,
53 // meaning we can differentiate between an inline repr and a heap allocated repr
54 // in one instruction. Effectively an inline repr always looks like a positive
55 // i64 while a heap allocated repr always looks like a negative i64.
56 //
57 // For the inline repr, we store \0 padding on the end of the stored characters,
58 // and thus the string length is readily determined efficiently by a cttz (count
59 // trailing zeros) or bsf (bit scan forward) instruction.
60 //
61 // For the heap allocated repr, the length is encoded as a base-128 varint at
62 // the head of the allocation.
63 //
64 // Empty strings are stored as an all-1 bit pattern, corresponding to -1i64.
65 // Consequently the all-0 bit pattern is never a legal representation in any
66 // repr, leaving it available as a niche for downstream code. For example this
67 // allows size_of::<Version>() == size_of::<Option<Version>>().
68
use crate::alloc::alloc::{alloc, dealloc, handle_alloc_error, Layout};
#[cfg(no_from_ne_bytes)]
use crate::backport::FromNeBytes;
use core::mem;
use core::num::{NonZeroU64, NonZeroUsize};
use core::ptr;
use core::slice;
use core::str;
77
// A short-optimized string: a single machine word that is either 8 inline
// ASCII bytes (\0-padded), a tagged pointer to a heap allocation, or the
// all-ones pattern for the empty string. See the encoding description in the
// module comment above.
#[repr(transparent)]
pub(crate) struct Identifier {
    // NonZeroU64 so that the all-zero bit pattern, which is never a valid
    // repr, serves as a niche: size_of::<Option<Identifier>>() equals
    // size_of::<Identifier>().
    repr: NonZeroU64,
}
82
83 impl Identifier {
84 const EMPTY: NonZeroU64 = unsafe { NonZeroU64::new_unchecked(!0) };
85
empty() -> Self86 pub(crate) const fn empty() -> Self {
87 // `mov rax, -1`
88 Identifier { repr: Self::EMPTY }
89 }
90
91 // SAFETY: string must be ASCII and not contain \0 bytes.
new_unchecked(string: &str) -> Self92 pub(crate) unsafe fn new_unchecked(string: &str) -> Self {
93 let len = string.len();
94 let repr = match len as u64 {
95 0 => Self::EMPTY,
96 1..=8 => {
97 let mut bytes = [0u8; 8];
98 // SAFETY: string is big enough to read len bytes, bytes is big
99 // enough to write len bytes, and they do not overlap.
100 unsafe { ptr::copy_nonoverlapping(string.as_ptr(), bytes.as_mut_ptr(), len) };
101 // SAFETY: it's nonzero because the input string was at least 1
102 // byte of ASCII and did not contain \0.
103 unsafe { NonZeroU64::new_unchecked(u64::from_ne_bytes(bytes)) }
104 }
105 9..=0xff_ffff_ffff_ffff => {
106 // SAFETY: len is in a range that does not contain 0.
107 let size = bytes_for_varint(unsafe { NonZeroUsize::new_unchecked(len) }) + len;
108 let align = 2;
109 // SAFETY: align is not zero, align is a power of two, and
110 // rounding size up to align does not overflow usize::MAX.
111 let layout = unsafe { Layout::from_size_align_unchecked(size, align) };
112 // SAFETY: layout's size is nonzero.
113 let ptr = unsafe { alloc(layout) };
114 let mut write = ptr;
115 let mut varint_remaining = len;
116 while varint_remaining > 0 {
117 // SAFETY: size is bytes_for_varint(len) bytes + len bytes.
118 // This is writing the first bytes_for_variant(len) bytes.
119 unsafe { ptr::write(write, varint_remaining as u8 | 0x80) };
120 varint_remaining >>= 7;
121 // SAFETY: still in bounds of the same allocation.
122 write = unsafe { write.add(1) };
123 }
124 // SAFETY: size is bytes_for_varint(len) bytes + len bytes. This
125 // is writing to the last len bytes.
126 unsafe { ptr::copy_nonoverlapping(string.as_ptr(), write, len) };
127 ptr_to_repr(ptr)
128 }
129 0x100_0000_0000_0000..=0xffff_ffff_ffff_ffff => {
130 unreachable!("please refrain from storing >64 petabytes of text in semver version");
131 }
132 #[cfg(no_exhaustive_int_match)] // rustc <1.33
133 _ => unreachable!(),
134 };
135 Identifier { repr }
136 }
137
is_empty(&self) -> bool138 pub(crate) fn is_empty(&self) -> bool {
139 // `cmp rdi, -1` -- basically: `repr as i64 == -1`
140 self.repr == Self::EMPTY
141 }
142
is_inline(&self) -> bool143 fn is_inline(&self) -> bool {
144 // `test rdi, rdi` -- basically: `repr as i64 >= 0`
145 self.repr.get() >> 63 == 0
146 }
147
is_empty_or_inline(&self) -> bool148 fn is_empty_or_inline(&self) -> bool {
149 // `cmp rdi, -2` -- basically: `repr as i64 > -2`
150 self.is_empty() || self.is_inline()
151 }
152
as_str(&self) -> &str153 pub(crate) fn as_str(&self) -> &str {
154 if self.is_empty() {
155 ""
156 } else if self.is_inline() {
157 // SAFETY: repr is in the inline representation.
158 unsafe { inline_as_str(&self.repr) }
159 } else {
160 // SAFETY: repr is in the heap allocated representation.
161 unsafe { ptr_as_str(&self.repr) }
162 }
163 }
164 }
165
166 impl Clone for Identifier {
clone(&self) -> Self167 fn clone(&self) -> Self {
168 let repr = if self.is_empty_or_inline() {
169 self.repr
170 } else {
171 let ptr = repr_to_ptr(self.repr);
172 // SAFETY: ptr is one of our own heap allocations.
173 let len = unsafe { decode_len(ptr) };
174 let size = bytes_for_varint(len) + len.get();
175 let align = 2;
176 // SAFETY: align is not zero, align is a power of two, and rounding
177 // size up to align does not overflow usize::MAX. This is just
178 // duplicating a previous allocation where all of these guarantees
179 // were already made.
180 let layout = unsafe { Layout::from_size_align_unchecked(size, align) };
181 // SAFETY: layout's size is nonzero.
182 let clone = unsafe { alloc(layout) };
183 // SAFETY: new allocation cannot overlap the previous one (this was
184 // not a realloc). The argument ptrs are readable/writeable
185 // respectively for size bytes.
186 unsafe { ptr::copy_nonoverlapping(ptr, clone, size) }
187 ptr_to_repr(clone)
188 };
189 Identifier { repr }
190 }
191 }
192
impl Drop for Identifier {
    fn drop(&mut self) {
        // Empty and inline reprs own no heap memory; nothing to free.
        if self.is_empty_or_inline() {
            return;
        }
        let ptr = repr_to_ptr_mut(self.repr);
        // SAFETY: ptr is one of our own heap allocations.
        let len = unsafe { decode_len(ptr) };
        // Reconstruct the allocation's total size: varint header + contents.
        let size = bytes_for_varint(len) + len.get();
        let align = 2;
        // SAFETY: align is not zero, align is a power of two, and rounding
        // size up to align does not overflow usize::MAX. These guarantees were
        // made when originally allocating this memory.
        let layout = unsafe { Layout::from_size_align_unchecked(size, align) };
        // SAFETY: ptr was previously allocated by the same allocator with the
        // same layout.
        unsafe { dealloc(ptr, layout) }
    }
}
212
213 impl PartialEq for Identifier {
eq(&self, rhs: &Self) -> bool214 fn eq(&self, rhs: &Self) -> bool {
215 if self.is_empty_or_inline() {
216 // Fast path (most common)
217 self.repr == rhs.repr
218 } else if rhs.is_empty_or_inline() {
219 false
220 } else {
221 // SAFETY: both reprs are in the heap allocated representation.
222 unsafe { ptr_as_str(&self.repr) == ptr_as_str(&rhs.repr) }
223 }
224 }
225 }
226
// Heap pointers are allocated with 2-byte alignment, so their least
// significant bit is an insignificant 0. We spend that spare bit by rotating
// a 1 tag into the most significant bit, which makes the repr distinguishable
// from 8 ASCII bytes (whose high bits are never set).
fn ptr_to_repr(ptr: *mut u8) -> NonZeroU64 {
    // `mov eax, 1`
    // `shld rax, rdi, 63`
    let tagged = ptr as u64 | 1;
    let repr = tagged.rotate_right(1);

    // SAFETY: the rotation lands the 1 tag in the most significant bit, so
    // repr cannot be zero.
    unsafe { NonZeroU64::new_unchecked(repr) }
}
240
// Inverse of ptr_to_repr: shifting left by one drops the 1 tag out of the
// most significant bit and restores the low 0 bit of the original 2-byte
// aligned pointer.
fn repr_to_ptr(repr: NonZeroU64) -> *const u8 {
    // `lea rax, [rdi + rdi]`
    let word = repr.get();
    (word << 1) as *const u8
}
248
repr_to_ptr_mut(repr: NonZeroU64) -> *mut u8249 fn repr_to_ptr_mut(repr: NonZeroU64) -> *mut u8 {
250 repr_to_ptr(repr) as *mut u8
251 }
252
// Length of an inline string, assuming the argument is in the inline
// representation: 1 to 8 nonzero ASCII bytes followed by \0 padding. Count
// the zero padding bits on the string-end side of the word and subtract
// whole bytes from 8.
fn inline_len(repr: NonZeroU64) -> NonZeroUsize {
    // Rustc >=1.53 has bit-scan intrinsics directly on the non-zeroable
    // integer types, which codegen better on many architectures (bsf vs
    // cttz). On older compilers fall back to counting on the plain u64.
    #[cfg(no_nonzero_bitscan)]
    let repr = repr.get();

    // The \0 padding sits at the numerically high end of the word on little
    // endian and at the numerically low end on big endian.
    #[cfg(target_endian = "little")]
    let padding_bits = repr.leading_zeros();
    #[cfg(target_endian = "big")]
    let padding_bits = repr.trailing_zeros();

    let len = 8 - padding_bits as usize / 8;

    // SAFETY: repr is nonzero, so there are at most 63 padding bits on
    // either end, leaving at least one nonzero byte.
    unsafe { NonZeroUsize::new_unchecked(len) }
}
275
276 // SAFETY: repr must be in the inline representation, i.e. at least 1 and at
277 // most 8 nonzero ASCII bytes padded on the end with \0 bytes.
inline_as_str(repr: &NonZeroU64) -> &str278 unsafe fn inline_as_str(repr: &NonZeroU64) -> &str {
279 let ptr = repr as *const NonZeroU64 as *const u8;
280 let len = inline_len(*repr).get();
281 // SAFETY: we are viewing the nonzero ASCII prefix of the inline repr's
282 // contents as a slice of bytes. Input/output lifetimes are correctly
283 // associated.
284 let slice = unsafe { slice::from_raw_parts(ptr, len) };
285 // SAFETY: the string contents are known to be only ASCII bytes, which are
286 // always valid UTF-8.
287 unsafe { str::from_utf8_unchecked(slice) }
288 }
289
// Decode the base-128 varint length header at the front of a heap
// allocation. Each varint digit occupies one byte with its most significant
// bit set; the ASCII string content that follows (at least 9 bytes, all with
// msb unset) is what terminates the digits.
//
// SAFETY: ptr must be one of our own heap allocations, with the varint
// header already written.
unsafe fn decode_len(ptr: *const u8) -> NonZeroUsize {
    // Identifiers 128 bytes or longer; not exercised by any crate version
    // currently published to crates.io.
    #[cold]
    #[inline(never)]
    unsafe fn decode_len_cold(mut ptr: *const u8) -> NonZeroUsize {
        let mut len = 0;
        let mut shift = 0;
        // SAFETY: the varint continues only through bytes with msb set, and
        // the ASCII content following it has msb unset, so reads stay inside
        // the allocation.
        let mut digit = unsafe { *ptr };
        while digit >= 0x80 {
            len += ((digit & 0x7f) as usize) << shift;
            shift += 7;
            // SAFETY: still in bounds of the same allocation.
            ptr = unsafe { ptr.add(1) };
            digit = unsafe { *ptr };
        }
        // SAFETY: this path only handles lengths of 128 or more, never 0.
        unsafe { NonZeroUsize::new_unchecked(len) }
    }

    // SAFETY: there is at least one varint byte followed by at least 9 bytes
    // of string content, so the allocation is at least 10 bytes and reading
    // the first two is in bounds.
    let [first, second] = unsafe { ptr::read(ptr as *const [u8; 2]) };
    if second < 0x80 {
        // SAFETY: a single-digit varint encodes a heap length of 9..=127,
        // which cannot be zero.
        unsafe { NonZeroUsize::new_unchecked((first & 0x7f) as usize) }
    } else {
        // SAFETY: same contract as this function, forwarded.
        unsafe { decode_len_cold(ptr) }
    }
}
335
336 // SAFETY: repr must be in the heap allocated representation, with varint header
337 // and string contents already written.
ptr_as_str(repr: &NonZeroU64) -> &str338 unsafe fn ptr_as_str(repr: &NonZeroU64) -> &str {
339 let ptr = repr_to_ptr(*repr);
340 let len = unsafe { decode_len(ptr) };
341 let header = bytes_for_varint(len);
342 let slice = unsafe { slice::from_raw_parts(ptr.add(header), len.get()) };
343 // SAFETY: all identifier contents are ASCII bytes, which are always valid
344 // UTF-8.
345 unsafe { str::from_utf8_unchecked(slice) }
346 }
347
// How many base-128 digits the varint encoding of len occupies: the number
// of significant bits, divided by 7, rounded up.
fn bytes_for_varint(len: NonZeroUsize) -> usize {
    // Rustc <1.53 lacks bit-scan intrinsics on NonZeroUsize; count zeros on
    // the plain usize instead.
    #[cfg(no_nonzero_bitscan)]
    let len = len.get();

    let usize_bits = mem::size_of::<usize>() * 8;
    let significant_bits = usize_bits - len.leading_zeros() as usize;
    (significant_bits + 6) / 7
}
357