1 /* This Source Code Form is subject to the terms of the Mozilla Public
2  * License, v. 2.0. If a copy of the MPL was not distributed with this
3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 
5 #![allow(unknown_lints)]
6 #![warn(rust_2018_idioms)]
7 // (It's tempting to avoid the utf8 checks, but they're easy to get wrong, so)
8 #![deny(unsafe_code)]
9 #[cfg(feature = "serde_support")]
10 mod serde_support;
11 
12 #[cfg(feature = "rusqlite_support")]
13 mod rusqlite_support;
14 
15 use std::{
16     cmp::Ordering,
17     fmt,
18     hash::{Hash, Hasher},
19     ops, str,
20 };
21 
22 /// This is a type intended to be used to represent the guids used by sync. It
23 /// has several benefits over using a `String`:
24 ///
25 /// 1. It's more explicit about what is being stored, and could prevent bugs
26 ///    where a Guid is passed to a function expecting text.
27 ///
28 /// 2. Guids are guaranteed to be immutable.
29 ///
30 /// 3. It's optimized for the guids commonly used by sync. In particular, short guids
31 ///    (including the guids which would meet `PlacesUtils.isValidGuid`) do not incur
32 ///    any heap allocation, and are stored inline.
33 #[derive(Clone)]
34 pub struct Guid(Repr);
35 
36 // The internal representation of a GUID. Most Sync GUIDs are 12 bytes,
37 // and contain only base64url characters; we can store them on the stack
38 // without a heap allocation. However, arbitrary ascii guids of up to length 64
39 // are possible, in which case we fall back to a heap-allocated string.
40 //
41 // This is separate only because making `Guid` an enum would expose the
42 // internals.
43 #[derive(Clone)]
44 enum Repr {
45     // see FastGuid for invariants
46     Fast(FastGuid),
47 
48     // invariants:
49     // - _0.len() > MAX_FAST_GUID_LEN
50     Slow(String),
51 }
52 
53 /// Invariants:
54 ///
55 /// - `len <= MAX_FAST_GUID_LEN`.
56 /// - `data[0..len]` encodes valid utf8.
57 /// - `data[len..].iter().all(|&b| b == b'\0')`
58 ///
59 /// Note: None of these are required for memory safety, just correctness.
60 #[derive(Clone)]
61 struct FastGuid {
62     len: u8,
63     data: [u8; MAX_FAST_GUID_LEN],
64 }
65 
// The largest inline length (experimentally determined) we can use before
// `Repr::Fast` becomes larger than `Repr::Slow` on 32 bit systems. What really
// matters is that it is not too big, and that it is above 12 bytes.
const MAX_FAST_GUID_LEN: usize = 14;
70 
71 impl FastGuid {
72     #[inline]
from_slice(bytes: &[u8]) -> Self73     fn from_slice(bytes: &[u8]) -> Self {
74         // Checked by the caller, so debug_assert is fine.
75         debug_assert!(
76             can_use_fast(bytes),
77             "Bug: Caller failed to check can_use_fast: {:?}",
78             bytes
79         );
80         let mut data = [0u8; MAX_FAST_GUID_LEN];
81         data[0..bytes.len()].copy_from_slice(bytes);
82         FastGuid {
83             len: bytes.len() as u8,
84             data,
85         }
86     }
87 
88     #[inline]
as_str(&self) -> &str89     fn as_str(&self) -> &str {
90         // Note: we only use debug_assert! to enusre valid utf8-ness, so this need
91         str::from_utf8(self.bytes()).expect("Invalid fast guid bytes!")
92     }
93 
94     #[inline]
len(&self) -> usize95     fn len(&self) -> usize {
96         self.len as usize
97     }
98 
99     #[inline]
bytes(&self) -> &[u8]100     fn bytes(&self) -> &[u8] {
101         &self.data[0..self.len()]
102     }
103 }
104 
105 // Returns:
106 // - true to use Repr::Fast
107 // - false to use Repr::Slow
108 #[inline]
can_use_fast<T: ?Sized + AsRef<[u8]>>(bytes: &T) -> bool109 fn can_use_fast<T: ?Sized + AsRef<[u8]>>(bytes: &T) -> bool {
110     let bytes = bytes.as_ref();
111     // This is fine as a debug_assert since we'll still panic if it's ever used
112     // in such a way where it would matter.
113     debug_assert!(str::from_utf8(bytes).is_ok());
114     bytes.len() <= MAX_FAST_GUID_LEN
115 }
116 
117 impl Guid {
118     /// Create a guid from a `str`.
119     #[inline]
new(s: &str) -> Self120     pub fn new(s: &str) -> Self {
121         Guid::from_slice(s.as_ref())
122     }
123 
124     /// Create an empty guid. Usable as a constant.
125     #[inline]
empty() -> Self126     pub const fn empty() -> Self {
127         Guid(Repr::Fast(FastGuid {
128             len: 0,
129             data: [0u8; MAX_FAST_GUID_LEN],
130         }))
131     }
132 
133     /// Create a random guid (of 12 base64url characters). Requires the `random`
134     /// feature.
135     #[cfg(feature = "random")]
random() -> Self136     pub fn random() -> Self {
137         let bytes: [u8; 9] = rand::random();
138 
139         // Note: only first 12 bytes are used, but remaining are required to
140         // build the FastGuid
141         let mut output = [0u8; MAX_FAST_GUID_LEN];
142 
143         let bytes_written =
144             base64::encode_config_slice(&bytes, base64::URL_SAFE_NO_PAD, &mut output[..12]);
145 
146         debug_assert!(bytes_written == 12);
147 
148         Guid(Repr::Fast(FastGuid {
149             len: 12,
150             data: output,
151         }))
152     }
153 
154     /// Convert `b` into a `Guid`.
155     #[inline]
from_string(s: String) -> Self156     pub fn from_string(s: String) -> Self {
157         Guid::from_vec(s.into_bytes())
158     }
159 
160     /// Convert `b` into a `Guid`.
161     #[inline]
from_slice(b: &[u8]) -> Self162     pub fn from_slice(b: &[u8]) -> Self {
163         if can_use_fast(b) {
164             Guid(Repr::Fast(FastGuid::from_slice(b)))
165         } else {
166             Guid::new_slow(b.into())
167         }
168     }
169 
170     /// Convert `v` to a `Guid`, consuming it.
171     #[inline]
from_vec(v: Vec<u8>) -> Self172     pub fn from_vec(v: Vec<u8>) -> Self {
173         if can_use_fast(&v) {
174             Guid(Repr::Fast(FastGuid::from_slice(&v)))
175         } else {
176             Guid::new_slow(v)
177         }
178     }
179 
180     /// Get the data backing this `Guid` as a `&[u8]`.
181     #[inline]
as_bytes(&self) -> &[u8]182     pub fn as_bytes(&self) -> &[u8] {
183         match &self.0 {
184             Repr::Fast(rep) => rep.bytes(),
185             Repr::Slow(rep) => rep.as_ref(),
186         }
187     }
188 
189     /// Get the data backing this `Guid` as a `&str`.
190     #[inline]
as_str(&self) -> &str191     pub fn as_str(&self) -> &str {
192         match &self.0 {
193             Repr::Fast(rep) => rep.as_str(),
194             Repr::Slow(rep) => rep.as_ref(),
195         }
196     }
197 
198     /// Convert this `Guid` into a `String`, consuming it in the process.
199     #[inline]
into_string(self) -> String200     pub fn into_string(self) -> String {
201         match self.0 {
202             Repr::Fast(rep) => rep.as_str().into(),
203             Repr::Slow(rep) => rep,
204         }
205     }
206 
207     /// Returns true for Guids that are deemed valid by the sync server.
208     /// See https://github.com/mozilla-services/server-syncstorage/blob/d92ef07877aebd05b92f87f6ade341d6a55bffc8/syncstorage/bso.py#L24
is_valid_for_sync_server(&self) -> bool209     pub fn is_valid_for_sync_server(&self) -> bool {
210         !self.is_empty()
211             && self.len() <= 64
212             && self
213                 .bytes()
214                 .all(|b| (b' '..=b'~').contains(&b) && b != b',')
215     }
216 
217     /// Returns true for Guids that are valid places guids, and false for all others.
is_valid_for_places(&self) -> bool218     pub fn is_valid_for_places(&self) -> bool {
219         self.len() == 12 && self.bytes().all(Guid::is_valid_places_byte)
220     }
221 
222     /// Returns true if the byte `b` is a valid base64url byte.
223     #[inline]
is_valid_places_byte(b: u8) -> bool224     pub fn is_valid_places_byte(b: u8) -> bool {
225         BASE64URL_BYTES[b as usize] == 1
226     }
227 
228     #[cold]
new_slow(v: Vec<u8>) -> Self229     fn new_slow(v: Vec<u8>) -> Self {
230         assert!(
231             !can_use_fast(&v),
232             "Could use fast for guid (len = {})",
233             v.len()
234         );
235         Guid(Repr::Slow(
236             String::from_utf8(v).expect("Invalid slow guid bytes!"),
237         ))
238     }
239 }
240 
// Lookup table used to implement the places validity checks: entry `b` is 1
// when byte `b` is a base64url character (ascii letter, ascii digit, `-` or
// `_`) and 0 otherwise. Built at compile time rather than spelled out by hand.
const BASE64URL_BYTES: [u8; 256] = {
    let mut table = [0u8; 256];
    let mut index = 0usize;
    while index < 256 {
        let byte = index as u8;
        if byte.is_ascii_alphanumeric() || byte == b'-' || byte == b'_' {
            table[index] = 1;
        }
        index += 1;
    }
    table
};
252 
253 impl Ord for Guid {
cmp(&self, other: &Self) -> Ordering254     fn cmp(&self, other: &Self) -> Ordering {
255         self.as_bytes().cmp(&other.as_bytes())
256     }
257 }
258 
259 impl PartialOrd for Guid {
partial_cmp(&self, other: &Self) -> Option<Ordering>260     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
261         Some(self.cmp(other))
262     }
263 }
264 
265 impl PartialEq for Guid {
eq(&self, other: &Self) -> bool266     fn eq(&self, other: &Self) -> bool {
267         self.as_bytes() == other.as_bytes()
268     }
269 }
270 
271 impl Eq for Guid {}
272 
273 impl Hash for Guid {
hash<H: Hasher>(&self, state: &mut H)274     fn hash<H: Hasher>(&self, state: &mut H) {
275         self.as_bytes().hash(state);
276     }
277 }
278 
279 impl<'a> From<&'a str> for Guid {
280     #[inline]
from(s: &'a str) -> Guid281     fn from(s: &'a str) -> Guid {
282         Guid::from_slice(s.as_ref())
283     }
284 }
285 impl<'a> From<&'a &str> for Guid {
286     #[inline]
from(s: &'a &str) -> Guid287     fn from(s: &'a &str) -> Guid {
288         Guid::from_slice(s.as_ref())
289     }
290 }
291 
292 impl<'a> From<&'a [u8]> for Guid {
293     #[inline]
from(s: &'a [u8]) -> Guid294     fn from(s: &'a [u8]) -> Guid {
295         Guid::from_slice(s)
296     }
297 }
298 
299 impl From<String> for Guid {
300     #[inline]
from(s: String) -> Guid301     fn from(s: String) -> Guid {
302         Guid::from_string(s)
303     }
304 }
305 
306 impl From<Vec<u8>> for Guid {
307     #[inline]
from(v: Vec<u8>) -> Guid308     fn from(v: Vec<u8>) -> Guid {
309         Guid::from_vec(v)
310     }
311 }
312 
313 impl From<Guid> for String {
314     #[inline]
from(guid: Guid) -> String315     fn from(guid: Guid) -> String {
316         guid.into_string()
317     }
318 }
319 
320 impl From<Guid> for Vec<u8> {
321     #[inline]
from(guid: Guid) -> Vec<u8>322     fn from(guid: Guid) -> Vec<u8> {
323         guid.into_string().into_bytes()
324     }
325 }
326 
327 impl AsRef<str> for Guid {
328     #[inline]
as_ref(&self) -> &str329     fn as_ref(&self) -> &str {
330         self.as_str()
331     }
332 }
333 
334 impl AsRef<[u8]> for Guid {
335     #[inline]
as_ref(&self) -> &[u8]336     fn as_ref(&self) -> &[u8] {
337         self.as_bytes()
338     }
339 }
340 
341 impl ops::Deref for Guid {
342     type Target = str;
343     #[inline]
deref(&self) -> &str344     fn deref(&self) -> &str {
345         self.as_str()
346     }
347 }
348 
349 // The default Debug impl is pretty unhelpful here.
350 impl fmt::Debug for Guid {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result351     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
352         write!(f, "Guid({:?})", self.as_str())
353     }
354 }
355 
356 impl fmt::Display for Guid {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result357     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
358         fmt::Display::fmt(self.as_str(), f)
359     }
360 }
361 
362 impl std::default::Default for Guid {
363     /// Create a default guid by calling `Guid::empty()`
364     #[inline]
default() -> Self365     fn default() -> Self {
366         Guid::empty()
367     }
368 }
369 
370 macro_rules! impl_guid_eq {
371     ($($other: ty),+) => {$(
372         impl<'a> PartialEq<$other> for Guid {
373             #[inline]
374             fn eq(&self, other: &$other) -> bool {
375                 PartialEq::eq(AsRef::<[u8]>::as_ref(self), AsRef::<[u8]>::as_ref(other))
376             }
377         }
378 
379         impl<'a> PartialEq<Guid> for $other {
380             #[inline]
381             fn eq(&self, other: &Guid) -> bool {
382                 PartialEq::eq(AsRef::<[u8]>::as_ref(self), AsRef::<[u8]>::as_ref(other))
383             }
384         }
385     )+}
386 }
387 
388 // Implement direct comparison with some common types from the stdlib.
389 impl_guid_eq![str, &'a str, String, [u8], &'a [u8], Vec<u8>];
390 
#[cfg(test)]
mod test {
    use super::*;

    // The lookup table must mark exactly the base64url alphabet.
    #[test]
    fn test_base64url_bytes() {
        let alphabet = (b'0'..=b'9')
            .chain(b'a'..=b'z')
            .chain(b'A'..=b'Z')
            .chain([b'_', b'-'].iter().copied());
        let mut expect = [0u8; 256];
        for b in alphabet {
            expect[usize::from(b)] = 1;
        }
        assert_eq!(&BASE64URL_BYTES[..], &expect[..]);
    }

    #[test]
    fn test_valid_for_places() {
        let valid = [
            Guid::from("aaaabbbbcccc"),
            Guid::from_slice(b"09_az-AZ_09-"),
        ];
        for guid in valid.iter() {
            assert!(guid.is_valid_for_places());
        }

        let invalid = [
            Guid::from("aaaabbbbccccd"), // too long
            Guid::from("aaaabbbbccc"),   // too short
            Guid::from("aaaabbbbccc="),  // right length, bad character
        ];
        for guid in invalid.iter() {
            assert!(!guid.is_valid_for_places());
        }
    }

    #[allow(clippy::cmp_owned)] // See clippy note below.
    #[test]
    fn test_comparison() {
        let lower = "abcdabcdabcd";
        let upper = "ABCDabcdabcd";

        // Guid vs &str
        assert_eq!(Guid::from(lower), lower);
        assert_ne!(Guid::from(lower.to_string()), upper);

        // Guid vs &[u8]
        assert_eq!(Guid::from(lower), lower.as_bytes());
        assert_ne!(Guid::from(lower.as_bytes()), upper.as_bytes());

        // Guid vs String
        assert_eq!(Guid::from(lower.as_bytes().to_owned()), lower.to_string());
        assert_ne!(Guid::from(lower), upper.to_string());

        // Guid vs Vec<u8>, with guids too long to be stored inline
        assert_eq!(
            Guid::from("abcdabcdabcd1234"),
            "abcdabcdabcd1234".as_bytes().to_vec()
        );
        assert_ne!(
            Guid::from("abcdabcdabcd4321"),
            "ABCDabcdabcd4321".as_bytes().to_vec()
        );

        // order by data instead of length
        // hrmph - clippy in 1.54-nightly complains about the below:
        // 'error: this creates an owned instance just for comparison'
        // '... help: try: `*"aaaaaa"`'
        // and suggests a change that's wrong - so we've ignored the lint above.
        assert!(Guid::from("zzz") > Guid::from("aaaaaa"));
        assert!(Guid::from("ThisIsASolowGuid") < Guid::from("zzz"));
        assert!(Guid::from("ThisIsASolowGuid") > Guid::from("AnotherSlowGuid"));
    }

    #[cfg(feature = "random")]
    #[test]
    fn test_random() {
        use std::collections::HashSet;
        // Checks uniqueness within our sample of 1000. This could in theory
        // fail randomly, but desktop has the same test, and it's never caused
        // a problem AFAIK.
        let mut seen: HashSet<String> = HashSet::new();
        for _ in 0..1000 {
            let guid = Guid::random();
            assert_eq!(guid.len(), 12);
            assert!(guid.is_valid_for_places());
            let decoded = base64::decode_config(&guid, base64::URL_SAFE_NO_PAD).unwrap();
            assert_eq!(decoded.len(), 9);
            let no_collision = seen.insert(String::from(guid.clone()));
            assert!(no_collision, "{}", guid);
        }
    }
}
474