1 /* This Source Code Form is subject to the terms of the Mozilla Public
2  * License, v. 2.0. If a copy of the MPL was not distributed with this
3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 
5 #![allow(unknown_lints)]
6 #![warn(rust_2018_idioms)]
// (It's tempting to use unsafe code to skip the utf8 checks, but that's easy to get wrong, so we forbid it.)
8 #![deny(unsafe_code)]
9 #[cfg(feature = "serde_support")]
10 mod serde_support;
11 
12 #[cfg(feature = "rusqlite_support")]
13 mod rusqlite_support;
14 
15 use std::{
16     cmp::Ordering,
17     fmt,
18     hash::{Hash, Hasher},
19     ops, str,
20 };
21 
22 /// This is a type intended to be used to represent the guids used by sync. It
23 /// has several benefits over using a `String`:
24 ///
25 /// 1. It's more explicit about what is being stored, and could prevent bugs
26 ///    where a Guid is passed to a function expecting text.
27 ///
28 /// 2. Guids are guaranteed to be immutable.
29 ///
30 /// 3. It's optimized for the guids commonly used by sync. In particular, short guids
31 ///    (including the guids which would meet `PlacesUtils.isValidGuid`) do not incur
32 ///    any heap allocation, and are stored inline.
33 #[derive(Clone)]
34 pub struct Guid(Repr);
35 
36 // The internal representation of a GUID. Most Sync GUIDs are 12 bytes,
37 // and contain only base64url characters; we can store them on the stack
38 // without a heap allocation. However, arbitrary ascii guids of up to length 64
39 // are possible, in which case we fall back to a heap-allocated string.
40 //
41 // This is separate only because making `Guid` an enum would expose the
42 // internals.
43 #[derive(Clone)]
44 enum Repr {
45     // see FastGuid for invariants
46     Fast(FastGuid),
47 
48     // invariants:
49     // - _0.len() > MAX_FAST_GUID_LEN
50     Slow(String),
51 }
52 
53 /// Invariants:
54 ///
55 /// - `len <= MAX_FAST_GUID_LEN`.
56 /// - `data[0..len]` encodes valid utf8.
57 /// - `data[len..].iter().all(|&b| b == b'\0')`
58 ///
59 /// Note: None of these are required for memory safety, just correctness.
60 #[derive(Clone)]
61 struct FastGuid {
62     len: u8,
63     data: [u8; MAX_FAST_GUID_LEN],
64 }
65 
// This is the maximum inline length (experimentally determined) we can use
// before `Repr::Fast` becomes larger than `Repr::Slow` on 32 bit systems. The
// important thing is really that it's not too big, and is above 12 bytes.
const MAX_FAST_GUID_LEN: usize = 14;
70 
71 impl FastGuid {
72     #[inline]
from_slice(bytes: &[u8]) -> Self73     fn from_slice(bytes: &[u8]) -> Self {
74         // Checked by the caller, so debug_assert is fine.
75         debug_assert!(
76             can_use_fast(bytes),
77             "Bug: Caller failed to check can_use_fast: {:?}",
78             bytes
79         );
80         let mut data = [0u8; MAX_FAST_GUID_LEN];
81         data[0..bytes.len()].copy_from_slice(bytes);
82         FastGuid {
83             len: bytes.len() as u8,
84             data,
85         }
86     }
87 
88     #[inline]
as_str(&self) -> &str89     fn as_str(&self) -> &str {
90         // Note: we only use debug_assert! to enusre valid utf8-ness, so this need
91         str::from_utf8(self.bytes()).expect("Invalid fast guid bytes!")
92     }
93 
94     #[inline]
len(&self) -> usize95     fn len(&self) -> usize {
96         self.len as usize
97     }
98 
99     #[inline]
bytes(&self) -> &[u8]100     fn bytes(&self) -> &[u8] {
101         &self.data[0..self.len()]
102     }
103 }
104 
105 // Returns:
106 // - true to use Repr::Fast
107 // - false to use Repr::Slow
108 #[inline]
can_use_fast<T: ?Sized + AsRef<[u8]>>(bytes: &T) -> bool109 fn can_use_fast<T: ?Sized + AsRef<[u8]>>(bytes: &T) -> bool {
110     let bytes = bytes.as_ref();
111     // This is fine as a debug_assert since we'll still panic if it's ever used
112     // in such a way where it would matter.
113     debug_assert!(str::from_utf8(bytes).is_ok());
114     bytes.len() <= MAX_FAST_GUID_LEN
115 }
116 
117 impl Guid {
118     /// Create a guid from a `str`.
119     #[inline]
new(s: &str) -> Self120     pub fn new(s: &str) -> Self {
121         Guid::from_slice(s.as_ref())
122     }
123 
124     /// Create an empty guid. Usable as a constant.
125     #[inline]
empty() -> Self126     pub const fn empty() -> Self {
127         Guid(Repr::Fast(FastGuid {
128             len: 0,
129             data: [0u8; MAX_FAST_GUID_LEN],
130         }))
131     }
132 
133     /// Create a random guid (of 12 base64url characters). Requires the `random`
134     /// feature.
135     #[cfg(feature = "random")]
random() -> Self136     pub fn random() -> Self {
137         let bytes: [u8; 9] = rand::random();
138 
139         // Note: only first 12 bytes are used, but remaining are required to
140         // build the FastGuid
141         let mut output = [0u8; MAX_FAST_GUID_LEN];
142 
143         let bytes_written =
144             base64::encode_config_slice(&bytes, base64::URL_SAFE_NO_PAD, &mut output[..12]);
145 
146         debug_assert!(bytes_written == 12);
147 
148         Guid(Repr::Fast(FastGuid {
149             len: 12,
150             data: output,
151         }))
152     }
153 
154     /// Convert `b` into a `Guid`.
155     #[inline]
from_string(s: String) -> Self156     pub fn from_string(s: String) -> Self {
157         Guid::from_vec(s.into_bytes())
158     }
159 
160     /// Convert `b` into a `Guid`.
161     #[inline]
from_slice(b: &[u8]) -> Self162     pub fn from_slice(b: &[u8]) -> Self {
163         if can_use_fast(b) {
164             Guid(Repr::Fast(FastGuid::from_slice(b)))
165         } else {
166             Guid::new_slow(b.into())
167         }
168     }
169 
170     /// Convert `v` to a `Guid`, consuming it.
171     #[inline]
from_vec(v: Vec<u8>) -> Self172     pub fn from_vec(v: Vec<u8>) -> Self {
173         if can_use_fast(&v) {
174             Guid(Repr::Fast(FastGuid::from_slice(&v)))
175         } else {
176             Guid::new_slow(v)
177         }
178     }
179 
180     /// Get the data backing this `Guid` as a `&[u8]`.
181     #[inline]
as_bytes(&self) -> &[u8]182     pub fn as_bytes(&self) -> &[u8] {
183         match &self.0 {
184             Repr::Fast(rep) => rep.bytes(),
185             Repr::Slow(rep) => rep.as_ref(),
186         }
187     }
188 
189     /// Get the data backing this `Guid` as a `&str`.
190     #[inline]
as_str(&self) -> &str191     pub fn as_str(&self) -> &str {
192         match &self.0 {
193             Repr::Fast(rep) => rep.as_str(),
194             Repr::Slow(rep) => rep.as_ref(),
195         }
196     }
197 
198     /// Convert this `Guid` into a `String`, consuming it in the process.
199     #[inline]
into_string(self) -> String200     pub fn into_string(self) -> String {
201         match self.0 {
202             Repr::Fast(rep) => rep.as_str().into(),
203             Repr::Slow(rep) => rep,
204         }
205     }
206 
207     /// Returns true for Guids that are deemed valid by the sync server.
208     /// See https://github.com/mozilla-services/server-syncstorage/blob/d92ef07877aebd05b92f87f6ade341d6a55bffc8/syncstorage/bso.py#L24
is_valid_for_sync_server(&self) -> bool209     pub fn is_valid_for_sync_server(&self) -> bool {
210         !self.is_empty()
211             && self.len() <= 64
212             && self.bytes().all(|b| b >= b' ' && b <= b'~' && b != b',')
213     }
214 
215     /// Returns true for Guids that are valid places guids, and false for all others.
is_valid_for_places(&self) -> bool216     pub fn is_valid_for_places(&self) -> bool {
217         self.len() == 12 && self.bytes().all(Guid::is_valid_places_byte)
218     }
219 
220     /// Returns true if the byte `b` is a valid base64url byte.
221     #[inline]
is_valid_places_byte(b: u8) -> bool222     pub fn is_valid_places_byte(b: u8) -> bool {
223         BASE64URL_BYTES[b as usize] == 1
224     }
225 
226     #[cold]
new_slow(v: Vec<u8>) -> Self227     fn new_slow(v: Vec<u8>) -> Self {
228         assert!(
229             !can_use_fast(&v),
230             "Could use fast for guid (len = {})",
231             v.len()
232         );
233         Guid(Repr::Slow(
234             String::from_utf8(v).expect("Invalid slow guid bytes!"),
235         ))
236     }
237 }
238 
// Membership table for the base64url alphabet (`0-9`, `A-Z`, `a-z`, `-` and
// `_`): entry `b` is 1 when byte `b` belongs to the alphabet and 0 otherwise.
// This is used to implement the places validity checks.
const BASE64URL_BYTES: [u8; 256] = {
    let mut table = [0u8; 256];
    let mut byte = 0usize;
    while byte < 256 {
        table[byte] = match byte as u8 {
            b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'-' | b'_' => 1,
            _ => 0,
        };
        byte += 1;
    }
    table
};
250 
251 impl Ord for Guid {
cmp(&self, other: &Self) -> Ordering252     fn cmp(&self, other: &Self) -> Ordering {
253         self.as_bytes().cmp(&other.as_bytes())
254     }
255 }
256 
257 impl PartialOrd for Guid {
partial_cmp(&self, other: &Self) -> Option<Ordering>258     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
259         Some(self.cmp(other))
260     }
261 }
262 
263 impl PartialEq for Guid {
eq(&self, other: &Self) -> bool264     fn eq(&self, other: &Self) -> bool {
265         self.as_bytes() == other.as_bytes()
266     }
267 }
268 
269 impl Eq for Guid {}
270 
271 impl Hash for Guid {
hash<H: Hasher>(&self, state: &mut H)272     fn hash<H: Hasher>(&self, state: &mut H) {
273         self.as_bytes().hash(state);
274     }
275 }
276 
277 impl<'a> From<&'a str> for Guid {
278     #[inline]
from(s: &'a str) -> Guid279     fn from(s: &'a str) -> Guid {
280         Guid::from_slice(s.as_ref())
281     }
282 }
283 impl<'a> From<&'a &str> for Guid {
284     #[inline]
from(s: &'a &str) -> Guid285     fn from(s: &'a &str) -> Guid {
286         Guid::from_slice(s.as_ref())
287     }
288 }
289 
290 impl<'a> From<&'a [u8]> for Guid {
291     #[inline]
from(s: &'a [u8]) -> Guid292     fn from(s: &'a [u8]) -> Guid {
293         Guid::from_slice(s)
294     }
295 }
296 
297 impl From<String> for Guid {
298     #[inline]
from(s: String) -> Guid299     fn from(s: String) -> Guid {
300         Guid::from_string(s)
301     }
302 }
303 
304 impl From<Vec<u8>> for Guid {
305     #[inline]
from(v: Vec<u8>) -> Guid306     fn from(v: Vec<u8>) -> Guid {
307         Guid::from_vec(v)
308     }
309 }
310 
311 impl From<Guid> for String {
312     #[inline]
from(guid: Guid) -> String313     fn from(guid: Guid) -> String {
314         guid.into_string()
315     }
316 }
317 
318 impl From<Guid> for Vec<u8> {
319     #[inline]
from(guid: Guid) -> Vec<u8>320     fn from(guid: Guid) -> Vec<u8> {
321         guid.into_string().into_bytes()
322     }
323 }
324 
325 impl AsRef<str> for Guid {
326     #[inline]
as_ref(&self) -> &str327     fn as_ref(&self) -> &str {
328         self.as_str()
329     }
330 }
331 
332 impl AsRef<[u8]> for Guid {
333     #[inline]
as_ref(&self) -> &[u8]334     fn as_ref(&self) -> &[u8] {
335         self.as_bytes()
336     }
337 }
338 
339 impl ops::Deref for Guid {
340     type Target = str;
341     #[inline]
deref(&self) -> &str342     fn deref(&self) -> &str {
343         self.as_str()
344     }
345 }
346 
347 // The default Debug impl is pretty unhelpful here.
348 impl fmt::Debug for Guid {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result349     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
350         write!(f, "Guid({:?})", self.as_str())
351     }
352 }
353 
354 impl fmt::Display for Guid {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result355     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
356         fmt::Display::fmt(self.as_str(), f)
357     }
358 }
359 
360 impl std::default::Default for Guid {
361     /// Create a default guid by calling `Guid::empty()`
362     #[inline]
default() -> Self363     fn default() -> Self {
364         Guid::empty()
365     }
366 }
367 
368 macro_rules! impl_guid_eq {
369     ($($other: ty),+) => {$(
370         impl<'a> PartialEq<$other> for Guid {
371             #[inline]
372             fn eq(&self, other: &$other) -> bool {
373                 PartialEq::eq(AsRef::<[u8]>::as_ref(self), AsRef::<[u8]>::as_ref(other))
374             }
375         }
376 
377         impl<'a> PartialEq<Guid> for $other {
378             #[inline]
379             fn eq(&self, other: &Guid) -> bool {
380                 PartialEq::eq(AsRef::<[u8]>::as_ref(self), AsRef::<[u8]>::as_ref(other))
381             }
382         }
383     )+}
384 }
385 
386 // Implement direct comparison with some common types from the stdlib.
387 impl_guid_eq![str, &'a str, String, [u8], &'a [u8], Vec<u8>];
388 
#[cfg(test)]
mod test {
    use super::*;

    // Rebuilds the lookup table from the definition of the base64url alphabet
    // and checks that the constant matches it entry for entry.
    #[test]
    fn test_base64url_bytes() {
        let mut expect = [0u8; 256];
        let alphanumeric = (b'0'..=b'9').chain(b'a'..=b'z').chain(b'A'..=b'Z');
        for b in alphanumeric {
            expect[usize::from(b)] = 1;
        }
        for &b in b"_-" {
            expect[usize::from(b)] = 1;
        }
        assert_eq!(&BASE64URL_BYTES[..], &expect[..]);
    }

    #[test]
    fn test_valid_for_places() {
        let accepted = [
            Guid::from("aaaabbbbcccc"),
            Guid::from_slice(b"09_az-AZ_09-"),
        ];
        for guid in &accepted {
            assert!(guid.is_valid_for_places());
        }

        let rejected = [
            Guid::from("aaaabbbbccccd"), // too long
            Guid::from("aaaabbbbccc"),   // too short
            Guid::from("aaaabbbbccc="),  // right length, bad character
        ];
        for guid in &rejected {
            assert!(!guid.is_valid_for_places());
        }
    }

    #[test]
    fn test_comparison() {
        // Against `&str`.
        assert_eq!(Guid::from("abcdabcdabcd"), "abcdabcdabcd");
        assert_ne!(Guid::from("abcdabcdabcd".to_string()), "ABCDabcdabcd");

        // Against `&[u8]`. The slices are taken explicitly because
        // b"abcdabcdabcd" has type &[u8; 12]...
        assert_eq!(Guid::from("abcdabcdabcd"), &b"abcdabcdabcd"[..]);
        assert_ne!(Guid::from(&b"abcdabcdabcd"[..]), &b"ABCDabcdabcd"[..]);

        // Against `String`.
        assert_eq!(
            Guid::from(b"abcdabcdabcd"[..].to_owned()),
            "abcdabcdabcd".to_string()
        );
        assert_ne!(Guid::from("abcdabcdabcd"), "ABCDabcdabcd".to_string());

        // Against `Vec<u8>`, using guids too long for inline storage.
        assert_eq!(
            Guid::from("abcdabcdabcd1234"),
            Vec::from(b"abcdabcdabcd1234".as_ref())
        );
        assert_ne!(
            Guid::from("abcdabcdabcd4321"),
            Vec::from(b"ABCDabcdabcd4321".as_ref())
        );

        // order by data instead of length
        assert!(Guid::from("zzz") > Guid::from("aaaaaa"));
        assert!(Guid::from("ThisIsASolowGuid") < Guid::from("zzz"));
        assert!(Guid::from("ThisIsASolowGuid") > Guid::from("AnotherSlowGuid"));
    }

    #[cfg(feature = "random")]
    #[test]
    fn test_random() {
        use std::collections::HashSet;
        // Used to verify uniqueness within our sample of 1000. Could cause
        // random failures, but desktop has the same test, and it's never caused
        // a problem AFAIK.
        let mut seen: HashSet<String> = HashSet::new();
        for _ in 0..1000 {
            let guid = Guid::random();
            assert_eq!(guid.len(), 12);
            assert!(guid.is_valid_for_places());
            let decoded = base64::decode_config(&guid, base64::URL_SAFE_NO_PAD).unwrap();
            assert_eq!(decoded.len(), 9);
            let no_collision = seen.insert(guid.clone().into_string());
            assert!(no_collision, "{}", guid);
        }
    }
}
467