//! Benchmark the overhead that the synchronization of `OnceCell::get` causes.
//! We do some other operations that write to memory to get an imprecise but somewhat realistic
//! measurement.
4 
5 use once_cell::sync::OnceCell;
6 use std::sync::atomic::{AtomicUsize, Ordering};
7 
// Number of worker threads contending on the cell.
const N_THREADS: usize = 16;
// Iterations of the measured loop performed by each thread.
const N_ROUNDS: usize = 1_000_000;

// The cell under test; every thread calls `get_or_init` on it each round.
static CELL: OnceCell<usize> = OnceCell::new();
// Shared counter the threads hammer so the loop has observable side effects
// the optimizer cannot remove; its final value is printed at the end.
static OTHER: AtomicUsize = AtomicUsize::new(0);
13 
main()14 fn main() {
15     let start = std::time::Instant::now();
16     let threads =
17         (0..N_THREADS).map(|i| std::thread::spawn(move || thread_main(i))).collect::<Vec<_>>();
18     for thread in threads {
19         thread.join().unwrap();
20     }
21     println!("{:?}", start.elapsed());
22     println!("{:?}", OTHER.load(Ordering::Relaxed));
23 }
24 
// `inline(never)` keeps this a real call so the per-thread work isn't merged
// into `main` and reshaped by the optimizer.
#[inline(never)]
fn thread_main(i: usize) {
    // The operations we do here don't really matter, as long as we do multiple writes, and
    // everything is messy enough to prevent the compiler from optimizing the loop away.
    let mut data = [i; 128];
    let mut accum = 0usize;
    for _ in 0..N_ROUNDS {
        // The operation under measurement: one synchronized access per round.
        // Only the first call across all threads actually runs the closure.
        let _value = CELL.get_or_init(|| i + 1);
        // Write to shared memory; the returned previous value feeds back into
        // `accum` so the iterations form a data-dependent chain.
        let k = OTHER.fetch_add(data[accum & 0x7F] as usize, Ordering::Relaxed);
        // Scramble the local array so `data`, `accum`, and `k` all stay live.
        // All arithmetic is wrapping to avoid overflow panics in debug builds.
        for j in data.iter_mut() {
            *j = (*j).wrapping_add(accum);
            accum = accum.wrapping_add(k);
        }
    }
}
40