1 #![feature(test)]
2 
3 extern crate test;
4 extern crate itertools;
5 
6 use itertools::Itertools;
7 use itertools::cloned;
8 use test::Bencher;
9 
10 trait IterEx : Iterator {
11     // Another efficient implementation against which to compare,
12     // but needs `std` so is less desirable.
tree_fold1_vec<F>(self, mut f: F) -> Option<Self::Item> where F: FnMut(Self::Item, Self::Item) -> Self::Item, Self: Sized,13     fn tree_fold1_vec<F>(self, mut f: F) -> Option<Self::Item>
14         where F: FnMut(Self::Item, Self::Item) -> Self::Item,
15               Self: Sized,
16     {
17         let hint = self.size_hint().0;
18         let cap = std::mem::size_of::<usize>() * 8 - hint.leading_zeros() as usize;
19         let mut stack = Vec::with_capacity(cap);
20         self.enumerate().foreach(|(mut i, mut x)| {
21             while (i & 1) != 0 {
22                 x = f(stack.pop().unwrap(), x);
23                 i >>= 1;
24             }
25             stack.push(x);
26         });
27         stack.into_iter().fold1(f)
28     }
29 }
30 impl<T:Iterator> IterEx for T {}
31 
32 macro_rules! def_benchs {
33     ($N:expr,
34      $FUN:ident,
35      $BENCH_NAME:ident,
36      ) => (
37         mod $BENCH_NAME {
38         use super::*;
39 
40         #[bench]
41         fn sum(b: &mut Bencher) {
42             let v: Vec<u32> = (0.. $N).collect();
43             b.iter(|| {
44                 cloned(&v).$FUN(|x, y| x + y)
45             });
46         }
47 
48         #[bench]
49         fn complex_iter(b: &mut Bencher) {
50             let u = (3..).take($N / 2);
51             let v = (5..).take($N / 2);
52             let it = u.chain(v);
53 
54             b.iter(|| {
55                 it.clone().map(|x| x as f32).$FUN(f32::atan2)
56             });
57         }
58 
59         #[bench]
60         fn string_format(b: &mut Bencher) {
61             // This goes quadratic with linear `fold1`, so use a smaller
62             // size to not waste too much time in travis.  The allocations
63             // in here are so expensive anyway that it'll still take
64             // way longer per iteration than the other two benchmarks.
65             let v: Vec<u32> = (0.. ($N/4)).collect();
66             b.iter(|| {
67                 cloned(&v).map(|x| x.to_string()).$FUN(|x, y| format!("{} + {}", x, y))
68             });
69         }
70         }
71     )
72 }
73 
74 def_benchs!{
75     10_000,
76     fold1,
77     fold1_10k,
78 }
79 
80 def_benchs!{
81     10_000,
82     tree_fold1,
83     tree_fold1_stack_10k,
84 }
85 
86 def_benchs!{
87     10_000,
88     tree_fold1_vec,
89     tree_fold1_vec_10k,
90 }
91 
92 def_benchs!{
93     100,
94     fold1,
95     fold1_100,
96 }
97 
98 def_benchs!{
99     100,
100     tree_fold1,
101     tree_fold1_stack_100,
102 }
103 
104 def_benchs!{
105     100,
106     tree_fold1_vec,
107     tree_fold1_vec_100,
108 }
109 
110 def_benchs!{
111     8,
112     fold1,
113     fold1_08,
114 }
115 
116 def_benchs!{
117     8,
118     tree_fold1,
119     tree_fold1_stack_08,
120 }
121 
122 def_benchs!{
123     8,
124     tree_fold1_vec,
125     tree_fold1_vec_08,
126 }
127