1 use std::iter::IntoIterator;
2 use std::time::Duration;
3 use std::time::Instant;
4 
5 use crate::black_box;
6 use crate::measurement::{Measurement, WallTime};
7 use crate::BatchSize;
8 
9 #[cfg(feature = "async")]
10 use std::future::Future;
11 
12 #[cfg(feature = "async")]
13 use crate::async_executor::AsyncExecutor;
14 
15 // ================================== MAINTENANCE NOTE =============================================
16 // Any changes made to either Bencher or AsyncBencher will have to be replicated to the other!
17 // ================================== MAINTENANCE NOTE =============================================
18 
/// Timer struct used to iterate a benchmarked function and measure the runtime.
///
/// This struct provides different timing loops as methods. Each timing loop provides a different
/// way to time a routine and each has advantages and disadvantages.
///
/// * If you want to do the iteration and measurement yourself (eg. passing the iteration count
///   to a separate process), use `iter_custom`.
/// * If your routine requires no per-iteration setup and returns a value with an expensive `drop`
///   method, use `iter_with_large_drop`.
/// * If your routine requires some per-iteration setup that shouldn't be timed, use `iter_batched`
///   or `iter_batched_ref`. See [`BatchSize`](enum.BatchSize.html) for a discussion of batch sizes.
///   If the setup value implements `Drop` and you don't want to include the `drop` time in the
///   measurement, use `iter_batched_ref`, otherwise use `iter_batched`. These methods are also
///   suitable for benchmarking routines which return a value with an expensive `drop` method,
///   but are more complex than `iter_with_large_drop`.
/// * Otherwise, use `iter`.
///
/// By default measurements are taken with the [`WallTime`] measurement; a different
/// `Measurement` implementation can be supplied via the `M` type parameter.
pub struct Bencher<'a, M: Measurement = WallTime> {
    pub(crate) iterated: bool,         // Have we iterated this benchmark?
    pub(crate) iters: u64,             // Number of times to iterate this benchmark
    pub(crate) value: M::Value,        // The measured value
    pub(crate) measurement: &'a M,     // Reference to the measurement object
    pub(crate) elapsed_time: Duration, // How much time did it take to perform the iteration? Used for the warmup period.
}
impl<'a, M: Measurement> Bencher<'a, M> {
    /// Times a `routine` by executing it many times and timing the total elapsed time.
    ///
    /// Prefer this timing loop when `routine` returns a value that doesn't have a destructor.
    ///
    /// # Timing model
    ///
    /// Note that the `Bencher` also times the time required to destroy the output of `routine()`.
    /// Therefore prefer this timing loop when the runtime of `mem::drop(O)` is negligible compared
    /// to the runtime of the `routine`.
    ///
    /// ```text
    /// elapsed = Instant::now + iters * (routine + mem::drop(O) + Range::next)
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    ///
    /// // The function to benchmark
    /// fn foo() {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("iter", move |b| {
    ///         b.iter(|| foo())
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter<O, R>(&mut self, mut routine: R)
    where
        R: FnMut() -> O,
    {
        self.iterated = true;
        let time_start = Instant::now();
        let start = self.measurement.start();
        for _ in 0..self.iters {
            // black_box the output so the optimizer cannot elide the routine.
            black_box(routine());
        }
        self.value = self.measurement.end(start);
        // Wall-clock duration of the whole call; used for the warmup period even when a
        // non-wall-time `Measurement` is in use.
        self.elapsed_time = time_start.elapsed();
    }

    /// Times a `routine` by executing it many times and relying on `routine` to measure its own execution time.
    ///
    /// Prefer this timing loop in cases where `routine` has to do its own measurements to
    /// get accurate timing information (for example in multi-threaded scenarios where you spawn
    /// and coordinate with multiple threads).
    ///
    /// # Timing model
    /// Custom, the timing model is whatever is returned as the Duration from `routine`.
    ///
    /// # Example
    /// ```rust
    /// #[macro_use] extern crate criterion;
    /// use criterion::*;
    /// use criterion::black_box;
    /// use std::time::Instant;
    ///
    /// fn foo() {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("iter", move |b| {
    ///         b.iter_custom(|iters| {
    ///             let start = Instant::now();
    ///             for _i in 0..iters {
    ///                 black_box(foo());
    ///             }
    ///             start.elapsed()
    ///         })
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter_custom<R>(&mut self, mut routine: R)
    where
        R: FnMut(u64) -> M::Value,
    {
        self.iterated = true;
        let time_start = Instant::now();
        // The routine is fully responsible for producing the measured value; we only
        // record wall-clock time for warmup purposes.
        self.value = routine(self.iters);
        self.elapsed_time = time_start.elapsed();
    }

    // Deprecated-style convenience wrapper: equivalent to `iter_batched` with
    // `BatchSize::PerIteration` (setup runs before every single iteration).
    #[doc(hidden)]
    pub fn iter_with_setup<I, O, S, R>(&mut self, setup: S, routine: R)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> O,
    {
        self.iter_batched(setup, routine, BatchSize::PerIteration);
    }

    /// Times a `routine` by collecting its output on each iteration. This avoids timing the
    /// destructor of the value returned by `routine`.
    ///
    /// WARNING: This requires `O(iters * mem::size_of::<O>())` of memory, and `iters` is not under the
    /// control of the caller. If this causes out-of-memory errors, use `iter_batched` instead.
    ///
    /// # Timing model
    ///
    /// ``` text
    /// elapsed = Instant::now + iters * (routine) + Iterator::collect::<Vec<_>>
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    ///
    /// fn create_vector() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("with_drop", move |b| {
    ///         // This will avoid timing the Vec::drop.
    ///         b.iter_with_large_drop(|| create_vector())
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    pub fn iter_with_large_drop<O, R>(&mut self, mut routine: R)
    where
        R: FnMut() -> O,
    {
        // Implemented as a batched run with a unit setup: outputs are collected per batch
        // and dropped outside the measured region.
        self.iter_batched(|| (), |_| routine(), BatchSize::SmallInput);
    }

    // Deprecated-style convenience wrapper: equivalent to `iter_batched` with
    // `BatchSize::NumBatches(1)` (all inputs are created up front in one batch).
    #[doc(hidden)]
    pub fn iter_with_large_setup<I, O, S, R>(&mut self, setup: S, routine: R)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> O,
    {
        self.iter_batched(setup, routine, BatchSize::NumBatches(1));
    }

    /// Times a `routine` that requires some input by generating a batch of input, then timing the
    /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for
    /// details on choosing the batch size. Use this when the routine must consume its input.
    ///
    /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
    /// data on each iteration.
    ///
    /// # Timing model
    ///
    /// ```text
    /// elapsed = (Instant::now * num_batches) + (iters * (routine + O::drop)) + Vec::extend
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    ///
    /// fn create_scrambled_data() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// // The sorting algorithm to test
    /// fn sort(data: &mut [u64]) {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     let data = create_scrambled_data();
    ///
    ///     c.bench_function("with_setup", move |b| {
    ///         // This will avoid timing the clone call.
    ///         b.iter_batched(|| data.clone(), |mut data| sort(&mut data), BatchSize::SmallInput)
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter_batched<I, O, S, R>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> O,
    {
        self.iterated = true;
        let batch_size = size.iters_per_batch(self.iters);
        assert!(batch_size != 0, "Batch size must not be zero.");
        let time_start = Instant::now();
        self.value = self.measurement.zero();

        if batch_size == 1 {
            // Fast path: setup before, and measure, each iteration individually;
            // no input/output vectors are needed.
            for _ in 0..self.iters {
                let input = black_box(setup());

                let start = self.measurement.start();
                let output = routine(input);
                let end = self.measurement.end(start);
                self.value = self.measurement.add(&self.value, &end);

                // Drop the output outside the measured region.
                drop(black_box(output));
            }
        } else {
            let mut iteration_counter = 0;

            while iteration_counter < self.iters {
                // The final batch may be smaller if `iters` is not a multiple of `batch_size`.
                let batch_size = ::std::cmp::min(batch_size, self.iters - iteration_counter);

                // Inputs are created up front and outputs pre-allocated so that only the
                // routine calls (plus Vec::extend bookkeeping) fall inside the timed region.
                let inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
                let mut outputs = Vec::with_capacity(batch_size as usize);

                let start = self.measurement.start();
                outputs.extend(inputs.into_iter().map(&mut routine));
                let end = self.measurement.end(start);
                self.value = self.measurement.add(&self.value, &end);

                // Outputs (and their destructors) are discarded outside the timed region.
                black_box(outputs);

                iteration_counter += batch_size;
            }
        }

        self.elapsed_time = time_start.elapsed();
    }

    /// Times a `routine` that requires some input by generating a batch of input, then timing the
    /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for
    /// details on choosing the batch size. Use this when the routine should accept the input by
    /// mutable reference.
    ///
    /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
    /// data on each iteration.
    ///
    /// # Timing model
    ///
    /// ```text
    /// elapsed = (Instant::now * num_batches) + (iters * routine) + Vec::extend
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    ///
    /// fn create_scrambled_data() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// // The sorting algorithm to test
    /// fn sort(data: &mut [u64]) {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     let data = create_scrambled_data();
    ///
    ///     c.bench_function("with_setup", move |b| {
    ///         // This will avoid timing the clone call.
    ///         b.iter_batched_ref(|| data.clone(), |data| sort(data), BatchSize::SmallInput)
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter_batched_ref<I, O, S, R>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
    where
        S: FnMut() -> I,
        R: FnMut(&mut I) -> O,
    {
        self.iterated = true;
        let batch_size = size.iters_per_batch(self.iters);
        assert!(batch_size != 0, "Batch size must not be zero.");
        let time_start = Instant::now();
        self.value = self.measurement.zero();

        if batch_size == 1 {
            // Fast path: setup before, and measure, each iteration individually.
            for _ in 0..self.iters {
                let mut input = black_box(setup());

                let start = self.measurement.start();
                let output = routine(&mut input);
                let end = self.measurement.end(start);
                self.value = self.measurement.add(&self.value, &end);

                // Both the routine's output and the (mutated) input are dropped outside
                // the measured region — this is the key difference from `iter_batched`.
                drop(black_box(output));
                drop(black_box(input));
            }
        } else {
            let mut iteration_counter = 0;

            while iteration_counter < self.iters {
                // The final batch may be smaller if `iters` is not a multiple of `batch_size`.
                let batch_size = ::std::cmp::min(batch_size, self.iters - iteration_counter);

                // Inputs stay owned by `inputs` (the routine only borrows them), so their
                // destructors run when the batch's Vec is dropped, outside the timed region.
                let mut inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
                let mut outputs = Vec::with_capacity(batch_size as usize);

                let start = self.measurement.start();
                outputs.extend(inputs.iter_mut().map(&mut routine));
                let end = self.measurement.end(start);
                self.value = self.measurement.add(&self.value, &end);

                black_box(outputs);

                iteration_counter += batch_size;
            }
        }
        self.elapsed_time = time_start.elapsed();
    }

    // Benchmarks must actually call one of the iter methods. This causes benchmarks to fail loudly
    // if they don't.
    pub(crate) fn assert_iterated(&mut self) {
        if !self.iterated {
            panic!("Benchmark function must call Bencher::iter or related method.");
        }
        // Reset the flag so the next sample run is checked independently.
        self.iterated = false;
    }

    /// Convert this bencher into an AsyncBencher, which enables async/await support.
    #[cfg(feature = "async")]
    pub fn to_async<'b, A: AsyncExecutor>(&'b mut self, runner: A) -> AsyncBencher<'a, 'b, A, M> {
        AsyncBencher { b: self, runner }
    }
}
395 
/// Async/await variant of the Bencher struct.
///
/// Constructed via [`Bencher::to_async`]; all timing loops delegate their state to the
/// wrapped synchronous `Bencher` and drive the routine's futures on `runner`.
#[cfg(feature = "async")]
pub struct AsyncBencher<'a, 'b, A: AsyncExecutor, M: Measurement = WallTime> {
    b: &'b mut Bencher<'a, M>, // Underlying sync bencher holding iters/value/elapsed state
    runner: A,                 // Executor used to block on the async timing loops
}
#[cfg(feature = "async")]
impl<'a, 'b, A: AsyncExecutor, M: Measurement> AsyncBencher<'a, 'b, A, M> {
    /// Times a `routine` by executing it many times and timing the total elapsed time.
    ///
    /// Prefer this timing loop when `routine` returns a value that doesn't have a destructor.
    ///
    /// # Timing model
    ///
    /// Note that the `AsyncBencher` also times the time required to destroy the output of `routine()`.
    /// Therefore prefer this timing loop when the runtime of `mem::drop(O)` is negligible compared
    /// to the runtime of the `routine`.
    ///
    /// ```text
    /// elapsed = Instant::now + iters * (routine + mem::drop(O) + Range::next)
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    /// use criterion::async_executor::FuturesExecutor;
    ///
    /// // The function to benchmark
    /// async fn foo() {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("iter", move |b| {
    ///         b.to_async(FuturesExecutor).iter(|| async { foo().await } )
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter<O, R, F>(&mut self, mut routine: R)
    where
        R: FnMut() -> F,
        F: Future<Output = O>,
    {
        // The entire timing loop runs inside a single block_on call so that executor
        // startup/shutdown is not timed per iteration.
        let AsyncBencher { b, runner } = self;
        runner.block_on(async {
            b.iterated = true;
            let time_start = Instant::now();
            let start = b.measurement.start();
            for _ in 0..b.iters {
                // black_box the awaited output so the optimizer cannot elide the routine.
                black_box(routine().await);
            }
            b.value = b.measurement.end(start);
            b.elapsed_time = time_start.elapsed();
        });
    }

    /// Times a `routine` by executing it many times and relying on `routine` to measure its own execution time.
    ///
    /// Prefer this timing loop in cases where `routine` has to do its own measurements to
    /// get accurate timing information (for example in multi-threaded scenarios where you spawn
    /// and coordinate with multiple threads).
    ///
    /// # Timing model
    /// Custom, the timing model is whatever is returned as the Duration from `routine`.
    ///
    /// # Example
    /// ```rust
    /// #[macro_use] extern crate criterion;
    /// use criterion::*;
    /// use criterion::black_box;
    /// use criterion::async_executor::FuturesExecutor;
    /// use std::time::Instant;
    ///
    /// async fn foo() {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("iter", move |b| {
    ///         b.to_async(FuturesExecutor).iter_custom(|iters| {
    ///             async move {
    ///                 let start = Instant::now();
    ///                 for _i in 0..iters {
    ///                     black_box(foo().await);
    ///                 }
    ///                 start.elapsed()
    ///             }
    ///         })
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter_custom<R, F>(&mut self, mut routine: R)
    where
        R: FnMut(u64) -> F,
        F: Future<Output = M::Value>,
    {
        let AsyncBencher { b, runner } = self;
        runner.block_on(async {
            b.iterated = true;
            let time_start = Instant::now();
            // The routine's future is fully responsible for producing the measured value.
            b.value = routine(b.iters).await;
            b.elapsed_time = time_start.elapsed();
        })
    }

    // Deprecated-style convenience wrapper: equivalent to `iter_batched` with
    // `BatchSize::PerIteration`.
    #[doc(hidden)]
    pub fn iter_with_setup<I, O, S, R, F>(&mut self, setup: S, routine: R)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> F,
        F: Future<Output = O>,
    {
        self.iter_batched(setup, routine, BatchSize::PerIteration);
    }

    /// Times a `routine` by collecting its output on each iteration. This avoids timing the
    /// destructor of the value returned by `routine`.
    ///
    /// WARNING: This requires `O(iters * mem::size_of::<O>())` of memory, and `iters` is not under the
    /// control of the caller. If this causes out-of-memory errors, use `iter_batched` instead.
    ///
    /// # Timing model
    ///
    /// ``` text
    /// elapsed = Instant::now + iters * (routine) + Iterator::collect::<Vec<_>>
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    /// use criterion::async_executor::FuturesExecutor;
    ///
    /// async fn create_vector() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     c.bench_function("with_drop", move |b| {
    ///         // This will avoid timing the Vec::drop.
    ///         b.to_async(FuturesExecutor).iter_with_large_drop(|| async { create_vector().await })
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    pub fn iter_with_large_drop<O, R, F>(&mut self, mut routine: R)
    where
        R: FnMut() -> F,
        F: Future<Output = O>,
    {
        // Implemented as a batched run with a unit setup: outputs are collected per batch
        // and dropped outside the measured region.
        self.iter_batched(|| (), |_| routine(), BatchSize::SmallInput);
    }

    // Deprecated-style convenience wrapper: equivalent to `iter_batched` with
    // `BatchSize::NumBatches(1)`.
    #[doc(hidden)]
    pub fn iter_with_large_setup<I, O, S, R, F>(&mut self, setup: S, routine: R)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> F,
        F: Future<Output = O>,
    {
        self.iter_batched(setup, routine, BatchSize::NumBatches(1));
    }

    /// Times a `routine` that requires some input by generating a batch of input, then timing the
    /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for
    /// details on choosing the batch size. Use this when the routine must consume its input.
    ///
    /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
    /// data on each iteration.
    ///
    /// # Timing model
    ///
    /// ```text
    /// elapsed = (Instant::now * num_batches) + (iters * (routine + O::drop)) + Vec::extend
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    /// use criterion::async_executor::FuturesExecutor;
    ///
    /// fn create_scrambled_data() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// // The sorting algorithm to test
    /// async fn sort(data: &mut [u64]) {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     let data = create_scrambled_data();
    ///
    ///     c.bench_function("with_setup", move |b| {
    ///         // This will avoid timing the clone call.
    ///         b.to_async(FuturesExecutor).iter_batched(|| data.clone(), |mut data| async move { sort(&mut data).await }, BatchSize::SmallInput)
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter_batched<I, O, S, R, F>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
    where
        S: FnMut() -> I,
        R: FnMut(I) -> F,
        F: Future<Output = O>,
    {
        let AsyncBencher { b, runner } = self;
        runner.block_on(async {
            b.iterated = true;
            let batch_size = size.iters_per_batch(b.iters);
            assert!(batch_size != 0, "Batch size must not be zero.");
            let time_start = Instant::now();
            b.value = b.measurement.zero();

            if batch_size == 1 {
                // Fast path: setup before, and measure, each iteration individually.
                for _ in 0..b.iters {
                    let input = black_box(setup());

                    let start = b.measurement.start();
                    let output = routine(input).await;
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    // Drop the output outside the measured region.
                    drop(black_box(output));
                }
            } else {
                let mut iteration_counter = 0;

                while iteration_counter < b.iters {
                    // The final batch may be smaller if `iters` is not a multiple of `batch_size`.
                    let batch_size = ::std::cmp::min(batch_size, b.iters - iteration_counter);

                    let inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
                    let mut outputs = Vec::with_capacity(batch_size as usize);

                    let start = b.measurement.start();
                    // Can't use .extend here like the sync version does
                    for input in inputs {
                        outputs.push(routine(input).await);
                    }
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    // Outputs (and their destructors) are discarded outside the timed region.
                    black_box(outputs);

                    iteration_counter += batch_size;
                }
            }

            b.elapsed_time = time_start.elapsed();
        })
    }

    /// Times a `routine` that requires some input by generating a batch of input, then timing the
    /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for
    /// details on choosing the batch size. Use this when the routine should accept the input by
    /// mutable reference.
    ///
    /// For example, use this loop to benchmark sorting algorithms, because they require unsorted
    /// data on each iteration.
    ///
    /// # Timing model
    ///
    /// ```text
    /// elapsed = (Instant::now * num_batches) + (iters * routine) + Vec::extend
    /// ```
    ///
    /// # Example
    ///
    /// ```rust
    /// #[macro_use] extern crate criterion;
    ///
    /// use criterion::*;
    /// use criterion::async_executor::FuturesExecutor;
    ///
    /// fn create_scrambled_data() -> Vec<u64> {
    ///     # vec![]
    ///     // ...
    /// }
    ///
    /// // The sorting algorithm to test
    /// async fn sort(data: &mut [u64]) {
    ///     // ...
    /// }
    ///
    /// fn bench(c: &mut Criterion) {
    ///     let data = create_scrambled_data();
    ///
    ///     c.bench_function("with_setup", move |b| {
    ///         // This will avoid timing the clone call.
    ///         b.to_async(FuturesExecutor).iter_batched(|| data.clone(), |mut data| async move { sort(&mut data).await }, BatchSize::SmallInput)
    ///     });
    /// }
    ///
    /// criterion_group!(benches, bench);
    /// criterion_main!(benches);
    /// ```
    ///
    #[inline(never)]
    pub fn iter_batched_ref<I, O, S, R, F>(&mut self, mut setup: S, mut routine: R, size: BatchSize)
    where
        S: FnMut() -> I,
        R: FnMut(&mut I) -> F,
        F: Future<Output = O>,
    {
        let AsyncBencher { b, runner } = self;
        runner.block_on(async {
            b.iterated = true;
            let batch_size = size.iters_per_batch(b.iters);
            assert!(batch_size != 0, "Batch size must not be zero.");
            let time_start = Instant::now();
            b.value = b.measurement.zero();

            if batch_size == 1 {
                // Fast path: setup before, and measure, each iteration individually.
                for _ in 0..b.iters {
                    let mut input = black_box(setup());

                    let start = b.measurement.start();
                    let output = routine(&mut input).await;
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    // Both the routine's output and the (mutated) input are dropped outside
                    // the measured region — this is the key difference from `iter_batched`.
                    drop(black_box(output));
                    drop(black_box(input));
                }
            } else {
                let mut iteration_counter = 0;

                while iteration_counter < b.iters {
                    // The final batch may be smaller if `iters` is not a multiple of `batch_size`.
                    let batch_size = ::std::cmp::min(batch_size, b.iters - iteration_counter);

                    let inputs = black_box((0..batch_size).map(|_| setup()).collect::<Vec<_>>());
                    let mut outputs = Vec::with_capacity(batch_size as usize);

                    let start = b.measurement.start();
                    // Can't use .extend here like the sync version does
                    for mut input in inputs {
                        outputs.push(routine(&mut input).await);
                    }
                    let end = b.measurement.end(start);
                    b.value = b.measurement.add(&b.value, &end);

                    black_box(outputs);

                    iteration_counter += batch_size;
                }
            }
            b.elapsed_time = time_start.elapsed();
        });
    }
}
773