1 use criterion::*;
2 use rand::{Rng, SeedableRng};
3 use rand_chacha::ChaChaRng;
4 use rav1e::bench::cpu_features::*;
5 use rav1e::bench::frame::{AsRegion, PlaneOffset, PlaneSlice};
6 use rav1e::bench::mc::*;
7 use rav1e::bench::util::Aligned;
8 use rav1e::prelude::*;
9 
bench_put_8tap_top_left_lbd(c: &mut Criterion)10 fn bench_put_8tap_top_left_lbd(c: &mut Criterion) {
11   let mut ra = ChaChaRng::from_seed([0; 32]);
12   let cpu = CpuFeatureLevel::default();
13   let w = 640;
14   let h = 480;
15   let input_plane = new_plane::<u8>(&mut ra, w, h);
16   let mut dst_plane = new_plane::<u8>(&mut ra, w, h);
17 
18   let (row_frac, col_frac, src) = get_params(
19     &input_plane,
20     PlaneOffset { x: 0, y: 0 },
21     MotionVector { row: 0, col: 0 },
22   );
23   c.bench_function("put_8tap_top_left_lbd", |b| {
24     b.iter(|| {
25       let _ = black_box(put_8tap(
26         &mut dst_plane.as_region_mut(),
27         src,
28         8,
29         8,
30         col_frac,
31         row_frac,
32         FilterMode::REGULAR,
33         FilterMode::REGULAR,
34         8,
35         cpu,
36       ));
37     })
38   });
39 }
40 
bench_put_8tap_top_lbd(c: &mut Criterion)41 fn bench_put_8tap_top_lbd(c: &mut Criterion) {
42   let mut ra = ChaChaRng::from_seed([0; 32]);
43   let cpu = CpuFeatureLevel::default();
44   let w = 640;
45   let h = 480;
46   let input_plane = new_plane::<u8>(&mut ra, w, h);
47   let mut dst_plane = new_plane::<u8>(&mut ra, w, h);
48 
49   let (row_frac, col_frac, src) = get_params(
50     &input_plane,
51     PlaneOffset { x: 0, y: 0 },
52     MotionVector { row: 0, col: 4 },
53   );
54   c.bench_function("put_8tap_top_lbd", |b| {
55     b.iter(|| {
56       let _ = black_box(put_8tap(
57         &mut dst_plane.as_region_mut(),
58         src,
59         8,
60         8,
61         col_frac,
62         row_frac,
63         FilterMode::REGULAR,
64         FilterMode::REGULAR,
65         8,
66         cpu,
67       ));
68     })
69   });
70 }
71 
bench_put_8tap_left_lbd(c: &mut Criterion)72 fn bench_put_8tap_left_lbd(c: &mut Criterion) {
73   let mut ra = ChaChaRng::from_seed([0; 32]);
74   let cpu = CpuFeatureLevel::default();
75   let w = 640;
76   let h = 480;
77   let input_plane = new_plane::<u8>(&mut ra, w, h);
78   let mut dst_plane = new_plane::<u8>(&mut ra, w, h);
79 
80   let (row_frac, col_frac, src) = get_params(
81     &input_plane,
82     PlaneOffset { x: 0, y: 0 },
83     MotionVector { row: 4, col: 0 },
84   );
85   c.bench_function("put_8tap_left_lbd", |b| {
86     b.iter(|| {
87       let _ = black_box(put_8tap(
88         &mut dst_plane.as_region_mut(),
89         src,
90         8,
91         8,
92         col_frac,
93         row_frac,
94         FilterMode::REGULAR,
95         FilterMode::REGULAR,
96         8,
97         cpu,
98       ));
99     })
100   });
101 }
102 
bench_put_8tap_center_lbd(c: &mut Criterion)103 fn bench_put_8tap_center_lbd(c: &mut Criterion) {
104   let mut ra = ChaChaRng::from_seed([0; 32]);
105   let cpu = CpuFeatureLevel::default();
106   let w = 640;
107   let h = 480;
108   let input_plane = new_plane::<u8>(&mut ra, w, h);
109   let mut dst_plane = new_plane::<u8>(&mut ra, w, h);
110 
111   let (row_frac, col_frac, src) = get_params(
112     &input_plane,
113     PlaneOffset { x: 0, y: 0 },
114     MotionVector { row: 4, col: 4 },
115   );
116   c.bench_function("put_8tap_center_lbd", |b| {
117     b.iter(|| {
118       let _ = black_box(put_8tap(
119         &mut dst_plane.as_region_mut(),
120         src,
121         8,
122         8,
123         col_frac,
124         row_frac,
125         FilterMode::REGULAR,
126         FilterMode::REGULAR,
127         8,
128         cpu,
129       ));
130     })
131   });
132 }
133 
bench_put_8tap_top_left_hbd(c: &mut Criterion)134 fn bench_put_8tap_top_left_hbd(c: &mut Criterion) {
135   let mut ra = ChaChaRng::from_seed([0; 32]);
136   let cpu = CpuFeatureLevel::default();
137   let w = 640;
138   let h = 480;
139   let input_plane = new_plane::<u16>(&mut ra, w, h);
140   let mut dst_plane = new_plane::<u16>(&mut ra, w, h);
141 
142   let (row_frac, col_frac, src) = get_params(
143     &input_plane,
144     PlaneOffset { x: 0, y: 0 },
145     MotionVector { row: 0, col: 0 },
146   );
147   c.bench_function("put_8tap_top_left_hbd", |b| {
148     b.iter(|| {
149       let _ = black_box(put_8tap(
150         &mut dst_plane.as_region_mut(),
151         src,
152         8,
153         8,
154         col_frac,
155         row_frac,
156         FilterMode::REGULAR,
157         FilterMode::REGULAR,
158         10,
159         cpu,
160       ));
161     })
162   });
163 }
164 
bench_put_8tap_top_hbd(c: &mut Criterion)165 fn bench_put_8tap_top_hbd(c: &mut Criterion) {
166   let mut ra = ChaChaRng::from_seed([0; 32]);
167   let cpu = CpuFeatureLevel::default();
168   let w = 640;
169   let h = 480;
170   let input_plane = new_plane::<u16>(&mut ra, w, h);
171   let mut dst_plane = new_plane::<u16>(&mut ra, w, h);
172 
173   let (row_frac, col_frac, src) = get_params(
174     &input_plane,
175     PlaneOffset { x: 0, y: 0 },
176     MotionVector { row: 0, col: 4 },
177   );
178   c.bench_function("put_8tap_top_hbd", |b| {
179     b.iter(|| {
180       let _ = black_box(put_8tap(
181         &mut dst_plane.as_region_mut(),
182         src,
183         8,
184         8,
185         col_frac,
186         row_frac,
187         FilterMode::REGULAR,
188         FilterMode::REGULAR,
189         10,
190         cpu,
191       ));
192     })
193   });
194 }
195 
bench_put_8tap_left_hbd(c: &mut Criterion)196 fn bench_put_8tap_left_hbd(c: &mut Criterion) {
197   let mut ra = ChaChaRng::from_seed([0; 32]);
198   let cpu = CpuFeatureLevel::default();
199   let w = 640;
200   let h = 480;
201   let input_plane = new_plane::<u16>(&mut ra, w, h);
202   let mut dst_plane = new_plane::<u16>(&mut ra, w, h);
203 
204   let (row_frac, col_frac, src) = get_params(
205     &input_plane,
206     PlaneOffset { x: 0, y: 0 },
207     MotionVector { row: 4, col: 0 },
208   );
209   c.bench_function("put_8tap_left_hbd", |b| {
210     b.iter(|| {
211       let _ = black_box(put_8tap(
212         &mut dst_plane.as_region_mut(),
213         src,
214         8,
215         8,
216         col_frac,
217         row_frac,
218         FilterMode::REGULAR,
219         FilterMode::REGULAR,
220         10,
221         cpu,
222       ));
223     })
224   });
225 }
226 
bench_put_8tap_center_hbd(c: &mut Criterion)227 fn bench_put_8tap_center_hbd(c: &mut Criterion) {
228   let mut ra = ChaChaRng::from_seed([0; 32]);
229   let cpu = CpuFeatureLevel::default();
230   let w = 640;
231   let h = 480;
232   let input_plane = new_plane::<u16>(&mut ra, w, h);
233   let mut dst_plane = new_plane::<u16>(&mut ra, w, h);
234 
235   let (row_frac, col_frac, src) = get_params(
236     &input_plane,
237     PlaneOffset { x: 0, y: 0 },
238     MotionVector { row: 4, col: 4 },
239   );
240   c.bench_function("put_8tap_center_hbd", |b| {
241     b.iter(|| {
242       let _ = black_box(put_8tap(
243         &mut dst_plane.as_region_mut(),
244         src,
245         8,
246         8,
247         col_frac,
248         row_frac,
249         FilterMode::REGULAR,
250         FilterMode::REGULAR,
251         10,
252         cpu,
253       ));
254     })
255   });
256 }
257 
bench_prep_8tap_top_left_lbd(c: &mut Criterion)258 fn bench_prep_8tap_top_left_lbd(c: &mut Criterion) {
259   let mut ra = ChaChaRng::from_seed([0; 32]);
260   let cpu = CpuFeatureLevel::default();
261   let w = 640;
262   let h = 480;
263   let input_plane = new_plane::<u8>(&mut ra, w, h);
264   let mut dst = Aligned::<[i16; 128 * 128]>::uninitialized();
265 
266   let (row_frac, col_frac, src) = get_params(
267     &input_plane,
268     PlaneOffset { x: 0, y: 0 },
269     MotionVector { row: 0, col: 0 },
270   );
271   c.bench_function("prep_8tap_top_left_lbd", |b| {
272     b.iter(|| {
273       let _ = black_box(prep_8tap(
274         &mut dst.data,
275         src,
276         8,
277         8,
278         col_frac,
279         row_frac,
280         FilterMode::REGULAR,
281         FilterMode::REGULAR,
282         8,
283         cpu,
284       ));
285     })
286   });
287 }
288 
bench_prep_8tap_top_lbd(c: &mut Criterion)289 fn bench_prep_8tap_top_lbd(c: &mut Criterion) {
290   let mut ra = ChaChaRng::from_seed([0; 32]);
291   let cpu = CpuFeatureLevel::default();
292   let w = 640;
293   let h = 480;
294   let input_plane = new_plane::<u8>(&mut ra, w, h);
295   let mut dst = Aligned::<[i16; 128 * 128]>::uninitialized();
296 
297   let (row_frac, col_frac, src) = get_params(
298     &input_plane,
299     PlaneOffset { x: 0, y: 0 },
300     MotionVector { row: 0, col: 4 },
301   );
302   c.bench_function("prep_8tap_top_lbd", |b| {
303     b.iter(|| {
304       let _ = black_box(prep_8tap(
305         &mut dst.data,
306         src,
307         8,
308         8,
309         col_frac,
310         row_frac,
311         FilterMode::REGULAR,
312         FilterMode::REGULAR,
313         8,
314         cpu,
315       ));
316     })
317   });
318 }
319 
bench_prep_8tap_left_lbd(c: &mut Criterion)320 fn bench_prep_8tap_left_lbd(c: &mut Criterion) {
321   let mut ra = ChaChaRng::from_seed([0; 32]);
322   let cpu = CpuFeatureLevel::default();
323   let w = 640;
324   let h = 480;
325   let input_plane = new_plane::<u8>(&mut ra, w, h);
326   let mut dst = Aligned::<[i16; 128 * 128]>::uninitialized();
327 
328   let (row_frac, col_frac, src) = get_params(
329     &input_plane,
330     PlaneOffset { x: 0, y: 0 },
331     MotionVector { row: 4, col: 0 },
332   );
333   c.bench_function("prep_8tap_left_lbd", |b| {
334     b.iter(|| {
335       let _ = black_box(prep_8tap(
336         &mut dst.data,
337         src,
338         8,
339         8,
340         col_frac,
341         row_frac,
342         FilterMode::REGULAR,
343         FilterMode::REGULAR,
344         8,
345         cpu,
346       ));
347     })
348   });
349 }
350 
bench_prep_8tap_center_lbd(c: &mut Criterion)351 fn bench_prep_8tap_center_lbd(c: &mut Criterion) {
352   let mut ra = ChaChaRng::from_seed([0; 32]);
353   let cpu = CpuFeatureLevel::default();
354   let w = 640;
355   let h = 480;
356   let input_plane = new_plane::<u8>(&mut ra, w, h);
357   let mut dst = Aligned::<[i16; 128 * 128]>::uninitialized();
358 
359   let (row_frac, col_frac, src) = get_params(
360     &input_plane,
361     PlaneOffset { x: 0, y: 0 },
362     MotionVector { row: 4, col: 4 },
363   );
364   c.bench_function("prep_8tap_center_lbd", |b| {
365     b.iter(|| {
366       let _ = black_box(prep_8tap(
367         &mut dst.data,
368         src,
369         8,
370         8,
371         col_frac,
372         row_frac,
373         FilterMode::REGULAR,
374         FilterMode::REGULAR,
375         8,
376         cpu,
377       ));
378     })
379   });
380 }
381 
bench_prep_8tap_top_left_hbd(c: &mut Criterion)382 fn bench_prep_8tap_top_left_hbd(c: &mut Criterion) {
383   let mut ra = ChaChaRng::from_seed([0; 32]);
384   let cpu = CpuFeatureLevel::default();
385   let w = 640;
386   let h = 480;
387   let input_plane = new_plane::<u16>(&mut ra, w, h);
388   let mut dst = Aligned::<[i16; 128 * 128]>::uninitialized();
389 
390   let (row_frac, col_frac, src) = get_params(
391     &input_plane,
392     PlaneOffset { x: 0, y: 0 },
393     MotionVector { row: 0, col: 0 },
394   );
395   c.bench_function("prep_8tap_top_left_hbd", |b| {
396     b.iter(|| {
397       let _ = black_box(prep_8tap(
398         &mut dst.data,
399         src,
400         8,
401         8,
402         col_frac,
403         row_frac,
404         FilterMode::REGULAR,
405         FilterMode::REGULAR,
406         10,
407         cpu,
408       ));
409     })
410   });
411 }
412 
bench_prep_8tap_top_hbd(c: &mut Criterion)413 fn bench_prep_8tap_top_hbd(c: &mut Criterion) {
414   let mut ra = ChaChaRng::from_seed([0; 32]);
415   let cpu = CpuFeatureLevel::default();
416   let w = 640;
417   let h = 480;
418   let input_plane = new_plane::<u16>(&mut ra, w, h);
419   let mut dst = Aligned::<[i16; 128 * 128]>::uninitialized();
420 
421   let (row_frac, col_frac, src) = get_params(
422     &input_plane,
423     PlaneOffset { x: 0, y: 0 },
424     MotionVector { row: 0, col: 4 },
425   );
426   c.bench_function("prep_8tap_top_hbd", |b| {
427     b.iter(|| {
428       let _ = black_box(prep_8tap(
429         &mut dst.data,
430         src,
431         8,
432         8,
433         col_frac,
434         row_frac,
435         FilterMode::REGULAR,
436         FilterMode::REGULAR,
437         10,
438         cpu,
439       ));
440     })
441   });
442 }
443 
bench_prep_8tap_left_hbd(c: &mut Criterion)444 fn bench_prep_8tap_left_hbd(c: &mut Criterion) {
445   let mut ra = ChaChaRng::from_seed([0; 32]);
446   let cpu = CpuFeatureLevel::default();
447   let w = 640;
448   let h = 480;
449   let input_plane = new_plane::<u16>(&mut ra, w, h);
450   let mut dst = Aligned::<[i16; 128 * 128]>::uninitialized();
451 
452   let (row_frac, col_frac, src) = get_params(
453     &input_plane,
454     PlaneOffset { x: 0, y: 0 },
455     MotionVector { row: 4, col: 0 },
456   );
457   c.bench_function("prep_8tap_left_hbd", |b| {
458     b.iter(|| {
459       let _ = black_box(prep_8tap(
460         &mut dst.data,
461         src,
462         8,
463         8,
464         col_frac,
465         row_frac,
466         FilterMode::REGULAR,
467         FilterMode::REGULAR,
468         10,
469         cpu,
470       ));
471     })
472   });
473 }
474 
bench_prep_8tap_center_hbd(c: &mut Criterion)475 fn bench_prep_8tap_center_hbd(c: &mut Criterion) {
476   let mut ra = ChaChaRng::from_seed([0; 32]);
477   let cpu = CpuFeatureLevel::default();
478   let w = 640;
479   let h = 480;
480   let input_plane = new_plane::<u16>(&mut ra, w, h);
481   let mut dst = Aligned::<[i16; 128 * 128]>::uninitialized();
482 
483   let (row_frac, col_frac, src) = get_params(
484     &input_plane,
485     PlaneOffset { x: 0, y: 0 },
486     MotionVector { row: 4, col: 4 },
487   );
488   c.bench_function("prep_8tap_center_hbd", |b| {
489     b.iter(|| {
490       let _ = black_box(prep_8tap(
491         &mut dst.data,
492         src,
493         8,
494         8,
495         col_frac,
496         row_frac,
497         FilterMode::REGULAR,
498         FilterMode::REGULAR,
499         10,
500         cpu,
501       ));
502     })
503   });
504 }
505 
506 criterion_group!(
507   mc,
508   bench_put_8tap_top_left_lbd,
509   bench_put_8tap_top_lbd,
510   bench_put_8tap_left_lbd,
511   bench_put_8tap_center_lbd,
512   bench_put_8tap_top_left_hbd,
513   bench_put_8tap_top_hbd,
514   bench_put_8tap_left_hbd,
515   bench_put_8tap_center_hbd,
516   bench_prep_8tap_top_left_lbd,
517   bench_prep_8tap_top_lbd,
518   bench_prep_8tap_left_lbd,
519   bench_prep_8tap_center_lbd,
520   bench_prep_8tap_top_left_hbd,
521   bench_prep_8tap_top_hbd,
522   bench_prep_8tap_left_hbd,
523   bench_prep_8tap_center_hbd
524 );
525 
fill_plane<T: Pixel>(ra: &mut ChaChaRng, plane: &mut Plane<T>)526 fn fill_plane<T: Pixel>(ra: &mut ChaChaRng, plane: &mut Plane<T>) {
527   let stride = plane.cfg.stride;
528   for row in plane.data_origin_mut().chunks_mut(stride) {
529     for pixel in row {
530       let v: u8 = ra.gen();
531       *pixel = T::cast_from(v);
532     }
533   }
534 }
535 
new_plane<T: Pixel>( ra: &mut ChaChaRng, width: usize, height: usize, ) -> Plane<T>536 fn new_plane<T: Pixel>(
537   ra: &mut ChaChaRng, width: usize, height: usize,
538 ) -> Plane<T> {
539   let mut p = Plane::new(width, height, 0, 0, 128 + 8, 128 + 8);
540 
541   fill_plane(ra, &mut p);
542 
543   p
544 }
545 
get_params<T: Pixel>( rec_plane: &Plane<T>, po: PlaneOffset, mv: MotionVector, ) -> (i32, i32, PlaneSlice<T>)546 fn get_params<T: Pixel>(
547   rec_plane: &Plane<T>, po: PlaneOffset, mv: MotionVector,
548 ) -> (i32, i32, PlaneSlice<T>) {
549   let rec_cfg = &rec_plane.cfg;
550   let shift_row = 3 + rec_cfg.ydec;
551   let shift_col = 3 + rec_cfg.xdec;
552   let row_offset = mv.row as i32 >> shift_row;
553   let col_offset = mv.col as i32 >> shift_col;
554   let row_frac =
555     (mv.row as i32 - (row_offset << shift_row)) << (4 - shift_row);
556   let col_frac =
557     (mv.col as i32 - (col_offset << shift_col)) << (4 - shift_col);
558   let qo = PlaneOffset {
559     x: po.x + col_offset as isize - 3,
560     y: po.y + row_offset as isize - 3,
561   };
562   (row_frac, col_frac, rec_plane.slice(qo).clamp().subslice(3, 3))
563 }
564