1 // Copyright (c) 2019-2021, The rav1e contributors. All rights reserved
2 //
3 // This source code is subject to the terms of the BSD 2 Clause License and
4 // the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5 // was not distributed with this source code in the LICENSE file, you can
6 // obtain it at www.aomedia.org/license/software. If the Alliance for Open
7 // Media Patent License 1.0 was not distributed with this source code in the
8 // PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9 
10 use crate::context::MAX_TX_SIZE;
11 use crate::cpu_features::CpuFeatureLevel;
12 use crate::predict::{
13   rust, IntraEdgeFilterParameters, PredictionMode, PredictionVariant,
14 };
15 use crate::tiling::PlaneRegionMut;
16 use crate::transform::TxSize;
17 use crate::util::Aligned;
18 use crate::Pixel;
19 use v_frame::pixel::PixelType;
20 
// Declares every listed symbol as a foreign function with the 8-bit intra
// predictor signature `(dst, stride, topleft, width, height, angle)`.
//
// The block's ABI is written out as "C", which is what a bare `extern`
// block already defaults to, so the calling convention of the declared
// symbols is visible at the declaration site. A trailing comma after the
// last symbol is accepted.
macro_rules! decl_angular_ipred_fn {
  ($($f:ident),+ $(,)?) => {
    extern "C" {
      $(
        fn $f(
          dst: *mut u8, stride: libc::ptrdiff_t, topleft: *const u8,
          width: libc::c_int, height: libc::c_int, angle: libc::c_int,
        );
      )+
    }
  };
}
33 
34 decl_angular_ipred_fn! {
35   rav1e_ipred_dc_8bpc_avx2,
36   rav1e_ipred_dc_8bpc_ssse3,
37   rav1e_ipred_dc_128_8bpc_avx2,
38   rav1e_ipred_dc_128_8bpc_ssse3,
39   rav1e_ipred_dc_left_8bpc_avx2,
40   rav1e_ipred_dc_left_8bpc_ssse3,
41   rav1e_ipred_dc_top_8bpc_avx2,
42   rav1e_ipred_dc_top_8bpc_ssse3,
43   rav1e_ipred_v_8bpc_avx2,
44   rav1e_ipred_v_8bpc_ssse3,
45   rav1e_ipred_h_8bpc_avx2,
46   rav1e_ipred_h_8bpc_ssse3,
47   rav1e_ipred_z1_8bpc_avx2,
48   rav1e_ipred_z3_8bpc_avx2,
49   rav1e_ipred_smooth_8bpc_avx2,
50   rav1e_ipred_smooth_8bpc_ssse3,
51   rav1e_ipred_smooth_v_8bpc_avx2,
52   rav1e_ipred_smooth_v_8bpc_ssse3,
53   rav1e_ipred_smooth_h_8bpc_avx2,
54   rav1e_ipred_smooth_h_8bpc_ssse3,
55   rav1e_ipred_paeth_8bpc_avx2,
56   rav1e_ipred_paeth_8bpc_ssse3
57 }
58 
// Declares every listed symbol as a foreign function with the high bit
// depth (16-bit sample) intra predictor signature. Compared with the 8-bit
// form it operates on `u16` samples and takes three extra trailing
// arguments: `max_width`, `max_height` and `bit_depth_max`.
//
// The block's ABI is written out as "C", which is what a bare `extern`
// block already defaults to. A trailing comma after the last symbol is
// accepted.
macro_rules! decl_angular_ipred_hbd_fn {
  ($($f:ident),+ $(,)?) => {
    extern "C" {
      $(
        fn $f(
          dst: *mut u16, stride: libc::ptrdiff_t, topleft: *const u16,
          width: libc::c_int, height: libc::c_int, angle: libc::c_int,
          max_width: libc::c_int, max_height: libc::c_int,
          bit_depth_max: libc::c_int,
        );
      )+
    }
  };
}
73 
74 decl_angular_ipred_hbd_fn! {
75   rav1e_ipred_dc_16bpc_avx2,
76   rav1e_ipred_dc_128_16bpc_avx2,
77   rav1e_ipred_dc_left_16bpc_avx2,
78   rav1e_ipred_dc_top_16bpc_avx2,
79   rav1e_ipred_v_16bpc_avx2,
80   rav1e_ipred_h_16bpc_avx2,
81   rav1e_ipred_z1_16bpc_avx2,
82   rav1e_ipred_z3_16bpc_avx2,
83   rav1e_ipred_smooth_16bpc_avx2,
84   rav1e_ipred_smooth_v_16bpc_avx2,
85   rav1e_ipred_smooth_h_16bpc_avx2,
86   rav1e_ipred_paeth_16bpc_avx2
87 }
88 
89 // For z2 prediction, we need to provide extra parameters, dx and dy, which indicate
90 // the distance between the predicted block's top-left pixel and the frame's edge.
91 // It is required for the intra edge filtering process.
92 extern {
rav1e_ipred_z2_8bpc_avx2( dst: *mut u8, stride: libc::ptrdiff_t, topleft: *const u8, width: libc::c_int, height: libc::c_int, angle: libc::c_int, dx: libc::c_int, dy: libc::c_int, )93   fn rav1e_ipred_z2_8bpc_avx2(
94     dst: *mut u8, stride: libc::ptrdiff_t, topleft: *const u8,
95     width: libc::c_int, height: libc::c_int, angle: libc::c_int,
96     dx: libc::c_int, dy: libc::c_int,
97   );
98 
rav1e_ipred_z2_16bpc_avx2( dst: *mut u16, stride: libc::ptrdiff_t, topleft: *const u16, width: libc::c_int, height: libc::c_int, angle: libc::c_int, dx: libc::c_int, dy: libc::c_int, bit_depth_max: libc::c_int, )99   fn rav1e_ipred_z2_16bpc_avx2(
100     dst: *mut u16, stride: libc::ptrdiff_t, topleft: *const u16,
101     width: libc::c_int, height: libc::c_int, angle: libc::c_int,
102     dx: libc::c_int, dy: libc::c_int, bit_depth_max: libc::c_int,
103   );
104 }
105 
// Declares every listed symbol as a foreign function with the 8-bit CfL
// predictor signature `(dst, stride, topleft, width, height, ac, alpha)`,
// where `ac` points to `i16` values.
//
// The block's ABI is written out as "C", which is what a bare `extern`
// block already defaults to. A trailing comma after the last symbol is
// accepted.
macro_rules! decl_cfl_pred_fn {
  ($($f:ident),+ $(,)?) => {
    extern "C" {
      $(
        fn $f(
          dst: *mut u8, stride: libc::ptrdiff_t, topleft: *const u8,
          width: libc::c_int, height: libc::c_int, ac: *const i16,
          alpha: libc::c_int,
        );
      )+
    }
  };
}
119 
120 decl_cfl_pred_fn! {
121   rav1e_ipred_cfl_8bpc_avx2,
122   rav1e_ipred_cfl_8bpc_ssse3,
123   rav1e_ipred_cfl_128_8bpc_avx2,
124   rav1e_ipred_cfl_128_8bpc_ssse3,
125   rav1e_ipred_cfl_left_8bpc_avx2,
126   rav1e_ipred_cfl_left_8bpc_ssse3,
127   rav1e_ipred_cfl_top_8bpc_avx2,
128   rav1e_ipred_cfl_top_8bpc_ssse3
129 }
130 
// Declares every listed symbol as a foreign function with the high bit
// depth (16-bit sample) CfL predictor signature. Compared with the 8-bit
// form it operates on `u16` samples and takes a trailing `bit_depth_max`
// argument.
//
// The block's ABI is written out as "C", which is what a bare `extern`
// block already defaults to. A trailing comma after the last symbol is
// accepted.
macro_rules! decl_cfl_pred_hbd_fn {
  ($($f:ident),+ $(,)?) => {
    extern "C" {
      $(
        fn $f(
          dst: *mut u16, stride: libc::ptrdiff_t, topleft: *const u16,
          width: libc::c_int, height: libc::c_int, ac: *const i16,
          alpha: libc::c_int, bit_depth_max: libc::c_int,
        );
      )+
    }
  };
}
144 
145 decl_cfl_pred_hbd_fn! {
146   rav1e_ipred_cfl_16bpc_avx2,
147   rav1e_ipred_cfl_128_16bpc_avx2,
148   rav1e_ipred_cfl_left_16bpc_avx2,
149   rav1e_ipred_cfl_top_16bpc_avx2
150 }
151 
152 #[inline(always)]
dispatch_predict_intra<T: Pixel>( mode: PredictionMode, variant: PredictionVariant, dst: &mut PlaneRegionMut<'_, T>, tx_size: TxSize, bit_depth: usize, ac: &[i16], angle: isize, ief_params: Option<IntraEdgeFilterParameters>, edge_buf: &Aligned<[T; 4 * MAX_TX_SIZE + 1]>, cpu: CpuFeatureLevel, )153 pub fn dispatch_predict_intra<T: Pixel>(
154   mode: PredictionMode, variant: PredictionVariant,
155   dst: &mut PlaneRegionMut<'_, T>, tx_size: TxSize, bit_depth: usize,
156   ac: &[i16], angle: isize, ief_params: Option<IntraEdgeFilterParameters>,
157   edge_buf: &Aligned<[T; 4 * MAX_TX_SIZE + 1]>, cpu: CpuFeatureLevel,
158 ) {
159   let call_rust = |dst: &mut PlaneRegionMut<'_, T>| {
160     rust::dispatch_predict_intra(
161       mode, variant, dst, tx_size, bit_depth, ac, angle, ief_params, edge_buf,
162       cpu,
163     );
164   };
165 
166   unsafe {
167     let stride = T::to_asm_stride(dst.plane_cfg.stride) as libc::ptrdiff_t;
168     let w = tx_size.width() as libc::c_int;
169     let h = tx_size.height() as libc::c_int;
170     let angle = angle as libc::c_int;
171 
172     match T::type_enum() {
173       PixelType::U8 if cpu >= CpuFeatureLevel::SSSE3 => {
174         let dst_ptr = dst.data_ptr_mut() as *mut _;
175         let edge_ptr =
176           edge_buf.data.as_ptr().offset(2 * MAX_TX_SIZE as isize) as *const _;
177         if cpu >= CpuFeatureLevel::AVX2 {
178           match mode {
179             PredictionMode::DC_PRED => {
180               (match variant {
181                 PredictionVariant::NONE => rav1e_ipred_dc_128_8bpc_avx2,
182                 PredictionVariant::LEFT => rav1e_ipred_dc_left_8bpc_avx2,
183                 PredictionVariant::TOP => rav1e_ipred_dc_top_8bpc_avx2,
184                 PredictionVariant::BOTH => rav1e_ipred_dc_8bpc_avx2,
185               })(dst_ptr, stride, edge_ptr, w, h, angle);
186             }
187             PredictionMode::V_PRED if angle == 90 => {
188               rav1e_ipred_v_8bpc_avx2(dst_ptr, stride, edge_ptr, w, h, angle);
189             }
190             PredictionMode::H_PRED if angle == 180 => {
191               rav1e_ipred_h_8bpc_avx2(dst_ptr, stride, edge_ptr, w, h, angle);
192             }
193             PredictionMode::V_PRED
194             | PredictionMode::H_PRED
195             | PredictionMode::D45_PRED
196             | PredictionMode::D135_PRED
197             | PredictionMode::D113_PRED
198             | PredictionMode::D157_PRED
199             | PredictionMode::D203_PRED
200             | PredictionMode::D67_PRED => {
201               let (enable_ief, ief_smooth_filter) =
202                 if let Some(params) = ief_params {
203                   (
204                     true as libc::c_int,
205                     params.use_smooth_filter() as libc::c_int,
206                   )
207                 } else {
208                   (false as libc::c_int, false as libc::c_int)
209                 };
210 
211               // dav1d assembly uses the unused integer bits to hold IEF parameters
212               let angle_arg =
213                 angle | (enable_ief << 10) | (ief_smooth_filter << 9);
214 
215               // From dav1d, bw and bh are the frame width and height rounded to 8px units
216               let (bw, bh) = (
217                 ((dst.plane_cfg.width + 7) >> 3) << 3,
218                 ((dst.plane_cfg.height + 7) >> 3) << 3,
219               );
220               // From dav1d, dx and dy are the distance from the predicted block to the frame edge
221               let (dx, dy) = (
222                 (bw as isize - dst.rect().x as isize) as libc::c_int,
223                 (bh as isize - dst.rect().y as isize) as libc::c_int,
224               );
225 
226               if angle <= 90 {
227                 rav1e_ipred_z1_8bpc_avx2(
228                   dst_ptr, stride, edge_ptr, w, h, angle_arg,
229                 );
230               } else if angle < 180 {
231                 rav1e_ipred_z2_8bpc_avx2(
232                   dst_ptr, stride, edge_ptr, w, h, angle_arg, dx, dy,
233                 );
234               } else {
235                 rav1e_ipred_z3_8bpc_avx2(
236                   dst_ptr, stride, edge_ptr, w, h, angle_arg,
237                 );
238               }
239             }
240             PredictionMode::SMOOTH_PRED => {
241               rav1e_ipred_smooth_8bpc_avx2(
242                 dst_ptr, stride, edge_ptr, w, h, angle,
243               );
244             }
245             PredictionMode::SMOOTH_V_PRED => {
246               rav1e_ipred_smooth_v_8bpc_avx2(
247                 dst_ptr, stride, edge_ptr, w, h, angle,
248               );
249             }
250             PredictionMode::SMOOTH_H_PRED => {
251               rav1e_ipred_smooth_h_8bpc_avx2(
252                 dst_ptr, stride, edge_ptr, w, h, angle,
253               );
254             }
255             PredictionMode::PAETH_PRED => {
256               rav1e_ipred_paeth_8bpc_avx2(
257                 dst_ptr, stride, edge_ptr, w, h, angle,
258               );
259             }
260             PredictionMode::UV_CFL_PRED => {
261               let ac_ptr = ac.as_ptr() as *const _;
262               (match variant {
263                 PredictionVariant::NONE => rav1e_ipred_cfl_128_8bpc_avx2,
264                 PredictionVariant::LEFT => rav1e_ipred_cfl_left_8bpc_avx2,
265                 PredictionVariant::TOP => rav1e_ipred_cfl_top_8bpc_avx2,
266                 PredictionVariant::BOTH => rav1e_ipred_cfl_8bpc_avx2,
267               })(dst_ptr, stride, edge_ptr, w, h, ac_ptr, angle);
268             }
269             _ => call_rust(dst),
270           }
271         } else if cpu >= CpuFeatureLevel::SSSE3 {
272           match mode {
273             PredictionMode::DC_PRED => {
274               (match variant {
275                 PredictionVariant::NONE => rav1e_ipred_dc_128_8bpc_ssse3,
276                 PredictionVariant::LEFT => rav1e_ipred_dc_left_8bpc_ssse3,
277                 PredictionVariant::TOP => rav1e_ipred_dc_top_8bpc_ssse3,
278                 PredictionVariant::BOTH => rav1e_ipred_dc_8bpc_ssse3,
279               })(dst_ptr, stride, edge_ptr, w, h, angle);
280             }
281             PredictionMode::V_PRED if angle == 90 => {
282               rav1e_ipred_v_8bpc_ssse3(dst_ptr, stride, edge_ptr, w, h, angle);
283             }
284             PredictionMode::H_PRED if angle == 180 => {
285               rav1e_ipred_h_8bpc_ssse3(dst_ptr, stride, edge_ptr, w, h, angle);
286             }
287             PredictionMode::SMOOTH_PRED => {
288               rav1e_ipred_smooth_8bpc_ssse3(
289                 dst_ptr, stride, edge_ptr, w, h, angle,
290               );
291             }
292             PredictionMode::SMOOTH_V_PRED => {
293               rav1e_ipred_smooth_v_8bpc_ssse3(
294                 dst_ptr, stride, edge_ptr, w, h, angle,
295               );
296             }
297             PredictionMode::SMOOTH_H_PRED => {
298               rav1e_ipred_smooth_h_8bpc_ssse3(
299                 dst_ptr, stride, edge_ptr, w, h, angle,
300               );
301             }
302             PredictionMode::PAETH_PRED => {
303               rav1e_ipred_paeth_8bpc_ssse3(
304                 dst_ptr, stride, edge_ptr, w, h, angle,
305               );
306             }
307             PredictionMode::UV_CFL_PRED => {
308               let ac_ptr = ac.as_ptr() as *const _;
309               (match variant {
310                 PredictionVariant::NONE => rav1e_ipred_cfl_128_8bpc_ssse3,
311                 PredictionVariant::LEFT => rav1e_ipred_cfl_left_8bpc_ssse3,
312                 PredictionVariant::TOP => rav1e_ipred_cfl_top_8bpc_ssse3,
313                 PredictionVariant::BOTH => rav1e_ipred_cfl_8bpc_ssse3,
314               })(dst_ptr, stride, edge_ptr, w, h, ac_ptr, angle);
315             }
316             _ => call_rust(dst),
317           }
318         }
319       }
320       PixelType::U16 if cpu >= CpuFeatureLevel::AVX2 && bit_depth > 8 => {
321         let dst_ptr = dst.data_ptr_mut() as *mut _;
322         let edge_ptr =
323           edge_buf.data.as_ptr().offset(2 * MAX_TX_SIZE as isize) as *const _;
324         let bd_max = (1 << bit_depth) - 1;
325         match mode {
326           PredictionMode::DC_PRED => {
327             (match variant {
328               PredictionVariant::NONE => rav1e_ipred_dc_128_16bpc_avx2,
329               PredictionVariant::LEFT => rav1e_ipred_dc_left_16bpc_avx2,
330               PredictionVariant::TOP => rav1e_ipred_dc_top_16bpc_avx2,
331               PredictionVariant::BOTH => rav1e_ipred_dc_16bpc_avx2,
332             })(
333               dst_ptr, stride, edge_ptr, w, h, angle, 0, 0, bd_max
334             );
335           }
336           PredictionMode::V_PRED if angle == 90 => {
337             rav1e_ipred_v_16bpc_avx2(
338               dst_ptr, stride, edge_ptr, w, h, angle, 0, 0, bd_max,
339             );
340           }
341           PredictionMode::H_PRED if angle == 180 => {
342             rav1e_ipred_h_16bpc_avx2(
343               dst_ptr, stride, edge_ptr, w, h, angle, 0, 0, bd_max,
344             );
345           }
346           PredictionMode::V_PRED
347           | PredictionMode::H_PRED
348           | PredictionMode::D45_PRED
349           | PredictionMode::D135_PRED
350           | PredictionMode::D113_PRED
351           | PredictionMode::D157_PRED
352           | PredictionMode::D203_PRED
353           | PredictionMode::D67_PRED => {
354             let (enable_ief, ief_smooth_filter) = if let Some(params) =
355               ief_params
356             {
357               (true as libc::c_int, params.use_smooth_filter() as libc::c_int)
358             } else {
359               (false as libc::c_int, false as libc::c_int)
360             };
361 
362             // dav1d assembly uses the unused integer bits to hold IEF parameters
363             let angle_arg =
364               angle | (enable_ief << 10) | (ief_smooth_filter << 9);
365 
366             // From dav1d, bw and bh are the frame width and height rounded to 8px units
367             let (bw, bh) = (
368               ((dst.plane_cfg.width + 7) >> 3) << 3,
369               ((dst.plane_cfg.height + 7) >> 3) << 3,
370             );
371             // From dav1d, dx and dy are the distance from the predicted block to the frame edge
372             let (dx, dy) = (
373               (bw as isize - dst.rect().x as isize) as libc::c_int,
374               (bh as isize - dst.rect().y as isize) as libc::c_int,
375             );
376 
377             if angle <= 90 {
378               rav1e_ipred_z1_16bpc_avx2(
379                 dst_ptr, stride, edge_ptr, w, h, angle_arg, 0, 0, bd_max,
380               );
381             } else if angle < 180 {
382               rav1e_ipred_z2_16bpc_avx2(
383                 dst_ptr, stride, edge_ptr, w, h, angle_arg, dx, dy, bd_max,
384               );
385             } else {
386               rav1e_ipred_z3_16bpc_avx2(
387                 dst_ptr, stride, edge_ptr, w, h, angle_arg, 0, 0, bd_max,
388               );
389             }
390           }
391           PredictionMode::SMOOTH_PRED => {
392             rav1e_ipred_smooth_16bpc_avx2(
393               dst_ptr, stride, edge_ptr, w, h, angle, 0, 0, bd_max,
394             );
395           }
396           PredictionMode::SMOOTH_V_PRED => {
397             rav1e_ipred_smooth_v_16bpc_avx2(
398               dst_ptr, stride, edge_ptr, w, h, angle, 0, 0, bd_max,
399             );
400           }
401           PredictionMode::SMOOTH_H_PRED => {
402             rav1e_ipred_smooth_h_16bpc_avx2(
403               dst_ptr, stride, edge_ptr, w, h, angle, 0, 0, bd_max,
404             );
405           }
406           PredictionMode::PAETH_PRED => {
407             rav1e_ipred_paeth_16bpc_avx2(
408               dst_ptr, stride, edge_ptr, w, h, angle, 0, 0, bd_max,
409             );
410           }
411           PredictionMode::UV_CFL_PRED => {
412             let ac_ptr = ac.as_ptr() as *const _;
413             (match variant {
414               PredictionVariant::NONE => rav1e_ipred_cfl_128_16bpc_avx2,
415               PredictionVariant::LEFT => rav1e_ipred_cfl_left_16bpc_avx2,
416               PredictionVariant::TOP => rav1e_ipred_cfl_top_16bpc_avx2,
417               PredictionVariant::BOTH => rav1e_ipred_cfl_16bpc_avx2,
418             })(
419               dst_ptr, stride, edge_ptr, w, h, ac_ptr, angle, bd_max
420             );
421           }
422           _ => call_rust(dst),
423         }
424       }
425       _ => call_rust(dst),
426     }
427   }
428 }
429