1 // Copyright (c) 2019-2021, The rav1e contributors. All rights reserved
2 //
3 // This source code is subject to the terms of the BSD 2 Clause License and
4 // the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5 // was not distributed with this source code in the LICENSE file, you can
6 // obtain it at www.aomedia.org/license/software. If the Alliance for Open
7 // Media Patent License 1.0 was not distributed with this source code in the
8 // PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9
10 use crate::context::MAX_TX_SIZE;
11 use crate::cpu_features::CpuFeatureLevel;
12 use crate::predict::{
13 rust, IntraEdgeFilterParameters, PredictionMode, PredictionVariant,
14 };
15 use crate::tiling::PlaneRegionMut;
16 use crate::transform::TxSize;
17 use crate::util::Aligned;
18 use crate::Pixel;
19 use v_frame::pixel::PixelType;
20
// Declares the 8-bit intra prediction assembly kernels named in the argument
// list. Every kernel declared through this macro shares one C signature:
// (dst, stride, topleft, width, height, angle).
macro_rules! decl_angular_ipred_fn {
  ($($f:ident),+) => {
    extern "C" {
      $(
        fn $f(
          dst: *mut u8,
          stride: libc::ptrdiff_t,
          topleft: *const u8,
          width: libc::c_int,
          height: libc::c_int,
          angle: libc::c_int,
        );
      )+
    }
  };
}
33
34 decl_angular_ipred_fn! {
35 rav1e_ipred_dc_8bpc_avx2,
36 rav1e_ipred_dc_8bpc_ssse3,
37 rav1e_ipred_dc_128_8bpc_avx2,
38 rav1e_ipred_dc_128_8bpc_ssse3,
39 rav1e_ipred_dc_left_8bpc_avx2,
40 rav1e_ipred_dc_left_8bpc_ssse3,
41 rav1e_ipred_dc_top_8bpc_avx2,
42 rav1e_ipred_dc_top_8bpc_ssse3,
43 rav1e_ipred_v_8bpc_avx2,
44 rav1e_ipred_v_8bpc_ssse3,
45 rav1e_ipred_h_8bpc_avx2,
46 rav1e_ipred_h_8bpc_ssse3,
47 rav1e_ipred_z1_8bpc_avx2,
48 rav1e_ipred_z3_8bpc_avx2,
49 rav1e_ipred_smooth_8bpc_avx2,
50 rav1e_ipred_smooth_8bpc_ssse3,
51 rav1e_ipred_smooth_v_8bpc_avx2,
52 rav1e_ipred_smooth_v_8bpc_ssse3,
53 rav1e_ipred_smooth_h_8bpc_avx2,
54 rav1e_ipred_smooth_h_8bpc_ssse3,
55 rav1e_ipred_paeth_8bpc_avx2,
56 rav1e_ipred_paeth_8bpc_ssse3
57 }
58
// Declares the high bit depth (16-bit sample) intra prediction assembly
// kernels named in the argument list. Compared with the 8-bit declarations,
// the sample pointers are `u16` and three trailing integers are added:
// max_width, max_height and bit_depth_max.
macro_rules! decl_angular_ipred_hbd_fn {
  ($($f:ident),+) => {
    extern "C" {
      $(
        fn $f(
          dst: *mut u16,
          stride: libc::ptrdiff_t,
          topleft: *const u16,
          width: libc::c_int,
          height: libc::c_int,
          angle: libc::c_int,
          max_width: libc::c_int,
          max_height: libc::c_int,
          bit_depth_max: libc::c_int,
        );
      )+
    }
  };
}
73
74 decl_angular_ipred_hbd_fn! {
75 rav1e_ipred_dc_16bpc_avx2,
76 rav1e_ipred_dc_128_16bpc_avx2,
77 rav1e_ipred_dc_left_16bpc_avx2,
78 rav1e_ipred_dc_top_16bpc_avx2,
79 rav1e_ipred_v_16bpc_avx2,
80 rav1e_ipred_h_16bpc_avx2,
81 rav1e_ipred_z1_16bpc_avx2,
82 rav1e_ipred_z3_16bpc_avx2,
83 rav1e_ipred_smooth_16bpc_avx2,
84 rav1e_ipred_smooth_v_16bpc_avx2,
85 rav1e_ipred_smooth_h_16bpc_avx2,
86 rav1e_ipred_paeth_16bpc_avx2
87 }
88
89 // For z2 prediction, we need to provide extra parameters, dx and dy, which indicate
90 // the distance between the predicted block's top-left pixel and the frame's edge.
91 // It is required for the intra edge filtering process.
92 extern {
rav1e_ipred_z2_8bpc_avx2( dst: *mut u8, stride: libc::ptrdiff_t, topleft: *const u8, width: libc::c_int, height: libc::c_int, angle: libc::c_int, dx: libc::c_int, dy: libc::c_int, )93 fn rav1e_ipred_z2_8bpc_avx2(
94 dst: *mut u8, stride: libc::ptrdiff_t, topleft: *const u8,
95 width: libc::c_int, height: libc::c_int, angle: libc::c_int,
96 dx: libc::c_int, dy: libc::c_int,
97 );
98
rav1e_ipred_z2_16bpc_avx2( dst: *mut u16, stride: libc::ptrdiff_t, topleft: *const u16, width: libc::c_int, height: libc::c_int, angle: libc::c_int, dx: libc::c_int, dy: libc::c_int, bit_depth_max: libc::c_int, )99 fn rav1e_ipred_z2_16bpc_avx2(
100 dst: *mut u16, stride: libc::ptrdiff_t, topleft: *const u16,
101 width: libc::c_int, height: libc::c_int, angle: libc::c_int,
102 dx: libc::c_int, dy: libc::c_int, bit_depth_max: libc::c_int,
103 );
104 }
105
// Declares the 8-bit CfL prediction assembly kernels named in the argument
// list. In place of an angle these kernels take a pointer to the `i16` AC
// buffer followed by an integer `alpha`.
macro_rules! decl_cfl_pred_fn {
  ($($f:ident),+) => {
    extern "C" {
      $(
        fn $f(
          dst: *mut u8,
          stride: libc::ptrdiff_t,
          topleft: *const u8,
          width: libc::c_int,
          height: libc::c_int,
          ac: *const i16,
          alpha: libc::c_int,
        );
      )+
    }
  };
}
119
120 decl_cfl_pred_fn! {
121 rav1e_ipred_cfl_8bpc_avx2,
122 rav1e_ipred_cfl_8bpc_ssse3,
123 rav1e_ipred_cfl_128_8bpc_avx2,
124 rav1e_ipred_cfl_128_8bpc_ssse3,
125 rav1e_ipred_cfl_left_8bpc_avx2,
126 rav1e_ipred_cfl_left_8bpc_ssse3,
127 rav1e_ipred_cfl_top_8bpc_avx2,
128 rav1e_ipred_cfl_top_8bpc_ssse3
129 }
130
// Declares the high bit depth (16-bit sample) CfL prediction assembly kernels
// named in the argument list. Same shape as the 8-bit CfL declarations, with
// `u16` sample pointers and a trailing bit_depth_max argument.
macro_rules! decl_cfl_pred_hbd_fn {
  ($($f:ident),+) => {
    extern "C" {
      $(
        fn $f(
          dst: *mut u16,
          stride: libc::ptrdiff_t,
          topleft: *const u16,
          width: libc::c_int,
          height: libc::c_int,
          ac: *const i16,
          alpha: libc::c_int,
          bit_depth_max: libc::c_int,
        );
      )+
    }
  };
}
144
145 decl_cfl_pred_hbd_fn! {
146 rav1e_ipred_cfl_16bpc_avx2,
147 rav1e_ipred_cfl_128_16bpc_avx2,
148 rav1e_ipred_cfl_left_16bpc_avx2,
149 rav1e_ipred_cfl_top_16bpc_avx2
150 }
151
152 #[inline(always)]
dispatch_predict_intra<T: Pixel>( mode: PredictionMode, variant: PredictionVariant, dst: &mut PlaneRegionMut<'_, T>, tx_size: TxSize, bit_depth: usize, ac: &[i16], angle: isize, ief_params: Option<IntraEdgeFilterParameters>, edge_buf: &Aligned<[T; 4 * MAX_TX_SIZE + 1]>, cpu: CpuFeatureLevel, )153 pub fn dispatch_predict_intra<T: Pixel>(
154 mode: PredictionMode, variant: PredictionVariant,
155 dst: &mut PlaneRegionMut<'_, T>, tx_size: TxSize, bit_depth: usize,
156 ac: &[i16], angle: isize, ief_params: Option<IntraEdgeFilterParameters>,
157 edge_buf: &Aligned<[T; 4 * MAX_TX_SIZE + 1]>, cpu: CpuFeatureLevel,
158 ) {
159 let call_rust = |dst: &mut PlaneRegionMut<'_, T>| {
160 rust::dispatch_predict_intra(
161 mode, variant, dst, tx_size, bit_depth, ac, angle, ief_params, edge_buf,
162 cpu,
163 );
164 };
165
166 unsafe {
167 let stride = T::to_asm_stride(dst.plane_cfg.stride) as libc::ptrdiff_t;
168 let w = tx_size.width() as libc::c_int;
169 let h = tx_size.height() as libc::c_int;
170 let angle = angle as libc::c_int;
171
172 match T::type_enum() {
173 PixelType::U8 if cpu >= CpuFeatureLevel::SSSE3 => {
174 let dst_ptr = dst.data_ptr_mut() as *mut _;
175 let edge_ptr =
176 edge_buf.data.as_ptr().offset(2 * MAX_TX_SIZE as isize) as *const _;
177 if cpu >= CpuFeatureLevel::AVX2 {
178 match mode {
179 PredictionMode::DC_PRED => {
180 (match variant {
181 PredictionVariant::NONE => rav1e_ipred_dc_128_8bpc_avx2,
182 PredictionVariant::LEFT => rav1e_ipred_dc_left_8bpc_avx2,
183 PredictionVariant::TOP => rav1e_ipred_dc_top_8bpc_avx2,
184 PredictionVariant::BOTH => rav1e_ipred_dc_8bpc_avx2,
185 })(dst_ptr, stride, edge_ptr, w, h, angle);
186 }
187 PredictionMode::V_PRED if angle == 90 => {
188 rav1e_ipred_v_8bpc_avx2(dst_ptr, stride, edge_ptr, w, h, angle);
189 }
190 PredictionMode::H_PRED if angle == 180 => {
191 rav1e_ipred_h_8bpc_avx2(dst_ptr, stride, edge_ptr, w, h, angle);
192 }
193 PredictionMode::V_PRED
194 | PredictionMode::H_PRED
195 | PredictionMode::D45_PRED
196 | PredictionMode::D135_PRED
197 | PredictionMode::D113_PRED
198 | PredictionMode::D157_PRED
199 | PredictionMode::D203_PRED
200 | PredictionMode::D67_PRED => {
201 let (enable_ief, ief_smooth_filter) =
202 if let Some(params) = ief_params {
203 (
204 true as libc::c_int,
205 params.use_smooth_filter() as libc::c_int,
206 )
207 } else {
208 (false as libc::c_int, false as libc::c_int)
209 };
210
211 // dav1d assembly uses the unused integer bits to hold IEF parameters
212 let angle_arg =
213 angle | (enable_ief << 10) | (ief_smooth_filter << 9);
214
215 // From dav1d, bw and bh are the frame width and height rounded to 8px units
216 let (bw, bh) = (
217 ((dst.plane_cfg.width + 7) >> 3) << 3,
218 ((dst.plane_cfg.height + 7) >> 3) << 3,
219 );
220 // From dav1d, dx and dy are the distance from the predicted block to the frame edge
221 let (dx, dy) = (
222 (bw as isize - dst.rect().x as isize) as libc::c_int,
223 (bh as isize - dst.rect().y as isize) as libc::c_int,
224 );
225
226 if angle <= 90 {
227 rav1e_ipred_z1_8bpc_avx2(
228 dst_ptr, stride, edge_ptr, w, h, angle_arg,
229 );
230 } else if angle < 180 {
231 rav1e_ipred_z2_8bpc_avx2(
232 dst_ptr, stride, edge_ptr, w, h, angle_arg, dx, dy,
233 );
234 } else {
235 rav1e_ipred_z3_8bpc_avx2(
236 dst_ptr, stride, edge_ptr, w, h, angle_arg,
237 );
238 }
239 }
240 PredictionMode::SMOOTH_PRED => {
241 rav1e_ipred_smooth_8bpc_avx2(
242 dst_ptr, stride, edge_ptr, w, h, angle,
243 );
244 }
245 PredictionMode::SMOOTH_V_PRED => {
246 rav1e_ipred_smooth_v_8bpc_avx2(
247 dst_ptr, stride, edge_ptr, w, h, angle,
248 );
249 }
250 PredictionMode::SMOOTH_H_PRED => {
251 rav1e_ipred_smooth_h_8bpc_avx2(
252 dst_ptr, stride, edge_ptr, w, h, angle,
253 );
254 }
255 PredictionMode::PAETH_PRED => {
256 rav1e_ipred_paeth_8bpc_avx2(
257 dst_ptr, stride, edge_ptr, w, h, angle,
258 );
259 }
260 PredictionMode::UV_CFL_PRED => {
261 let ac_ptr = ac.as_ptr() as *const _;
262 (match variant {
263 PredictionVariant::NONE => rav1e_ipred_cfl_128_8bpc_avx2,
264 PredictionVariant::LEFT => rav1e_ipred_cfl_left_8bpc_avx2,
265 PredictionVariant::TOP => rav1e_ipred_cfl_top_8bpc_avx2,
266 PredictionVariant::BOTH => rav1e_ipred_cfl_8bpc_avx2,
267 })(dst_ptr, stride, edge_ptr, w, h, ac_ptr, angle);
268 }
269 _ => call_rust(dst),
270 }
271 } else if cpu >= CpuFeatureLevel::SSSE3 {
272 match mode {
273 PredictionMode::DC_PRED => {
274 (match variant {
275 PredictionVariant::NONE => rav1e_ipred_dc_128_8bpc_ssse3,
276 PredictionVariant::LEFT => rav1e_ipred_dc_left_8bpc_ssse3,
277 PredictionVariant::TOP => rav1e_ipred_dc_top_8bpc_ssse3,
278 PredictionVariant::BOTH => rav1e_ipred_dc_8bpc_ssse3,
279 })(dst_ptr, stride, edge_ptr, w, h, angle);
280 }
281 PredictionMode::V_PRED if angle == 90 => {
282 rav1e_ipred_v_8bpc_ssse3(dst_ptr, stride, edge_ptr, w, h, angle);
283 }
284 PredictionMode::H_PRED if angle == 180 => {
285 rav1e_ipred_h_8bpc_ssse3(dst_ptr, stride, edge_ptr, w, h, angle);
286 }
287 PredictionMode::SMOOTH_PRED => {
288 rav1e_ipred_smooth_8bpc_ssse3(
289 dst_ptr, stride, edge_ptr, w, h, angle,
290 );
291 }
292 PredictionMode::SMOOTH_V_PRED => {
293 rav1e_ipred_smooth_v_8bpc_ssse3(
294 dst_ptr, stride, edge_ptr, w, h, angle,
295 );
296 }
297 PredictionMode::SMOOTH_H_PRED => {
298 rav1e_ipred_smooth_h_8bpc_ssse3(
299 dst_ptr, stride, edge_ptr, w, h, angle,
300 );
301 }
302 PredictionMode::PAETH_PRED => {
303 rav1e_ipred_paeth_8bpc_ssse3(
304 dst_ptr, stride, edge_ptr, w, h, angle,
305 );
306 }
307 PredictionMode::UV_CFL_PRED => {
308 let ac_ptr = ac.as_ptr() as *const _;
309 (match variant {
310 PredictionVariant::NONE => rav1e_ipred_cfl_128_8bpc_ssse3,
311 PredictionVariant::LEFT => rav1e_ipred_cfl_left_8bpc_ssse3,
312 PredictionVariant::TOP => rav1e_ipred_cfl_top_8bpc_ssse3,
313 PredictionVariant::BOTH => rav1e_ipred_cfl_8bpc_ssse3,
314 })(dst_ptr, stride, edge_ptr, w, h, ac_ptr, angle);
315 }
316 _ => call_rust(dst),
317 }
318 }
319 }
320 PixelType::U16 if cpu >= CpuFeatureLevel::AVX2 && bit_depth > 8 => {
321 let dst_ptr = dst.data_ptr_mut() as *mut _;
322 let edge_ptr =
323 edge_buf.data.as_ptr().offset(2 * MAX_TX_SIZE as isize) as *const _;
324 let bd_max = (1 << bit_depth) - 1;
325 match mode {
326 PredictionMode::DC_PRED => {
327 (match variant {
328 PredictionVariant::NONE => rav1e_ipred_dc_128_16bpc_avx2,
329 PredictionVariant::LEFT => rav1e_ipred_dc_left_16bpc_avx2,
330 PredictionVariant::TOP => rav1e_ipred_dc_top_16bpc_avx2,
331 PredictionVariant::BOTH => rav1e_ipred_dc_16bpc_avx2,
332 })(
333 dst_ptr, stride, edge_ptr, w, h, angle, 0, 0, bd_max
334 );
335 }
336 PredictionMode::V_PRED if angle == 90 => {
337 rav1e_ipred_v_16bpc_avx2(
338 dst_ptr, stride, edge_ptr, w, h, angle, 0, 0, bd_max,
339 );
340 }
341 PredictionMode::H_PRED if angle == 180 => {
342 rav1e_ipred_h_16bpc_avx2(
343 dst_ptr, stride, edge_ptr, w, h, angle, 0, 0, bd_max,
344 );
345 }
346 PredictionMode::V_PRED
347 | PredictionMode::H_PRED
348 | PredictionMode::D45_PRED
349 | PredictionMode::D135_PRED
350 | PredictionMode::D113_PRED
351 | PredictionMode::D157_PRED
352 | PredictionMode::D203_PRED
353 | PredictionMode::D67_PRED => {
354 let (enable_ief, ief_smooth_filter) = if let Some(params) =
355 ief_params
356 {
357 (true as libc::c_int, params.use_smooth_filter() as libc::c_int)
358 } else {
359 (false as libc::c_int, false as libc::c_int)
360 };
361
362 // dav1d assembly uses the unused integer bits to hold IEF parameters
363 let angle_arg =
364 angle | (enable_ief << 10) | (ief_smooth_filter << 9);
365
366 // From dav1d, bw and bh are the frame width and height rounded to 8px units
367 let (bw, bh) = (
368 ((dst.plane_cfg.width + 7) >> 3) << 3,
369 ((dst.plane_cfg.height + 7) >> 3) << 3,
370 );
371 // From dav1d, dx and dy are the distance from the predicted block to the frame edge
372 let (dx, dy) = (
373 (bw as isize - dst.rect().x as isize) as libc::c_int,
374 (bh as isize - dst.rect().y as isize) as libc::c_int,
375 );
376
377 if angle <= 90 {
378 rav1e_ipred_z1_16bpc_avx2(
379 dst_ptr, stride, edge_ptr, w, h, angle_arg, 0, 0, bd_max,
380 );
381 } else if angle < 180 {
382 rav1e_ipred_z2_16bpc_avx2(
383 dst_ptr, stride, edge_ptr, w, h, angle_arg, dx, dy, bd_max,
384 );
385 } else {
386 rav1e_ipred_z3_16bpc_avx2(
387 dst_ptr, stride, edge_ptr, w, h, angle_arg, 0, 0, bd_max,
388 );
389 }
390 }
391 PredictionMode::SMOOTH_PRED => {
392 rav1e_ipred_smooth_16bpc_avx2(
393 dst_ptr, stride, edge_ptr, w, h, angle, 0, 0, bd_max,
394 );
395 }
396 PredictionMode::SMOOTH_V_PRED => {
397 rav1e_ipred_smooth_v_16bpc_avx2(
398 dst_ptr, stride, edge_ptr, w, h, angle, 0, 0, bd_max,
399 );
400 }
401 PredictionMode::SMOOTH_H_PRED => {
402 rav1e_ipred_smooth_h_16bpc_avx2(
403 dst_ptr, stride, edge_ptr, w, h, angle, 0, 0, bd_max,
404 );
405 }
406 PredictionMode::PAETH_PRED => {
407 rav1e_ipred_paeth_16bpc_avx2(
408 dst_ptr, stride, edge_ptr, w, h, angle, 0, 0, bd_max,
409 );
410 }
411 PredictionMode::UV_CFL_PRED => {
412 let ac_ptr = ac.as_ptr() as *const _;
413 (match variant {
414 PredictionVariant::NONE => rav1e_ipred_cfl_128_16bpc_avx2,
415 PredictionVariant::LEFT => rav1e_ipred_cfl_left_16bpc_avx2,
416 PredictionVariant::TOP => rav1e_ipred_cfl_top_16bpc_avx2,
417 PredictionVariant::BOTH => rav1e_ipred_cfl_16bpc_avx2,
418 })(
419 dst_ptr, stride, edge_ptr, w, h, ac_ptr, angle, bd_max
420 );
421 }
422 _ => call_rust(dst),
423 }
424 }
425 _ => call_rust(dst),
426 }
427 }
428 }
429