1 // Copyright (c) 2018-2020, The rav1e contributors. All rights reserved
2 //
3 // This source code is subject to the terms of the BSD 2 Clause License and
4 // the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
5 // was not distributed with this source code in the LICENSE file, you can
6 // obtain it at www.aomedia.org/license/software. If the Alliance for Open
7 // Media Patent License 1.0 was not distributed with this source code in the
8 // PATENTS file, you can obtain it at www.aomedia.org/license/patent.
9 
10 use crate::cpu_features::CpuFeatureLevel;
11 use crate::util::*;
12 
13 use super::TxType;
14 
// Compile-time selection of what this module re-exports: when the
// `nasm_x86_64` cfg is set, everything from
// `crate::asm::x86::transform::forward` is re-exported; otherwise the
// contents of the `rust` module declared in this file are re-exported.
cfg_if::cfg_if! {
  if #[cfg(nasm_x86_64)] {
    pub use crate::asm::x86::transform::forward::*;
  } else {
    pub use self::rust::*;
  }
}
22 
23 pub mod rust {
24   use super::*;
25 
26   use crate::transform::forward_shared::*;
27   use crate::transform::{av1_round_shift_array, valid_av1_transform, TxSize};
28   use simd_helpers::cold_for_target_arch;
29 
/// Arithmetic primitives a transform coefficient type has to provide.
///
/// The per-method notes describe what the `i32` implementation in this
/// module does.
pub trait TxOperations: Copy {
  /// Returns the zero value of the type (`0` for `i32`).
  fn zero() -> Self;

  /// Returns the value unchanged unless an implementor overrides it.
  fn copy_fn(self) -> Self {
    self
  }

  /// For `i32`: `self + b`.
  fn add(self, b: Self) -> Self;
  /// For `i32`: `self - b`.
  fn sub(self, b: Self) -> Self;

  /// For `i32`: `(self + b) >> 1`.
  fn add_avg(self, b: Self) -> Self;
  /// For `i32`: `(self - b) >> 1`.
  fn sub_avg(self, b: Self) -> Self;

  /// For `i32`: multiplies by `mul.0`, adds `(1 << mul.1) >> 1` as a
  /// rounding offset and shifts right by `mul.1`.
  fn tx_mul(self, mul: (i32, i32)) -> Self;
  /// For `i32`: halves the value, adding 1 to negative inputs before the
  /// shift so the result rounds toward zero.
  fn rshift1(self) -> Self;
}
44 
45   impl TxOperations for i32 {
zero() -> Self46     fn zero() -> Self {
47       0
48     }
49 
tx_mul(self, mul: (i32, i32)) -> Self50     fn tx_mul(self, mul: (i32, i32)) -> Self {
51       ((self * mul.0) + (1 << mul.1 >> 1)) >> mul.1
52     }
53 
rshift1(self) -> Self54     fn rshift1(self) -> Self {
55       (self + if self < 0 { 1 } else { 0 }) >> 1
56     }
57 
add(self, b: Self) -> Self58     fn add(self, b: Self) -> Self {
59       self + b
60     }
61 
sub(self, b: Self) -> Self62     fn sub(self, b: Self) -> Self {
63       self - b
64     }
65 
add_avg(self, b: Self) -> Self66     fn add_avg(self, b: Self) -> Self {
67       (self + b) >> 1
68     }
69 
sub_avg(self, b: Self) -> Self70     fn sub_avg(self, b: Self) -> Self {
71       (self - b) >> 1
72     }
73   }
74 
// Expands the `impl_1d_tx!` macro, whose definition is not in this file.
// Presumably this is what provides the `daala_*` / `fidentity` 1-D kernels
// that `get_func` dispatches to -- TODO confirm against the macro.
impl_1d_tx!();

// Signature of a 1-D transform kernel as returned by `get_func`: it takes a
// mutable `i32` coefficient slice and returns nothing.
type TxfmFunc = fn(&mut [i32]);
78 
get_func(t: TxfmType) -> TxfmFunc79   fn get_func(t: TxfmType) -> TxfmFunc {
80     use self::TxfmType::*;
81     match t {
82       DCT4 => daala_fdct4,
83       DCT8 => daala_fdct8,
84       DCT16 => daala_fdct16,
85       DCT32 => daala_fdct32,
86       DCT64 => daala_fdct64,
87       ADST4 => daala_fdst_vii_4,
88       ADST8 => daala_fdst8,
89       ADST16 => daala_fdst16,
90       Identity4 => fidentity,
91       Identity8 => fidentity,
92       Identity16 => fidentity,
93       Identity32 => fidentity,
94       _ => unreachable!(),
95     }
96   }
97 
98   #[cold_for_target_arch("x86_64")]
forward_transform<T: Coefficient>( input: &[i16], output: &mut [T], stride: usize, tx_size: TxSize, tx_type: TxType, bd: usize, _cpu: CpuFeatureLevel, )99   pub fn forward_transform<T: Coefficient>(
100     input: &[i16], output: &mut [T], stride: usize, tx_size: TxSize,
101     tx_type: TxType, bd: usize, _cpu: CpuFeatureLevel,
102   ) {
103     assert!(valid_av1_transform(tx_size, tx_type));
104 
105     // Note when assigning txfm_size_col, we use the txfm_size from the
106     // row configuration and vice versa. This is intentionally done to
107     // accurately perform rectangular transforms. When the transform is
108     // rectangular, the number of columns will be the same as the
109     // txfm_size stored in the row cfg struct. It will make no difference
110     // for square transforms.
111     let txfm_size_col = tx_size.width();
112     let txfm_size_row = tx_size.height();
113 
114     let mut tmp: Aligned<[i32; 64 * 64]> = Aligned::uninitialized();
115     let buf = &mut tmp.data[..txfm_size_col * txfm_size_row];
116 
117     let cfg = Txfm2DFlipCfg::fwd(tx_type, tx_size, bd);
118 
119     let txfm_func_col = get_func(cfg.txfm_type_col);
120     let txfm_func_row = get_func(cfg.txfm_type_row);
121 
122     // Columns
123     for c in 0..txfm_size_col {
124       let mut col_coeffs_backing: Aligned<[i32; 64]> =
125         Aligned::uninitialized();
126       let col_coeffs = &mut col_coeffs_backing.data[..txfm_size_row];
127       if cfg.ud_flip {
128         // flip upside down
129         for r in 0..txfm_size_row {
130           col_coeffs[r] = (input[(txfm_size_row - r - 1) * stride + c]).into();
131         }
132       } else {
133         for r in 0..txfm_size_row {
134           col_coeffs[r] = (input[r * stride + c]).into();
135         }
136       }
137 
138       av1_round_shift_array(col_coeffs, txfm_size_row, -cfg.shift[0]);
139       txfm_func_col(col_coeffs);
140       av1_round_shift_array(col_coeffs, txfm_size_row, -cfg.shift[1]);
141       if cfg.lr_flip {
142         for r in 0..txfm_size_row {
143           // flip from left to right
144           buf[r * txfm_size_col + (txfm_size_col - c - 1)] = col_coeffs[r];
145         }
146       } else {
147         for r in 0..txfm_size_row {
148           buf[r * txfm_size_col + c] = col_coeffs[r];
149         }
150       }
151     }
152 
153     // Rows
154     for r in 0..txfm_size_row {
155       let row_coeffs = &mut buf[r * txfm_size_col..];
156       txfm_func_row(row_coeffs);
157       av1_round_shift_array(row_coeffs, txfm_size_col, -cfg.shift[2]);
158 
159       // Store output in at most 32x32 chunks so that the first 32x32
160       // coefficients are stored first. When we don't have 64 rows, there is no
161       // change in order. With 64 rows, the chunks are in this order
162       //  - First 32 rows and first 32 cols
163       //  - Last 32 rows and first 32 cols
164       //  - First 32 rows and last 32 cols
165       //  - Last 32 rows and last 32 cols
166 
167       // Output is grouped into 32x32 chunks so a stride of at most 32 is
168       // used for each chunk.
169       let output_stride = txfm_size_row.min(32);
170 
171       // Split the first 32 rows from the last 32 rows
172       let output = &mut output
173         [(r >= 32) as usize * output_stride * txfm_size_col.min(32)..];
174 
175       for cg in (0..txfm_size_col).step_by(32) {
176         // Split the first 32 cols from the last 32 cols
177         let output = &mut output[txfm_size_row * cg..];
178 
179         for c in 0..txfm_size_col.min(32) {
180           output[c * output_stride + (r & 31)] =
181             T::cast_from(row_coeffs[c + cg]);
182         }
183       }
184     }
185   }
186 }
187