1 /*
2 * Copyright (c) 2018, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11 #ifndef AOM_AV1_COMMON_ARM_AV1_INV_TXFM_NEON_H_
12 #define AOM_AV1_COMMON_ARM_AV1_INV_TXFM_NEON_H_
13
#include <assert.h>

#include "config/aom_config.h"
#include "config/av1_rtcd.h"

#include "aom/aom_integer.h"
#include "av1/common/enums.h"
#include "av1/common/av1_inv_txfm1d.h"
#include "av1/common/av1_inv_txfm1d_cfg.h"
#include "av1/common/av1_txfm.h"
22
// Pointer to a 1-D transform routine operating on 32-bit coefficients.
//   input     - source coefficients (read-only).
//   output    - destination buffer for the transformed coefficients.
//   cos_bit   - bit-precision selector forwarded to the routine.
//   stage_ptr - per-stage parameters (read-only); meaning is defined by the
//               routine being pointed to.
// Top-level qualifiers on by-value parameters do not affect the function
// type, so cos_bit is spelled without `const` here.
typedef void (*transform_1d_neon)(const int32_t *input, int32_t *output,
                                  int8_t cos_bit, const int8_t *stage_ptr);
26 typedef void (*transform_neon)(int16x8_t *input, int16x8_t *output,
27 int8_t cos_bit, int bit);
28
29 DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x8_default[8]) = {
30 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707,
31 };
32
33 DECLARE_ALIGNED(16, static const int16_t,
34 av1_eob_to_eobxy_16x16_default[16]) = {
35 0x0707, 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
36 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
37 };
38
39 DECLARE_ALIGNED(16, static const int16_t,
40 av1_eob_to_eobxy_32x32_default[32]) = {
41 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
42 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
43 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
44 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
45 };
46
47 DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x16_default[16]) = {
48 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07,
49 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07,
50 };
51
52 DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_16x8_default[8]) = {
53 0x0707, 0x0707, 0x070f, 0x070f, 0x070f, 0x070f, 0x070f, 0x070f,
54 };
55
56 DECLARE_ALIGNED(16, static const int16_t,
57 av1_eob_to_eobxy_16x32_default[32]) = {
58 0x0707, 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
59 0x0f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
60 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
61 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
62 };
63
64 DECLARE_ALIGNED(16, static const int16_t,
65 av1_eob_to_eobxy_32x16_default[16]) = {
66 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f,
67 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f,
68 };
69
70 DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x32_default[32]) = {
71 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07,
72 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x1f07, 0x1f07, 0x1f07,
73 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07,
74 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07,
75 };
76
77 DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_32x8_default[8]) = {
78 0x0707, 0x070f, 0x070f, 0x071f, 0x071f, 0x071f, 0x071f, 0x071f,
79 };
80
81 DECLARE_ALIGNED(16, static const int16_t *,
82 av1_eob_to_eobxy_default[TX_SIZES_ALL]) = {
83 NULL,
84 av1_eob_to_eobxy_8x8_default,
85 av1_eob_to_eobxy_16x16_default,
86 av1_eob_to_eobxy_32x32_default,
87 av1_eob_to_eobxy_32x32_default,
88 NULL,
89 NULL,
90 av1_eob_to_eobxy_8x16_default,
91 av1_eob_to_eobxy_16x8_default,
92 av1_eob_to_eobxy_16x32_default,
93 av1_eob_to_eobxy_32x16_default,
94 av1_eob_to_eobxy_32x32_default,
95 av1_eob_to_eobxy_32x32_default,
96 NULL,
97 NULL,
98 av1_eob_to_eobxy_8x32_default,
99 av1_eob_to_eobxy_32x8_default,
100 av1_eob_to_eobxy_16x32_default,
101 av1_eob_to_eobxy_32x16_default,
102 };
103
// Buckets an index in [0, 31] into one of four classes:
//   0 -> 0,  1..7 -> 1,  8..15 -> 2,  16..31 -> 3.
// NOTE(review): the consumers are not in this header; presumably the index is
// an eobx/eoby value and the class selects a reduced-size 1-D transform -
// confirm at the call sites.
static const int lowbd_txfm_all_1d_zeros_idx[32] = {
  0,                                               // [0]
  1, 1, 1, 1, 1, 1, 1,                             // [1..7]
  2, 2, 2, 2, 2, 2, 2, 2,                          // [8..15]
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,  // [16..31]
};
108
109 // Transform block width in log2 for eob (size of 64 map to 32)
110 static const int tx_size_wide_log2_eob[TX_SIZES_ALL] = {
111 2, 3, 4, 5, 5, 2, 3, 3, 4, 4, 5, 5, 5, 2, 4, 3, 5, 4, 5,
112 };
113
// Rounds an index in [0, 31] up to the last index of its 8/16/32-wide group:
//   0 -> 0,  1..7 -> 7,  8..15 -> 15,  16..31 -> 31.
// Read by get_eobx_eoby_scan_v_identity() and get_eobx_eoby_scan_h_identity().
// Declared const: this is a lookup table defined in a header and is never
// written, so it should not be a mutable per-translation-unit global.
static const int eob_fill[32] = {
  0,                                                               // [0]
  7,  7,  7,  7,  7,  7,  7,                                       // [1..7]
  15, 15, 15, 15, 15, 15, 15, 15,                                  // [8..15]
  31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,  // [16..31]
};
118
get_eobx_eoby_scan_default(int * eobx,int * eoby,TX_SIZE tx_size,int eob)119 static INLINE void get_eobx_eoby_scan_default(int *eobx, int *eoby,
120 TX_SIZE tx_size, int eob) {
121 if (eob == 1) {
122 *eobx = 0;
123 *eoby = 0;
124 return;
125 }
126
127 const int tx_w_log2 = tx_size_wide_log2_eob[tx_size];
128 const int eob_row = (eob - 1) >> tx_w_log2;
129 const int eobxy = av1_eob_to_eobxy_default[tx_size][eob_row];
130 *eobx = eobxy & 0xFF;
131 *eoby = eobxy >> 8;
132 }
133
get_eobx_eoby_scan_v_identity(int * eobx,int * eoby,TX_SIZE tx_size,int eob)134 static INLINE void get_eobx_eoby_scan_v_identity(int *eobx, int *eoby,
135 TX_SIZE tx_size, int eob) {
136 eob -= 1;
137 const int txfm_size_row = tx_size_high[tx_size];
138 const int eoby_max = AOMMIN(32, txfm_size_row) - 1;
139 *eobx = eob / (eoby_max + 1);
140 *eoby = (eob >= eoby_max) ? eoby_max : eob_fill[eob];
141 }
142
get_eobx_eoby_scan_h_identity(int * eobx,int * eoby,TX_SIZE tx_size,int eob)143 static INLINE void get_eobx_eoby_scan_h_identity(int *eobx, int *eoby,
144 TX_SIZE tx_size, int eob) {
145 eob -= 1;
146 const int txfm_size_col = tx_size_wide[tx_size];
147 const int eobx_max = AOMMIN(32, txfm_size_col) - 1;
148 *eobx = (eob >= eobx_max) ? eobx_max : eob_fill[eob];
149 const int temp_eoby = eob / (eobx_max + 1);
150 assert(temp_eoby < 32);
151 *eoby = eob_fill[temp_eoby];
152 }
153
154 #endif // AOM_AV1_COMMON_ARM_AV1_INV_TXFM_NEON_H_
155