1 /*
2  * Copyright (c) 2018, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 #ifndef AOM_AV1_COMMON_ARM_AV1_INV_TXFM_NEON_H_
12 #define AOM_AV1_COMMON_ARM_AV1_INV_TXFM_NEON_H_
13 
14 #include "config/aom_config.h"
15 #include "config/av1_rtcd.h"
16 
17 #include "aom/aom_integer.h"
18 #include "av1/common/enums.h"
19 #include "av1/common/av1_inv_txfm1d.h"
20 #include "av1/common/av1_inv_txfm1d_cfg.h"
21 #include "av1/common/av1_txfm.h"
22 
23 typedef void (*transform_1d_neon)(const int32_t *input, int32_t *output,
24                                   const int8_t cos_bit,
25                                   const int8_t *stage_ptr);
26 typedef void (*transform_neon)(int16x8_t *input, int16x8_t *output,
27                                int8_t cos_bit, int bit);
28 
29 DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x8_default[8]) = {
30   0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707,
31 };
32 
33 DECLARE_ALIGNED(16, static const int16_t,
34                 av1_eob_to_eobxy_16x16_default[16]) = {
35   0x0707, 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
36   0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
37 };
38 
39 DECLARE_ALIGNED(16, static const int16_t,
40                 av1_eob_to_eobxy_32x32_default[32]) = {
41   0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
42   0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
43   0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
44   0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
45 };
46 
47 DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x16_default[16]) = {
48   0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07,
49   0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07,
50 };
51 
52 DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_16x8_default[8]) = {
53   0x0707, 0x0707, 0x070f, 0x070f, 0x070f, 0x070f, 0x070f, 0x070f,
54 };
55 
56 DECLARE_ALIGNED(16, static const int16_t,
57                 av1_eob_to_eobxy_16x32_default[32]) = {
58   0x0707, 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
59   0x0f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
60   0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
61   0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
62 };
63 
64 DECLARE_ALIGNED(16, static const int16_t,
65                 av1_eob_to_eobxy_32x16_default[16]) = {
66   0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f,
67   0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f,
68 };
69 
70 DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x32_default[32]) = {
71   0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07,
72   0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x1f07, 0x1f07, 0x1f07,
73   0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07,
74   0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07,
75 };
76 
77 DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_32x8_default[8]) = {
78   0x0707, 0x070f, 0x070f, 0x071f, 0x071f, 0x071f, 0x071f, 0x071f,
79 };
80 
81 DECLARE_ALIGNED(16, static const int16_t *,
82                 av1_eob_to_eobxy_default[TX_SIZES_ALL]) = {
83   NULL,
84   av1_eob_to_eobxy_8x8_default,
85   av1_eob_to_eobxy_16x16_default,
86   av1_eob_to_eobxy_32x32_default,
87   av1_eob_to_eobxy_32x32_default,
88   NULL,
89   NULL,
90   av1_eob_to_eobxy_8x16_default,
91   av1_eob_to_eobxy_16x8_default,
92   av1_eob_to_eobxy_16x32_default,
93   av1_eob_to_eobxy_32x16_default,
94   av1_eob_to_eobxy_32x32_default,
95   av1_eob_to_eobxy_32x32_default,
96   NULL,
97   NULL,
98   av1_eob_to_eobxy_8x32_default,
99   av1_eob_to_eobxy_32x8_default,
100   av1_eob_to_eobxy_16x32_default,
101   av1_eob_to_eobxy_32x16_default,
102 };
103 
// Buckets an index in [0, 31]: 0 -> 0, 1..7 -> 1, 8..15 -> 2, 16..31 -> 3.
// NOTE(review): not referenced inside this header; presumably indexed with an
// eobx/eoby value by the including .c files - confirm there.
static const int lowbd_txfm_all_1d_zeros_idx[32] = {
  0, 1, 1, 1, 1, 1, 1, 1,  // 0..7
  2, 2, 2, 2, 2, 2, 2, 2,  // 8..15
  3, 3, 3, 3, 3, 3, 3, 3,  // 16..23
  3, 3, 3, 3, 3, 3, 3, 3,  // 24..31
};
108 
109 // Transform block width in log2 for eob (size of 64 map to 32)
110 static const int tx_size_wide_log2_eob[TX_SIZES_ALL] = {
111   2, 3, 4, 5, 5, 2, 3, 3, 4, 4, 5, 5, 5, 2, 4, 3, 5, 4, 5,
112 };
113 
// Rounds a position in [0, 31] up to the last index of its 8-, 16- or
// 32-entry group: 0 -> 0, 1..7 -> 7, 8..15 -> 15, 16..31 -> 31.
// The table is only ever read, so it is const like the other lookup tables in
// this header; this also prevents accidental writes to the per-translation-unit
// copy.
static const int eob_fill[32] = {
  0,  7,  7,  7,  7,  7,  7,  7,  15, 15, 15, 15, 15, 15, 15, 15,
  31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
};
118 
get_eobx_eoby_scan_default(int * eobx,int * eoby,TX_SIZE tx_size,int eob)119 static INLINE void get_eobx_eoby_scan_default(int *eobx, int *eoby,
120                                               TX_SIZE tx_size, int eob) {
121   if (eob == 1) {
122     *eobx = 0;
123     *eoby = 0;
124     return;
125   }
126 
127   const int tx_w_log2 = tx_size_wide_log2_eob[tx_size];
128   const int eob_row = (eob - 1) >> tx_w_log2;
129   const int eobxy = av1_eob_to_eobxy_default[tx_size][eob_row];
130   *eobx = eobxy & 0xFF;
131   *eoby = eobxy >> 8;
132 }
133 
get_eobx_eoby_scan_v_identity(int * eobx,int * eoby,TX_SIZE tx_size,int eob)134 static INLINE void get_eobx_eoby_scan_v_identity(int *eobx, int *eoby,
135                                                  TX_SIZE tx_size, int eob) {
136   eob -= 1;
137   const int txfm_size_row = tx_size_high[tx_size];
138   const int eoby_max = AOMMIN(32, txfm_size_row) - 1;
139   *eobx = eob / (eoby_max + 1);
140   *eoby = (eob >= eoby_max) ? eoby_max : eob_fill[eob];
141 }
142 
get_eobx_eoby_scan_h_identity(int * eobx,int * eoby,TX_SIZE tx_size,int eob)143 static INLINE void get_eobx_eoby_scan_h_identity(int *eobx, int *eoby,
144                                                  TX_SIZE tx_size, int eob) {
145   eob -= 1;
146   const int txfm_size_col = tx_size_wide[tx_size];
147   const int eobx_max = AOMMIN(32, txfm_size_col) - 1;
148   *eobx = (eob >= eobx_max) ? eobx_max : eob_fill[eob];
149   const int temp_eoby = eob / (eobx_max + 1);
150   assert(temp_eoby < 32);
151   *eoby = eob_fill[temp_eoby];
152 }
153 
154 #endif  // AOM_AV1_COMMON_ARM_AV1_INV_TXFM_NEON_H_
155