1 /*********************************************************************************
2 **   ITU-T G.722.1 (2005-05) - Fixed point implementation for main body and Annex C
3 **   > Software Release 2.1 (2008-06)
4 **     (Simple repackaging; no change from 2005-05 Release 2.0 code)
5 **
6 **   � 2004 Polycom, Inc.
7 **
8 **	 All rights reserved.
9 **
10 *********************************************************************************/
11 
12 /*********************************************************************************
13 * Filename: dct_type_iv_a.c
14 *
15 * Purpose:  Discrete Cosine Transform, Type IV used for MLT
16 *
17 * The basis functions are
18 *
19 *	 cos(PI*(t+0.5)*(k+0.5)/block_length)
20 *
21 * for time t and basis function number k.  Due to the symmetry of the expression
22 * in t and k, it is clear that the forward and inverse transforms are the same.
23 *
24 *********************************************************************************/
25 
26 /*********************************************************************************
27  Include files
28 *********************************************************************************/
29 #include "defs.h"
30 #include "count.h"
31 #include "dct4_a.h"
32 
33 /*********************************************************************************
34  External variable declarations
35 *********************************************************************************/
36 extern Word16       anal_bias[DCT_LENGTH];
37 extern Word16       dct_core_a[DCT_LENGTH_DIV_32][DCT_LENGTH_DIV_32];
38 extern cos_msin_t   a_cos_msin_2 [DCT_LENGTH_DIV_32];
39 extern cos_msin_t   a_cos_msin_4 [DCT_LENGTH_DIV_16];
40 extern cos_msin_t   a_cos_msin_8 [DCT_LENGTH_DIV_8];
41 extern cos_msin_t   a_cos_msin_16[DCT_LENGTH_DIV_4];
42 extern cos_msin_t   a_cos_msin_32[DCT_LENGTH_DIV_2];
43 extern cos_msin_t   a_cos_msin_64[DCT_LENGTH];
44 extern cos_msin_t   *a_cos_msin_table[];
45 
46 /*********************************************************************************
47  Function:    dct_type_iv_a
48 
49  Syntax:      void dct_type_iv_a (input, output, dct_length)
50                         Word16   input[], output[], dct_length;
51 
52  Description: Discrete Cosine Transform, Type IV used for MLT
53 
54  Design Notes:
55 
56  WMOPS:          |    24kbit    |     32kbit
57           -------|--------------|----------------
58             AVG  |    1.14      |     1.14
59           -------|--------------|----------------
60             MAX  |    1.14      |     1.14
61           -------|--------------|----------------
62 
63            14kHz |    24kbit    |     32kbit     |     48kbit
64           -------|--------------|----------------|----------------
65             AVG  |    2.57      |     2.57       |     2.57
66           -------|--------------|----------------|----------------
67             MAX  |    2.57      |     2.57       |     2.57
68           -------|--------------|----------------|----------------
69 
70 *********************************************************************************/
71 
dct_type_iv_a(Word16 * input,Word16 * output,Word16 dct_length)72 void dct_type_iv_a (Word16 *input,Word16 *output,Word16 dct_length)
73 {
74     Word16   buffer_a[MAX_DCT_LENGTH], buffer_b[MAX_DCT_LENGTH], buffer_c[MAX_DCT_LENGTH];
75     Word16   *in_ptr, *in_ptr_low, *in_ptr_high, *next_in_base;
76     Word16   *out_ptr_low, *out_ptr_high, *next_out_base;
77     Word16   *out_buffer, *in_buffer, *buffer_swap;
78     Word16   in_val_low, in_val_high;
79     Word16   out_val_low, out_val_high;
80     Word16   in_low_even, in_low_odd;
81     Word16   in_high_even, in_high_odd;
82     Word16   out_low_even, out_low_odd;
83     Word16   out_high_even, out_high_odd;
84     Word16   *pair_ptr;
85     Word16   cos_even, cos_odd, msin_even, msin_odd;
86     Word16   neg_cos_odd;
87     Word16   neg_msin_even;
88     Word32   sum;
89     Word16   set_span, set_count, set_count_log, pairs_left, sets_left;
90     Word16   i,k;
91     Word16   index;
92     cos_msin_t  **table_ptr_ptr, *cos_msin_ptr;
93 
94     Word16   temp;
95     Word32   acca;
96 
97     Word16   dct_length_log;
98 
99 
100     /*++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
101     /* Do the sum/difference butterflies, the first part of */
102     /* converting one N-point transform into N/2 two-point  */
103     /* transforms, where N = 1 << DCT_LENGTH_LOG. = 64/128  */
104     /*++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
105     test();
106     if (dct_length==DCT_LENGTH)
107     {
108         dct_length_log = DCT_LENGTH_LOG;
109 
110         /* Add bias offsets */
111         for (i=0;i<dct_length;i++)
112         {
113             input[i] = add(input[i],anal_bias[i]);
114             move16();
115         }
116     }
117     else
118         dct_length_log = MAX_DCT_LENGTH_LOG;
119 
120     index = 0L;
121     move16();
122 
123     in_buffer  = input;
124     move16();
125 
126     out_buffer = buffer_a;
127     move16();
128 
129     temp = sub(dct_length_log,2);
130     for (set_count_log=0;set_count_log<=temp;set_count_log++)
131     {
132 
133         /*===========================================================*/
134         /* Initialization for the loop over sets at the current size */
135         /*===========================================================*/
136 
137         /*    set_span      = 1 << (DCT_LENGTH_LOG - set_count_log); */
138         set_span = shr(dct_length,set_count_log);
139 
140         set_count     = shl(1,set_count_log);
141 
142         in_ptr        = in_buffer;
143         move16();
144 
145         next_out_base = out_buffer;
146         move16();
147 
148         /*=====================================*/
149         /* Loop over all the sets of this size */
150         /*=====================================*/
151 
152         for (sets_left=set_count;sets_left>0;sets_left--)
153         {
154 
155             /*||||||||||||||||||||||||||||||||||||||||||||*/
156             /* Set up output pointers for the current set */
157             /*||||||||||||||||||||||||||||||||||||||||||||*/
158 
159             out_ptr_low    = next_out_base;
160             next_out_base  = next_out_base + set_span;
161             out_ptr_high   = next_out_base;
162 
163             /*||||||||||||||||||||||||||||||||||||||||||||||||||*/
164             /* Loop over all the butterflies in the current set */
165             /*||||||||||||||||||||||||||||||||||||||||||||||||||*/
166 
167             do
168             {
169                 in_val_low      = *in_ptr++;
170                 in_val_high     = *in_ptr++;
171                 acca            = L_add(in_val_low,in_val_high);
172                 acca            = L_shr(acca,1);
173                 out_val_low     = extract_l(acca);
174 
175                 acca            = L_sub(in_val_low,in_val_high);
176                 acca            = L_shr(acca,1);
177                 out_val_high    = extract_l(acca);
178 
179                 *out_ptr_low++  = out_val_low;
180                 *--out_ptr_high = out_val_high;
181 
182                 test();
183             } while (out_ptr_low < out_ptr_high);
184 
185         } /* End of loop over sets of the current size */
186 
187         /*============================================================*/
188         /* Decide which buffers to use as input and output next time. */
189         /* Except for the first time (when the input buffer is the    */
190         /* subroutine input) we just alternate the local buffers.     */
191         /*============================================================*/
192 
193         in_buffer = out_buffer;
194         move16();
195         if (out_buffer == buffer_a)
196             out_buffer = buffer_b;
197         else
198             out_buffer = buffer_a;
199         index = add(index,1);
200 
201     } /* End of loop over set sizes */
202 
203 
204     /*++++++++++++++++++++++++++++++++*/
205     /* Do N/2 two-point transforms,   */
206     /* where N =  1 << DCT_LENGTH_LOG */
207     /*++++++++++++++++++++++++++++++++*/
208 
209     pair_ptr = in_buffer;
210     move16();
211 
212     buffer_swap = buffer_c;
213     move16();
214 
215     temp = sub(dct_length_log,1);
216     temp = shl(1,temp);
217 
218     for (pairs_left=temp; pairs_left > 0; pairs_left--)
219     {
220         for ( k=0; k<CORE_SIZE; k++ )
221         {
222             sum=0L;
223             move32();
224             for ( i=0; i<CORE_SIZE; i++ )
225             {
226                 sum = L_mac(sum, pair_ptr[i],dct_core_a[i][k]);
227             }
228             buffer_swap[k] = round(sum);
229         }
230         /* address arithmetic */
231         pair_ptr   += CORE_SIZE;
232         buffer_swap += CORE_SIZE;
233     }
234 
235     for (i=0;i<dct_length;i++)
236     {
237         in_buffer[i] = buffer_c[i];
238         move16();
239     }
240 
241     table_ptr_ptr = a_cos_msin_table;
242 
243     /*++++++++++++++++++++++++++++++*/
244     /* Perform rotation butterflies */
245     /*++++++++++++++++++++++++++++++*/
246     temp = sub(dct_length_log,2);
247     for (set_count_log = temp; set_count_log >= 0;    set_count_log--)
248     {
249         /*===========================================================*/
250         /* Initialization for the loop over sets at the current size */
251         /*===========================================================*/
252         /*    set_span      = 1 << (DCT_LENGTH_LOG - set_count_log); */
253         set_span = shr(dct_length,set_count_log);
254 
255         set_count     = shl(1,set_count_log);
256         next_in_base  = in_buffer;
257         move16();
258 
259         test();
260         if (set_count_log == 0)
261         {
262             next_out_base = output;
263         }
264         else
265         {
266             next_out_base = out_buffer;
267         }
268 
269 
270         /*=====================================*/
271         /* Loop over all the sets of this size */
272         /*=====================================*/
273         for (sets_left = set_count; sets_left > 0;sets_left--)
274         {
275             /*|||||||||||||||||||||||||||||||||||||||||*/
276             /* Set up the pointers for the current set */
277             /*|||||||||||||||||||||||||||||||||||||||||*/
278             in_ptr_low     = next_in_base;
279             move16();
280             temp           = shr(set_span,1);
281 
282             /* address arithmetic */
283             in_ptr_high    = in_ptr_low + temp;
284             next_in_base  += set_span;
285             out_ptr_low    = next_out_base;
286             next_out_base += set_span;
287             out_ptr_high   = next_out_base;
288             cos_msin_ptr   = *table_ptr_ptr;
289 
290             /*||||||||||||||||||||||||||||||||||||||||||||||||||||||*/
291             /* Loop over all the butterfly pairs in the current set */
292             /*||||||||||||||||||||||||||||||||||||||||||||||||||||||*/
293 
294             do
295             {
296                 /* address arithmetic */
297                 in_low_even     = *in_ptr_low++;
298                 in_low_odd      = *in_ptr_low++;
299                 in_high_even    = *in_ptr_high++;
300                 in_high_odd     = *in_ptr_high++;
301                 cos_even        = cos_msin_ptr[0].cosine;
302                 move16();
303                 msin_even       = cos_msin_ptr[0].minus_sine;
304                 move16();
305                 cos_odd         = cos_msin_ptr[1].cosine;
306                 move16();
307                 msin_odd        = cos_msin_ptr[1].minus_sine;
308                 move16();
309                 cos_msin_ptr   += 2;
310 
311                 sum = 0L;
312                 sum=L_mac(sum,cos_even,in_low_even);
313                 neg_msin_even = negate(msin_even);
314                 sum=L_mac(sum,neg_msin_even,in_high_even);
315                 out_low_even = round(sum);
316 
317                 sum = 0L;
318                 sum=L_mac(sum,msin_even,in_low_even);
319                 sum=L_mac(sum,cos_even,in_high_even);
320                 out_high_even= round(sum);
321 
322                 sum = 0L;
323                 sum=L_mac(sum,cos_odd,in_low_odd);
324                 sum=L_mac(sum,msin_odd,in_high_odd);
325                 out_low_odd= round(sum);
326 
327                 sum = 0L;
328                 sum=L_mac(sum,msin_odd,in_low_odd);
329                 neg_cos_odd = negate(cos_odd);
330                 sum=L_mac(sum,neg_cos_odd,in_high_odd);
331                 out_high_odd= round(sum);
332 
333                 *out_ptr_low++  = out_low_even;
334                 *--out_ptr_high = out_high_even;
335                 *out_ptr_low++  = out_low_odd;
336                 *--out_ptr_high = out_high_odd;
337                 test();
338             } while (out_ptr_low < out_ptr_high);
339 
340         } /* End of loop over sets of the current size */
341 
342         /*=============================================*/
343         /* Swap input and output buffers for next time */
344         /*=============================================*/
345 
346         buffer_swap = in_buffer;
347         in_buffer   = out_buffer;
348         out_buffer  = buffer_swap;
349         table_ptr_ptr++;
350     }
351 }
352 
353