1 /*
2 * jrevdct.c
3 *
4 * Copyright (C) 1991, 1992, Thomas G. Lane.
5 * This file is part of the Independent JPEG Group's software.
6 * For conditions of distribution and use, see the accompanying README file.
7 *
8 * This file contains the basic inverse-DCT transformation subroutine.
9 *
10 * This implementation is based on an algorithm described in
11 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
12 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
13 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
14 * The primary algorithm described there uses 11 multiplies and 29 adds.
15 * We use their alternate method with 12 multiplies and 32 adds.
16 * The advantage of this method is that no data path contains more than one
17 * multiplication; this allows a very simple and accurate implementation in
18 * scaled fixed-point arithmetic, with a minimal number of shifts.
19 *
20 * I've made lots of modifications to attempt to take advantage of the
21 * sparse nature of the DCT matrices we're getting. Although the logic
22 * is cumbersome, it's straightforward and the resulting code is much
23 * faster.
24 *
25 * A better way to do this would be to pass in the DCT block as a sparse
26 * matrix, perhaps with the difference cases encoded.
27 */
28
29 #define _XOPEN_SOURCE 500 /* get M_PI in math.h */
30
31 #include <memory.h>
32 #include <math.h>
33
34 #include "all.h"
35 #include "dct.h"
36
37
38 #define CONST_BITS 13
39
40 /*
41 * This routine is specialized to the case DCTSIZE = 8.
42 */
43
44 #if DCTSIZE != 8
45 Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
46 #endif
47
48
49 /*
50 * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT
51 * on each column. Direct algorithms are also available, but they are
52 * much more complex and seem not to be any faster when reduced to code.
53 *
54 * The poop on this scaling stuff is as follows:
55 *
56 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
57 * larger than the true IDCT outputs. The final outputs are therefore
58 * a factor of N larger than desired; since N=8 this can be cured by
59 * a simple right shift at the end of the algorithm. The advantage of
60 * this arrangement is that we save two multiplications per 1-D IDCT,
61 * because the y0 and y4 inputs need not be divided by sqrt(N).
62 *
63 * We have to do addition and subtraction of the integer inputs, which
64 * is no problem, and multiplication by fractional constants, which is
65 * a problem to do in integer arithmetic. We multiply all the constants
66 * by CONST_SCALE and convert them to integer constants (thus retaining
67 * CONST_BITS bits of precision in the constants). After doing a
68 * multiplication we have to divide the product by CONST_SCALE, with proper
69 * rounding, to produce the correct output. This division can be done
70 * cheaply as a right shift of CONST_BITS bits. We postpone shifting
71 * as long as possible so that partial sums can be added together with
72 * full fractional precision.
73 *
74 * The outputs of the first pass are scaled up by PASS1_BITS bits so that
75 * they are represented to better-than-integral precision. These outputs
76 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
77 * with the recommended scaling. (To scale up 12-bit sample data further, an
78 * intermediate int32 array would be needed.)
79 *
80 * To avoid overflow of the 32-bit intermediate results in pass 2, we must
81 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
82 * shows that the values given below are the most effective.
83 */
84
85 #ifdef EIGHT_BIT_SAMPLES
86 #define PASS1_BITS 2
87 #else
88 #define PASS1_BITS 1 /* lose a little precision to avoid overflow */
89 #endif
90
91 #define ONE ((int32) 1)
92
93 #define CONST_SCALE (ONE << CONST_BITS)
94
95 /* Convert a positive real constant to an integer scaled by CONST_SCALE.
96 * IMPORTANT: if your compiler doesn't do this arithmetic at compile time,
97 * you will pay a significant penalty in run time. In that case, figure
98 * the correct integer constant values and insert them by hand.
99 */
100
101 /* Actually FIX is no longer used, we precomputed them all */
102 #define FIX(x) ((int32) ((x) * CONST_SCALE + 0.5))
103
104 /* Descale and correctly round an int32 value that's scaled by N bits.
105 * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
106 * the fudge factor is correct for either sign of X.
107 */
108
109 #define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
110
111 /* Multiply an int32 variable by an int32 constant to yield an int32 result.
112 * For 8-bit samples with the recommended scaling, all the variable
113 * and constant values involved are no more than 16 bits wide, so a
114 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply;
115 * this provides a useful speedup on many machines.
116 * There is no way to specify a 16x16->32 multiply in portable C, but
117 * some C compilers will do the right thing if you provide the correct
118 * combination of casts.
119 * NB: for 12-bit samples, a full 32-bit multiplication will be needed.
120 */
121
122 #ifdef EIGHT_BIT_SAMPLES
123 #ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */
124 #define MULTIPLY(var,const) (((INT16) (var)) * ((INT16) (const)))
125 #endif
126 #ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */
127 #define MULTIPLY(var,const) (((INT16) (var)) * ((int32) (const)))
128 #endif
129 #endif
130
131 #ifndef MULTIPLY /* default definition */
132 #define MULTIPLY(var,const) ((var) * (const))
133 #endif
134
135
136 /*
137 Unlike our decoder where we approximate the FIXes, we need to use exact
138 ones here or successive P-frames will drift too much with Reference frame coding
139 */
140 #define FIX_0_211164243 1730
141 #define FIX_0_275899380 2260
142 #define FIX_0_298631336 2446
143 #define FIX_0_390180644 3196
144 #define FIX_0_509795579 4176
145 #define FIX_0_541196100 4433
146 #define FIX_0_601344887 4926
147 #define FIX_0_765366865 6270
148 #define FIX_0_785694958 6436
149 #define FIX_0_899976223 7373
150 #define FIX_1_061594337 8697
151 #define FIX_1_111140466 9102
152 #define FIX_1_175875602 9633
153 #define FIX_1_306562965 10703
154 #define FIX_1_387039845 11363
155 #define FIX_1_451774981 11893
156 #define FIX_1_501321110 12299
157 #define FIX_1_662939225 13623
158 #define FIX_1_847759065 15137
159 #define FIX_1_961570560 16069
160 #define FIX_2_053119869 16819
161 #define FIX_2_172734803 17799
162 #define FIX_2_562915447 20995
163 #define FIX_3_072711026 25172
164
165 /*
166 Switch on reverse_dct choices
167 */
168 void reference_rev_dct (int16 *block);
169 void mpeg_jrevdct_quick (int16 *block);
170 void init_idctref (void);
171
172 extern boolean pureDCT;
173
174 void
mpeg_jrevdct(data)175 mpeg_jrevdct(data)
176 DCTBLOCK data;
177 {
178 if (pureDCT) reference_rev_dct(data);
179 else mpeg_jrevdct_quick(data);
180 }
181
182 /*
183 * Perform the inverse DCT on one block of coefficients.
184 */
185
186 void
mpeg_jrevdct_quick(data)187 mpeg_jrevdct_quick(data)
188 DCTBLOCK data;
189 {
190 int32 tmp0, tmp1, tmp2, tmp3;
191 int32 tmp10, tmp11, tmp12, tmp13;
192 int32 z1, z2, z3, z4, z5;
193 int32 d0, d1, d2, d3, d4, d5, d6, d7;
194 register DCTELEM *dataptr;
195 int rowctr;
196 SHIFT_TEMPS
197
198 /* Pass 1: process rows. */
199 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
200 /* furthermore, we scale the results by 2**PASS1_BITS. */
201
202 dataptr = data;
203
204 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
205 /* Because of quantization, we will usually find that many of the input
206 * coefficients are zero, especially the AC terms. We can exploit this
207 * by short-circuiting the IDCT calculation for any row in which all
208 * the AC terms are zero. In that case each output is equal to the
209 * DC coefficient (with scale factor as needed).
210 * With typical images and quantization tables, half or more of the
211 * row DCT calculations can be simplified this way.
212 */
213
214 register int *idataptr = (int*)dataptr;
215 d0 = dataptr[0];
216 d1 = dataptr[1];
217 if ((d1 == 0) && (idataptr[1] | idataptr[2] | idataptr[3]) == 0) {
218 /* AC terms all zero */
219 if (d0) {
220 /* Compute a 32 bit value to assign. */
221 DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS);
222 register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
223
224 idataptr[0] = v;
225 idataptr[1] = v;
226 idataptr[2] = v;
227 idataptr[3] = v;
228 }
229
230 dataptr += DCTSIZE; /* advance pointer to next row */
231 continue;
232 }
233 d2 = dataptr[2];
234 d3 = dataptr[3];
235 d4 = dataptr[4];
236 d5 = dataptr[5];
237 d6 = dataptr[6];
238 d7 = dataptr[7];
239
240 /* Even part: reverse the even part of the forward DCT. */
241 /* The rotator is sqrt(2)*c(-6). */
242 {
243 if (d6) {
244 if (d4) {
245 if (d2) {
246 if (d0) {
247 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
248 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
249 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
250 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
251
252 tmp0 = (d0 + d4) << CONST_BITS;
253 tmp1 = (d0 - d4) << CONST_BITS;
254
255 tmp10 = tmp0 + tmp3;
256 tmp13 = tmp0 - tmp3;
257 tmp11 = tmp1 + tmp2;
258 tmp12 = tmp1 - tmp2;
259 } else {
260 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
261 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
262 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
263 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
264
265 tmp0 = d4 << CONST_BITS;
266
267 tmp10 = tmp0 + tmp3;
268 tmp13 = tmp0 - tmp3;
269 tmp11 = tmp2 - tmp0;
270 tmp12 = -(tmp0 + tmp2);
271 }
272 } else {
273 if (d0) {
274 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
275 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
276 tmp3 = MULTIPLY(d6, FIX_0_541196100);
277
278 tmp0 = (d0 + d4) << CONST_BITS;
279 tmp1 = (d0 - d4) << CONST_BITS;
280
281 tmp10 = tmp0 + tmp3;
282 tmp13 = tmp0 - tmp3;
283 tmp11 = tmp1 + tmp2;
284 tmp12 = tmp1 - tmp2;
285 } else {
286 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
287 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
288 tmp3 = MULTIPLY(d6, FIX_0_541196100);
289
290 tmp0 = d4 << CONST_BITS;
291
292 tmp10 = tmp0 + tmp3;
293 tmp13 = tmp0 - tmp3;
294 tmp11 = tmp2 - tmp0;
295 tmp12 = -(tmp0 + tmp2);
296 }
297 }
298 } else {
299 if (d2) {
300 if (d0) {
301 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
302 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
303 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
304 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
305
306 tmp0 = d0 << CONST_BITS;
307
308 tmp10 = tmp0 + tmp3;
309 tmp13 = tmp0 - tmp3;
310 tmp11 = tmp0 + tmp2;
311 tmp12 = tmp0 - tmp2;
312 } else {
313 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
314 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
315 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
316 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
317
318 tmp10 = tmp3;
319 tmp13 = -tmp3;
320 tmp11 = tmp2;
321 tmp12 = -tmp2;
322 }
323 } else {
324 if (d0) {
325 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
326 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
327 tmp3 = MULTIPLY(d6, FIX_0_541196100);
328
329 tmp0 = d0 << CONST_BITS;
330
331 tmp10 = tmp0 + tmp3;
332 tmp13 = tmp0 - tmp3;
333 tmp11 = tmp0 + tmp2;
334 tmp12 = tmp0 - tmp2;
335 } else {
336 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
337 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
338 tmp3 = MULTIPLY(d6, FIX_0_541196100);
339
340 tmp10 = tmp3;
341 tmp13 = -tmp3;
342 tmp11 = tmp2;
343 tmp12 = -tmp2;
344 }
345 }
346 }
347 } else {
348 if (d4) {
349 if (d2) {
350 if (d0) {
351 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
352 tmp2 = MULTIPLY(d2, FIX_0_541196100);
353 tmp3 = MULTIPLY(d2, FIX_1_306562965);
354
355 tmp0 = (d0 + d4) << CONST_BITS;
356 tmp1 = (d0 - d4) << CONST_BITS;
357
358 tmp10 = tmp0 + tmp3;
359 tmp13 = tmp0 - tmp3;
360 tmp11 = tmp1 + tmp2;
361 tmp12 = tmp1 - tmp2;
362 } else {
363 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
364 tmp2 = MULTIPLY(d2, FIX_0_541196100);
365 tmp3 = MULTIPLY(d2, FIX_1_306562965);
366
367 tmp0 = d4 << CONST_BITS;
368
369 tmp10 = tmp0 + tmp3;
370 tmp13 = tmp0 - tmp3;
371 tmp11 = tmp2 - tmp0;
372 tmp12 = -(tmp0 + tmp2);
373 }
374 } else {
375 if (d0) {
376 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
377 tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
378 tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
379 } else {
380 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
381 tmp10 = tmp13 = d4 << CONST_BITS;
382 tmp11 = tmp12 = -tmp10;
383 }
384 }
385 } else {
386 if (d2) {
387 if (d0) {
388 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
389 tmp2 = MULTIPLY(d2, FIX_0_541196100);
390 tmp3 = MULTIPLY(d2, FIX_1_306562965);
391
392 tmp0 = d0 << CONST_BITS;
393
394 tmp10 = tmp0 + tmp3;
395 tmp13 = tmp0 - tmp3;
396 tmp11 = tmp0 + tmp2;
397 tmp12 = tmp0 - tmp2;
398 } else {
399 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
400 tmp2 = MULTIPLY(d2, FIX_0_541196100);
401 tmp3 = MULTIPLY(d2, FIX_1_306562965);
402
403 tmp10 = tmp3;
404 tmp13 = -tmp3;
405 tmp11 = tmp2;
406 tmp12 = -tmp2;
407 }
408 } else {
409 if (d0) {
410 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
411 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
412 } else {
413 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
414 tmp10 = tmp13 = tmp11 = tmp12 = 0;
415 }
416 }
417 }
418 }
419
420 /* Odd part per figure 8; the matrix is unitary and hence its
421 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
422 */
423
424 if (d7) {
425 if (d5) {
426 if (d3) {
427 if (d1) {
428 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
429 z1 = d7 + d1;
430 z2 = d5 + d3;
431 z3 = d7 + d3;
432 z4 = d5 + d1;
433 z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
434
435 tmp0 = MULTIPLY(d7, FIX_0_298631336);
436 tmp1 = MULTIPLY(d5, FIX_2_053119869);
437 tmp2 = MULTIPLY(d3, FIX_3_072711026);
438 tmp3 = MULTIPLY(d1, FIX_1_501321110);
439 z1 = MULTIPLY(-z1, FIX_0_899976223);
440 z2 = MULTIPLY(-z2, FIX_2_562915447);
441 z3 = MULTIPLY(-z3, FIX_1_961570560);
442 z4 = MULTIPLY(-z4, FIX_0_390180644);
443
444 z3 += z5;
445 z4 += z5;
446
447 tmp0 += z1 + z3;
448 tmp1 += z2 + z4;
449 tmp2 += z2 + z3;
450 tmp3 += z1 + z4;
451 } else {
452 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
453 z2 = d5 + d3;
454 z3 = d7 + d3;
455 z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
456
457 tmp0 = MULTIPLY(d7, FIX_0_298631336);
458 tmp1 = MULTIPLY(d5, FIX_2_053119869);
459 tmp2 = MULTIPLY(d3, FIX_3_072711026);
460 z1 = MULTIPLY(-d7, FIX_0_899976223);
461 z2 = MULTIPLY(-z2, FIX_2_562915447);
462 z3 = MULTIPLY(-z3, FIX_1_961570560);
463 z4 = MULTIPLY(-d5, FIX_0_390180644);
464
465 z3 += z5;
466 z4 += z5;
467
468 tmp0 += z1 + z3;
469 tmp1 += z2 + z4;
470 tmp2 += z2 + z3;
471 tmp3 = z1 + z4;
472 }
473 } else {
474 if (d1) {
475 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
476 z1 = d7 + d1;
477 z4 = d5 + d1;
478 z5 = MULTIPLY(d7 + z4, FIX_1_175875602);
479
480 tmp0 = MULTIPLY(d7, FIX_0_298631336);
481 tmp1 = MULTIPLY(d5, FIX_2_053119869);
482 tmp3 = MULTIPLY(d1, FIX_1_501321110);
483 z1 = MULTIPLY(-z1, FIX_0_899976223);
484 z2 = MULTIPLY(-d5, FIX_2_562915447);
485 z3 = MULTIPLY(-d7, FIX_1_961570560);
486 z4 = MULTIPLY(-z4, FIX_0_390180644);
487
488 z3 += z5;
489 z4 += z5;
490
491 tmp0 += z1 + z3;
492 tmp1 += z2 + z4;
493 tmp2 = z2 + z3;
494 tmp3 += z1 + z4;
495 } else {
496 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
497 tmp0 = MULTIPLY(-d7, FIX_0_601344887);
498 z1 = MULTIPLY(-d7, FIX_0_899976223);
499 z3 = MULTIPLY(-d7, FIX_1_961570560);
500 tmp1 = MULTIPLY(-d5, FIX_0_509795579);
501 z2 = MULTIPLY(-d5, FIX_2_562915447);
502 z4 = MULTIPLY(-d5, FIX_0_390180644);
503 z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
504
505 z3 += z5;
506 z4 += z5;
507
508 tmp0 += z3;
509 tmp1 += z4;
510 tmp2 = z2 + z3;
511 tmp3 = z1 + z4;
512 }
513 }
514 } else {
515 if (d3) {
516 if (d1) {
517 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
518 z1 = d7 + d1;
519 z3 = d7 + d3;
520 z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
521
522 tmp0 = MULTIPLY(d7, FIX_0_298631336);
523 tmp2 = MULTIPLY(d3, FIX_3_072711026);
524 tmp3 = MULTIPLY(d1, FIX_1_501321110);
525 z1 = MULTIPLY(-z1, FIX_0_899976223);
526 z2 = MULTIPLY(-d3, FIX_2_562915447);
527 z3 = MULTIPLY(-z3, FIX_1_961570560);
528 z4 = MULTIPLY(-d1, FIX_0_390180644);
529
530 z3 += z5;
531 z4 += z5;
532
533 tmp0 += z1 + z3;
534 tmp1 = z2 + z4;
535 tmp2 += z2 + z3;
536 tmp3 += z1 + z4;
537 } else {
538 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
539 z3 = d7 + d3;
540
541 tmp0 = MULTIPLY(-d7, FIX_0_601344887);
542 z1 = MULTIPLY(-d7, FIX_0_899976223);
543 tmp2 = MULTIPLY(d3, FIX_0_509795579);
544 z2 = MULTIPLY(-d3, FIX_2_562915447);
545 z5 = MULTIPLY(z3, FIX_1_175875602);
546 z3 = MULTIPLY(-z3, FIX_0_785694958);
547
548 tmp0 += z3;
549 tmp1 = z2 + z5;
550 tmp2 += z3;
551 tmp3 = z1 + z5;
552 }
553 } else {
554 if (d1) {
555 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
556 z1 = d7 + d1;
557 z5 = MULTIPLY(z1, FIX_1_175875602);
558
559 z1 = MULTIPLY(z1, FIX_0_275899380);
560 z3 = MULTIPLY(-d7, FIX_1_961570560);
561 tmp0 = MULTIPLY(-d7, FIX_1_662939225);
562 z4 = MULTIPLY(-d1, FIX_0_390180644);
563 tmp3 = MULTIPLY(d1, FIX_1_111140466);
564
565 tmp0 += z1;
566 tmp1 = z4 + z5;
567 tmp2 = z3 + z5;
568 tmp3 += z1;
569 } else {
570 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
571 tmp0 = MULTIPLY(-d7, FIX_1_387039845);
572 tmp1 = MULTIPLY(d7, FIX_1_175875602);
573 tmp2 = MULTIPLY(-d7, FIX_0_785694958);
574 tmp3 = MULTIPLY(d7, FIX_0_275899380);
575 }
576 }
577 }
578 } else {
579 if (d5) {
580 if (d3) {
581 if (d1) {
582 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
583 z2 = d5 + d3;
584 z4 = d5 + d1;
585 z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
586
587 tmp1 = MULTIPLY(d5, FIX_2_053119869);
588 tmp2 = MULTIPLY(d3, FIX_3_072711026);
589 tmp3 = MULTIPLY(d1, FIX_1_501321110);
590 z1 = MULTIPLY(-d1, FIX_0_899976223);
591 z2 = MULTIPLY(-z2, FIX_2_562915447);
592 z3 = MULTIPLY(-d3, FIX_1_961570560);
593 z4 = MULTIPLY(-z4, FIX_0_390180644);
594
595 z3 += z5;
596 z4 += z5;
597
598 tmp0 = z1 + z3;
599 tmp1 += z2 + z4;
600 tmp2 += z2 + z3;
601 tmp3 += z1 + z4;
602 } else {
603 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
604 z2 = d5 + d3;
605
606 z5 = MULTIPLY(z2, FIX_1_175875602);
607 tmp1 = MULTIPLY(d5, FIX_1_662939225);
608 z4 = MULTIPLY(-d5, FIX_0_390180644);
609 z2 = MULTIPLY(-z2, FIX_1_387039845);
610 tmp2 = MULTIPLY(d3, FIX_1_111140466);
611 z3 = MULTIPLY(-d3, FIX_1_961570560);
612
613 tmp0 = z3 + z5;
614 tmp1 += z2;
615 tmp2 += z2;
616 tmp3 = z4 + z5;
617 }
618 } else {
619 if (d1) {
620 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
621 z4 = d5 + d1;
622
623 z5 = MULTIPLY(z4, FIX_1_175875602);
624 z1 = MULTIPLY(-d1, FIX_0_899976223);
625 tmp3 = MULTIPLY(d1, FIX_0_601344887);
626 tmp1 = MULTIPLY(-d5, FIX_0_509795579);
627 z2 = MULTIPLY(-d5, FIX_2_562915447);
628 z4 = MULTIPLY(z4, FIX_0_785694958);
629
630 tmp0 = z1 + z5;
631 tmp1 += z4;
632 tmp2 = z2 + z5;
633 tmp3 += z4;
634 } else {
635 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
636 tmp0 = MULTIPLY(d5, FIX_1_175875602);
637 tmp1 = MULTIPLY(d5, FIX_0_275899380);
638 tmp2 = MULTIPLY(-d5, FIX_1_387039845);
639 tmp3 = MULTIPLY(d5, FIX_0_785694958);
640 }
641 }
642 } else {
643 if (d3) {
644 if (d1) {
645 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
646 z5 = d1 + d3;
647 tmp3 = MULTIPLY(d1, FIX_0_211164243);
648 tmp2 = MULTIPLY(-d3, FIX_1_451774981);
649 z1 = MULTIPLY(d1, FIX_1_061594337);
650 z2 = MULTIPLY(-d3, FIX_2_172734803);
651 z4 = MULTIPLY(z5, FIX_0_785694958);
652 z5 = MULTIPLY(z5, FIX_1_175875602);
653
654 tmp0 = z1 - z4;
655 tmp1 = z2 + z4;
656 tmp2 += z5;
657 tmp3 += z5;
658 } else {
659 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
660 tmp0 = MULTIPLY(-d3, FIX_0_785694958);
661 tmp1 = MULTIPLY(-d3, FIX_1_387039845);
662 tmp2 = MULTIPLY(-d3, FIX_0_275899380);
663 tmp3 = MULTIPLY(d3, FIX_1_175875602);
664 }
665 } else {
666 if (d1) {
667 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
668 tmp0 = MULTIPLY(d1, FIX_0_275899380);
669 tmp1 = MULTIPLY(d1, FIX_0_785694958);
670 tmp2 = MULTIPLY(d1, FIX_1_175875602);
671 tmp3 = MULTIPLY(d1, FIX_1_387039845);
672 } else {
673 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
674 tmp0 = tmp1 = tmp2 = tmp3 = 0;
675 }
676 }
677 }
678 }
679 }
680 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
681
682 dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
683 dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
684 dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
685 dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
686 dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
687 dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
688 dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
689 dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
690
691 dataptr += DCTSIZE; /* advance pointer to next row */
692 }
693
694 /* Pass 2: process columns. */
695 /* Note that we must descale the results by a factor of 8 == 2**3, */
696 /* and also undo the PASS1_BITS scaling. */
697
698 dataptr = data;
699 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
700 /* Columns of zeroes can be exploited in the same way as we did with rows.
701 * However, the row calculation has created many nonzero AC terms, so the
702 * simplification applies less often (typically 5% to 10% of the time).
703 * On machines with very fast multiplication, it's possible that the
704 * test takes more time than it's worth. In that case this section
705 * may be commented out.
706 */
707
708 d0 = dataptr[DCTSIZE*0];
709 d1 = dataptr[DCTSIZE*1];
710 d2 = dataptr[DCTSIZE*2];
711 d3 = dataptr[DCTSIZE*3];
712 d4 = dataptr[DCTSIZE*4];
713 d5 = dataptr[DCTSIZE*5];
714 d6 = dataptr[DCTSIZE*6];
715 d7 = dataptr[DCTSIZE*7];
716
717 /* Even part: reverse the even part of the forward DCT. */
718 /* The rotator is sqrt(2)*c(-6). */
719 if (d6) {
720 if (d4) {
721 if (d2) {
722 if (d0) {
723 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
724 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
725 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
726 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
727
728 tmp0 = (d0 + d4) << CONST_BITS;
729 tmp1 = (d0 - d4) << CONST_BITS;
730
731 tmp10 = tmp0 + tmp3;
732 tmp13 = tmp0 - tmp3;
733 tmp11 = tmp1 + tmp2;
734 tmp12 = tmp1 - tmp2;
735 } else {
736 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
737 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
738 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
739 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
740
741 tmp0 = d4 << CONST_BITS;
742
743 tmp10 = tmp0 + tmp3;
744 tmp13 = tmp0 - tmp3;
745 tmp11 = tmp2 - tmp0;
746 tmp12 = -(tmp0 + tmp2);
747 }
748 } else {
749 if (d0) {
750 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
751 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
752 tmp3 = MULTIPLY(d6, FIX_0_541196100);
753
754 tmp0 = (d0 + d4) << CONST_BITS;
755 tmp1 = (d0 - d4) << CONST_BITS;
756
757 tmp10 = tmp0 + tmp3;
758 tmp13 = tmp0 - tmp3;
759 tmp11 = tmp1 + tmp2;
760 tmp12 = tmp1 - tmp2;
761 } else {
762 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
763 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
764 tmp3 = MULTIPLY(d6, FIX_0_541196100);
765
766 tmp0 = d4 << CONST_BITS;
767
768 tmp10 = tmp0 + tmp3;
769 tmp13 = tmp0 - tmp3;
770 tmp11 = tmp2 - tmp0;
771 tmp12 = -(tmp0 + tmp2);
772 }
773 }
774 } else {
775 if (d2) {
776 if (d0) {
777 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
778 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
779 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
780 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
781
782 tmp0 = d0 << CONST_BITS;
783
784 tmp10 = tmp0 + tmp3;
785 tmp13 = tmp0 - tmp3;
786 tmp11 = tmp0 + tmp2;
787 tmp12 = tmp0 - tmp2;
788 } else {
789 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
790 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
791 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
792 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
793
794 tmp10 = tmp3;
795 tmp13 = -tmp3;
796 tmp11 = tmp2;
797 tmp12 = -tmp2;
798 }
799 } else {
800 if (d0) {
801 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
802 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
803 tmp3 = MULTIPLY(d6, FIX_0_541196100);
804
805 tmp0 = d0 << CONST_BITS;
806
807 tmp10 = tmp0 + tmp3;
808 tmp13 = tmp0 - tmp3;
809 tmp11 = tmp0 + tmp2;
810 tmp12 = tmp0 - tmp2;
811 } else {
812 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
813 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
814 tmp3 = MULTIPLY(d6, FIX_0_541196100);
815
816 tmp10 = tmp3;
817 tmp13 = -tmp3;
818 tmp11 = tmp2;
819 tmp12 = -tmp2;
820 }
821 }
822 }
823 } else {
824 if (d4) {
825 if (d2) {
826 if (d0) {
827 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
828 tmp2 = MULTIPLY(d2, FIX_0_541196100);
829 tmp3 = MULTIPLY(d2, FIX_1_306562965);
830
831 tmp0 = (d0 + d4) << CONST_BITS;
832 tmp1 = (d0 - d4) << CONST_BITS;
833
834 tmp10 = tmp0 + tmp3;
835 tmp13 = tmp0 - tmp3;
836 tmp11 = tmp1 + tmp2;
837 tmp12 = tmp1 - tmp2;
838 } else {
839 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
840 tmp2 = MULTIPLY(d2, FIX_0_541196100);
841 tmp3 = MULTIPLY(d2, FIX_1_306562965);
842
843 tmp0 = d4 << CONST_BITS;
844
845 tmp10 = tmp0 + tmp3;
846 tmp13 = tmp0 - tmp3;
847 tmp11 = tmp2 - tmp0;
848 tmp12 = -(tmp0 + tmp2);
849 }
850 } else {
851 if (d0) {
852 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
853 tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
854 tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
855 } else {
856 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
857 tmp10 = tmp13 = d4 << CONST_BITS;
858 tmp11 = tmp12 = -tmp10;
859 }
860 }
861 } else {
862 if (d2) {
863 if (d0) {
864 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
865 tmp2 = MULTIPLY(d2, FIX_0_541196100);
866 tmp3 = MULTIPLY(d2, FIX_1_306562965);
867
868 tmp0 = d0 << CONST_BITS;
869
870 tmp10 = tmp0 + tmp3;
871 tmp13 = tmp0 - tmp3;
872 tmp11 = tmp0 + tmp2;
873 tmp12 = tmp0 - tmp2;
874 } else {
875 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
876 tmp2 = MULTIPLY(d2, FIX_0_541196100);
877 tmp3 = MULTIPLY(d2, FIX_1_306562965);
878
879 tmp10 = tmp3;
880 tmp13 = -tmp3;
881 tmp11 = tmp2;
882 tmp12 = -tmp2;
883 }
884 } else {
885 if (d0) {
886 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
887 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
888 } else {
889 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
890 tmp10 = tmp13 = tmp11 = tmp12 = 0;
891 }
892 }
893 }
894 }
895
896 /* Odd part per figure 8; the matrix is unitary and hence its
897 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
898 */
899 if (d7) {
900 if (d5) {
901 if (d3) {
902 if (d1) {
903 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
904 z1 = d7 + d1;
905 z2 = d5 + d3;
906 z3 = d7 + d3;
907 z4 = d5 + d1;
908 z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
909
910 tmp0 = MULTIPLY(d7, FIX_0_298631336);
911 tmp1 = MULTIPLY(d5, FIX_2_053119869);
912 tmp2 = MULTIPLY(d3, FIX_3_072711026);
913 tmp3 = MULTIPLY(d1, FIX_1_501321110);
914 z1 = MULTIPLY(-z1, FIX_0_899976223);
915 z2 = MULTIPLY(-z2, FIX_2_562915447);
916 z3 = MULTIPLY(-z3, FIX_1_961570560);
917 z4 = MULTIPLY(-z4, FIX_0_390180644);
918
919 z3 += z5;
920 z4 += z5;
921
922 tmp0 += z1 + z3;
923 tmp1 += z2 + z4;
924 tmp2 += z2 + z3;
925 tmp3 += z1 + z4;
926 } else {
927 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
928 z1 = d7;
929 z2 = d5 + d3;
930 z3 = d7 + d3;
931 z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
932
933 tmp0 = MULTIPLY(d7, FIX_0_298631336);
934 tmp1 = MULTIPLY(d5, FIX_2_053119869);
935 tmp2 = MULTIPLY(d3, FIX_3_072711026);
936 z1 = MULTIPLY(-d7, FIX_0_899976223);
937 z2 = MULTIPLY(-z2, FIX_2_562915447);
938 z3 = MULTIPLY(-z3, FIX_1_961570560);
939 z4 = MULTIPLY(-d5, FIX_0_390180644);
940
941 z3 += z5;
942 z4 += z5;
943
944 tmp0 += z1 + z3;
945 tmp1 += z2 + z4;
946 tmp2 += z2 + z3;
947 tmp3 = z1 + z4;
948 }
949 } else {
950 if (d1) {
951 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
952 z1 = d7 + d1;
953 z2 = d5;
954 z3 = d7;
955 z4 = d5 + d1;
956 z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
957
958 tmp0 = MULTIPLY(d7, FIX_0_298631336);
959 tmp1 = MULTIPLY(d5, FIX_2_053119869);
960 tmp3 = MULTIPLY(d1, FIX_1_501321110);
961 z1 = MULTIPLY(-z1, FIX_0_899976223);
962 z2 = MULTIPLY(-d5, FIX_2_562915447);
963 z3 = MULTIPLY(-d7, FIX_1_961570560);
964 z4 = MULTIPLY(-z4, FIX_0_390180644);
965
966 z3 += z5;
967 z4 += z5;
968
969 tmp0 += z1 + z3;
970 tmp1 += z2 + z4;
971 tmp2 = z2 + z3;
972 tmp3 += z1 + z4;
973 } else {
974 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
975 tmp0 = MULTIPLY(-d7, FIX_0_601344887);
976 z1 = MULTIPLY(-d7, FIX_0_899976223);
977 z3 = MULTIPLY(-d7, FIX_1_961570560);
978 tmp1 = MULTIPLY(-d5, FIX_0_509795579);
979 z2 = MULTIPLY(-d5, FIX_2_562915447);
980 z4 = MULTIPLY(-d5, FIX_0_390180644);
981 z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
982
983 z3 += z5;
984 z4 += z5;
985
986 tmp0 += z3;
987 tmp1 += z4;
988 tmp2 = z2 + z3;
989 tmp3 = z1 + z4;
990 }
991 }
992 } else {
993 if (d3) {
994 if (d1) {
995 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
996 z1 = d7 + d1;
997 z3 = d7 + d3;
998 z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
999
1000 tmp0 = MULTIPLY(d7, FIX_0_298631336);
1001 tmp2 = MULTIPLY(d3, FIX_3_072711026);
1002 tmp3 = MULTIPLY(d1, FIX_1_501321110);
1003 z1 = MULTIPLY(-z1, FIX_0_899976223);
1004 z2 = MULTIPLY(-d3, FIX_2_562915447);
1005 z3 = MULTIPLY(-z3, FIX_1_961570560);
1006 z4 = MULTIPLY(-d1, FIX_0_390180644);
1007
1008 z3 += z5;
1009 z4 += z5;
1010
1011 tmp0 += z1 + z3;
1012 tmp1 = z2 + z4;
1013 tmp2 += z2 + z3;
1014 tmp3 += z1 + z4;
1015 } else {
1016 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
1017 z3 = d7 + d3;
1018
1019 tmp0 = MULTIPLY(-d7, FIX_0_601344887);
1020 z1 = MULTIPLY(-d7, FIX_0_899976223);
1021 tmp2 = MULTIPLY(d3, FIX_0_509795579);
1022 z2 = MULTIPLY(-d3, FIX_2_562915447);
1023 z5 = MULTIPLY(z3, FIX_1_175875602);
1024 z3 = MULTIPLY(-z3, FIX_0_785694958);
1025
1026 tmp0 += z3;
1027 tmp1 = z2 + z5;
1028 tmp2 += z3;
1029 tmp3 = z1 + z5;
1030 }
1031 } else {
1032 if (d1) {
1033 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
1034 z1 = d7 + d1;
1035 z5 = MULTIPLY(z1, FIX_1_175875602);
1036
1037 z1 = MULTIPLY(z1, FIX_0_275899380);
1038 z3 = MULTIPLY(-d7, FIX_1_961570560);
1039 tmp0 = MULTIPLY(-d7, FIX_1_662939225);
1040 z4 = MULTIPLY(-d1, FIX_0_390180644);
1041 tmp3 = MULTIPLY(d1, FIX_1_111140466);
1042
1043 tmp0 += z1;
1044 tmp1 = z4 + z5;
1045 tmp2 = z3 + z5;
1046 tmp3 += z1;
1047 } else {
1048 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
1049 tmp0 = MULTIPLY(-d7, FIX_1_387039845);
1050 tmp1 = MULTIPLY(d7, FIX_1_175875602);
1051 tmp2 = MULTIPLY(-d7, FIX_0_785694958);
1052 tmp3 = MULTIPLY(d7, FIX_0_275899380);
1053 }
1054 }
1055 }
1056 } else {
1057 if (d5) {
1058 if (d3) {
1059 if (d1) {
1060 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
1061 z2 = d5 + d3;
1062 z4 = d5 + d1;
1063 z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
1064
1065 tmp1 = MULTIPLY(d5, FIX_2_053119869);
1066 tmp2 = MULTIPLY(d3, FIX_3_072711026);
1067 tmp3 = MULTIPLY(d1, FIX_1_501321110);
1068 z1 = MULTIPLY(-d1, FIX_0_899976223);
1069 z2 = MULTIPLY(-z2, FIX_2_562915447);
1070 z3 = MULTIPLY(-d3, FIX_1_961570560);
1071 z4 = MULTIPLY(-z4, FIX_0_390180644);
1072
1073 z3 += z5;
1074 z4 += z5;
1075
1076 tmp0 = z1 + z3;
1077 tmp1 += z2 + z4;
1078 tmp2 += z2 + z3;
1079 tmp3 += z1 + z4;
1080 } else {
1081 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
1082 z2 = d5 + d3;
1083
1084 z5 = MULTIPLY(z2, FIX_1_175875602);
1085 tmp1 = MULTIPLY(d5, FIX_1_662939225);
1086 z4 = MULTIPLY(-d5, FIX_0_390180644);
1087 z2 = MULTIPLY(-z2, FIX_1_387039845);
1088 tmp2 = MULTIPLY(d3, FIX_1_111140466);
1089 z3 = MULTIPLY(-d3, FIX_1_961570560);
1090
1091 tmp0 = z3 + z5;
1092 tmp1 += z2;
1093 tmp2 += z2;
1094 tmp3 = z4 + z5;
1095 }
1096 } else {
1097 if (d1) {
1098 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
1099 z4 = d5 + d1;
1100
1101 z5 = MULTIPLY(z4, FIX_1_175875602);
1102 z1 = MULTIPLY(-d1, FIX_0_899976223);
1103 tmp3 = MULTIPLY(d1, FIX_0_601344887);
1104 tmp1 = MULTIPLY(-d5, FIX_0_509795579);
1105 z2 = MULTIPLY(-d5, FIX_2_562915447);
1106 z4 = MULTIPLY(z4, FIX_0_785694958);
1107
1108 tmp0 = z1 + z5;
1109 tmp1 += z4;
1110 tmp2 = z2 + z5;
1111 tmp3 += z4;
1112 } else {
1113 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
1114 tmp0 = MULTIPLY(d5, FIX_1_175875602);
1115 tmp1 = MULTIPLY(d5, FIX_0_275899380);
1116 tmp2 = MULTIPLY(-d5, FIX_1_387039845);
1117 tmp3 = MULTIPLY(d5, FIX_0_785694958);
1118 }
1119 }
1120 } else {
1121 if (d3) {
1122 if (d1) {
1123 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
1124 z5 = d1 + d3;
1125 tmp3 = MULTIPLY(d1, FIX_0_211164243);
1126 tmp2 = MULTIPLY(-d3, FIX_1_451774981);
1127 z1 = MULTIPLY(d1, FIX_1_061594337);
1128 z2 = MULTIPLY(-d3, FIX_2_172734803);
1129 z4 = MULTIPLY(z5, FIX_0_785694958);
1130 z5 = MULTIPLY(z5, FIX_1_175875602);
1131
1132 tmp0 = z1 - z4;
1133 tmp1 = z2 + z4;
1134 tmp2 += z5;
1135 tmp3 += z5;
1136 } else {
1137 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
1138 tmp0 = MULTIPLY(-d3, FIX_0_785694958);
1139 tmp1 = MULTIPLY(-d3, FIX_1_387039845);
1140 tmp2 = MULTIPLY(-d3, FIX_0_275899380);
1141 tmp3 = MULTIPLY(d3, FIX_1_175875602);
1142 }
1143 } else {
1144 if (d1) {
1145 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
1146 tmp0 = MULTIPLY(d1, FIX_0_275899380);
1147 tmp1 = MULTIPLY(d1, FIX_0_785694958);
1148 tmp2 = MULTIPLY(d1, FIX_1_175875602);
1149 tmp3 = MULTIPLY(d1, FIX_1_387039845);
1150 } else {
1151 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
1152 tmp0 = tmp1 = tmp2 = tmp3 = 0;
1153 }
1154 }
1155 }
1156 }
1157
1158 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1159
1160 dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3,
1161 CONST_BITS+PASS1_BITS+3);
1162 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3,
1163 CONST_BITS+PASS1_BITS+3);
1164 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2,
1165 CONST_BITS+PASS1_BITS+3);
1166 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2,
1167 CONST_BITS+PASS1_BITS+3);
1168 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1,
1169 CONST_BITS+PASS1_BITS+3);
1170 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1,
1171 CONST_BITS+PASS1_BITS+3);
1172 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0,
1173 CONST_BITS+PASS1_BITS+3);
1174 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0,
1175 CONST_BITS+PASS1_BITS+3);
1176
1177 dataptr++; /* advance pointer to next column */
1178 }
1179 }
1180
1181
1182 /* here is the reference one, in case of problems with the normal one */
1183
1184 /* idctref.c, Inverse Discrete Fourier Transform, double precision */
1185
1186 /* Copyright (C) 1994, MPEG Software Simulation Group. All Rights Reserved. */
1187
1188 /*
1189 * Disclaimer of Warranty
1190 *
1191 * These software programs are available to the user without any license fee or
1192 * royalty on an "as is" basis. The MPEG Software Simulation Group disclaims
1193 * any and all warranties, whether express, implied, or statuary, including any
1194 * implied warranties or merchantability or of fitness for a particular
1195 * purpose. In no event shall the copyright-holder be liable for any
1196 * incidental, punitive, or consequential damages of any kind whatsoever
1197 * arising from the use of these programs.
1198 *
1199 * This disclaimer of warranty extends to the user of these programs and user's
1200 * customers, employees, agents, transferees, successors, and assigns.
1201 *
1202 * The MPEG Software Simulation Group does not represent or warrant that the
1203 * programs furnished hereunder are free of infringement of any third-party
1204 * patents.
1205 *
1206 * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
1207 * are subject to royalty fees to patent holders. Many of these patents are
1208 * general enough such that they are unavoidable regardless of implementation
1209 * design.
1210 *
1211 */
1212
1213 /* Perform IEEE 1180 reference (64-bit floating point, separable 8x1
1214 * direct matrix multiply) Inverse Discrete Cosine Transform
1215 */
1216
1217
1218 /* cosine transform matrix for 8x1 IDCT */
1219 static double itrans_coef[8][8];
1220
1221
1222
init_idctref()1223 void init_idctref() {
1224 /*----------------------------------------------------------------------------
1225 initialize DCT coefficient matrix
1226 -----------------------------------------------------------------------------*/
1227 unsigned int freq;
1228
1229 for (freq=0; freq < 8; ++freq) {
1230 double const scale = (freq == 0) ? sqrt(0.125) : 0.5;
1231
1232 unsigned int time;
1233
1234 for (time = 0; time < 8; ++time)
1235 itrans_coef[freq][time] = scale*cos((M_PI/8.0)*freq*(time + 0.5));
1236 }
1237 }
1238
1239
1240
1241 /* perform IDCT matrix multiply for 8x8 coefficient block */
1242
reference_rev_dct(block)1243 void reference_rev_dct(block)
1244 int16 *block;
1245 {
1246 int i, j, k, v;
1247 double partial_product;
1248 double tmp[64];
1249
1250 for (i=0; i<8; i++)
1251 for (j=0; j<8; j++)
1252 {
1253 partial_product = 0.0;
1254
1255 for (k=0; k<8; k++)
1256 partial_product+= itrans_coef[k][j]*block[8*i+k];
1257
1258 tmp[8*i+j] = partial_product;
1259 }
1260
1261 /* Transpose operation is integrated into address mapping by switching
1262 loop order of i and j */
1263
1264 for (j=0; j<8; j++)
1265 for (i=0; i<8; i++)
1266 {
1267 partial_product = 0.0;
1268
1269 for (k=0; k<8; k++)
1270 partial_product+= itrans_coef[k][i]*tmp[8*k+j];
1271
1272 v = floor(partial_product+0.5);
1273 block[8*i+j] = (v<-256) ? -256 : ((v>255) ? 255 : v);
1274 }
1275 }
1276