1 /*
2 * jrevdct.c
3 *
4 * Copyright (C) 1991, 1992, Thomas G. Lane.
5 * This file is part of the Independent JPEG Group's software.
6 * For conditions of distribution and use, see the accompanying README file.
7 *
8 * This file contains the basic inverse-DCT transformation subroutine.
9 *
10 * This implementation is based on an algorithm described in
11 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
12 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
13 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
14 * The primary algorithm described there uses 11 multiplies and 29 adds.
15 * We use their alternate method with 12 multiplies and 32 adds.
16 * The advantage of this method is that no data path contains more than one
17 * multiplication; this allows a very simple and accurate implementation in
18 * scaled fixed-point arithmetic, with a minimal number of shifts.
19 *
20 * I've made lots of modifications to attempt to take advantage of the
21 * sparse nature of the DCT matrices we're getting. Although the logic
22 * is cumbersome, it's straightforward and the resulting code is much
23 * faster.
24 *
25 * A better way to do this would be to pass in the DCT block as a sparse
26 * matrix, perhaps with the difference cases encoded.
27 */
28
29 #include <memory.h>
30 #include "all.h"
31 #include "ansi.h"
32 #include "dct.h"
33
34
35 #define CONST_BITS 13
36
37 /*
38 * This routine is specialized to the case DCTSIZE = 8.
39 */
40
41 #if DCTSIZE != 8
42 Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
43 #endif
44
45
46 /*
47 * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT
48 * on each column. Direct algorithms are also available, but they are
49 * much more complex and seem not to be any faster when reduced to code.
50 *
51 * The poop on this scaling stuff is as follows:
52 *
53 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
54 * larger than the true IDCT outputs. The final outputs are therefore
55 * a factor of N larger than desired; since N=8 this can be cured by
56 * a simple right shift at the end of the algorithm. The advantage of
57 * this arrangement is that we save two multiplications per 1-D IDCT,
58 * because the y0 and y4 inputs need not be divided by sqrt(N).
59 *
60 * We have to do addition and subtraction of the integer inputs, which
61 * is no problem, and multiplication by fractional constants, which is
62 * a problem to do in integer arithmetic. We multiply all the constants
63 * by CONST_SCALE and convert them to integer constants (thus retaining
64 * CONST_BITS bits of precision in the constants). After doing a
65 * multiplication we have to divide the product by CONST_SCALE, with proper
66 * rounding, to produce the correct output. This division can be done
67 * cheaply as a right shift of CONST_BITS bits. We postpone shifting
68 * as long as possible so that partial sums can be added together with
69 * full fractional precision.
70 *
71 * The outputs of the first pass are scaled up by PASS1_BITS bits so that
72 * they are represented to better-than-integral precision. These outputs
73 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
74 * with the recommended scaling. (To scale up 12-bit sample data further, an
75 * intermediate int32 array would be needed.)
76 *
77 * To avoid overflow of the 32-bit intermediate results in pass 2, we must
78 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
79 * shows that the values given below are the most effective.
80 */
81
82 #ifdef EIGHT_BIT_SAMPLES
83 #define PASS1_BITS 2
84 #else
85 #define PASS1_BITS 1 /* lose a little precision to avoid overflow */
86 #endif
87
88 #define ONE ((int32) 1)
89
90 #define CONST_SCALE (ONE << CONST_BITS)
91
92 /* Convert a positive real constant to an integer scaled by CONST_SCALE.
93 * IMPORTANT: if your compiler doesn't do this arithmetic at compile time,
94 * you will pay a significant penalty in run time. In that case, figure
95 * the correct integer constant values and insert them by hand.
96 */
97
98 /* Actually FIX is no longer used, we precomputed them all */
99 #define FIX(x) ((int32) ((x) * CONST_SCALE + 0.5))
100
101 /* Descale and correctly round an int32 value that's scaled by N bits.
102 * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
103 * the fudge factor is correct for either sign of X.
104 */
105
106 #define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
107
108 /* Multiply an int32 variable by an int32 constant to yield an int32 result.
109 * For 8-bit samples with the recommended scaling, all the variable
110 * and constant values involved are no more than 16 bits wide, so a
111 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply;
112 * this provides a useful speedup on many machines.
113 * There is no way to specify a 16x16->32 multiply in portable C, but
114 * some C compilers will do the right thing if you provide the correct
115 * combination of casts.
116 * NB: for 12-bit samples, a full 32-bit multiplication will be needed.
117 */
118
119 #ifdef EIGHT_BIT_SAMPLES
120 #ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */
121 #define MULTIPLY(var,const) (((INT16) (var)) * ((INT16) (const)))
122 #endif
123 #ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */
124 #define MULTIPLY(var,const) (((INT16) (var)) * ((int32) (const)))
125 #endif
126 #endif
127
128 #ifndef MULTIPLY /* default definition */
129 #define MULTIPLY(var,const) ((var) * (const))
130 #endif
131
132
133 /*
134 Unlike our decoder where we approximate the FIXes, we need to use exact
135 ones here or successive P-frames will drift too much with Reference frame coding
136 */
137 #define FIX_0_211164243 1730
138 #define FIX_0_275899380 2260
139 #define FIX_0_298631336 2446
140 #define FIX_0_390180644 3196
141 #define FIX_0_509795579 4176
142 #define FIX_0_541196100 4433
143 #define FIX_0_601344887 4926
144 #define FIX_0_765366865 6270
145 #define FIX_0_785694958 6436
146 #define FIX_0_899976223 7373
147 #define FIX_1_061594337 8697
148 #define FIX_1_111140466 9102
149 #define FIX_1_175875602 9633
150 #define FIX_1_306562965 10703
151 #define FIX_1_387039845 11363
152 #define FIX_1_451774981 11893
153 #define FIX_1_501321110 12299
154 #define FIX_1_662939225 13623
155 #define FIX_1_847759065 15137
156 #define FIX_1_961570560 16069
157 #define FIX_2_053119869 16819
158 #define FIX_2_172734803 17799
159 #define FIX_2_562915447 20995
160 #define FIX_3_072711026 25172
161
162 /*
163 Switch on reverse_dct choices
164 */
165 void reference_rev_dct _ANSI_ARGS_((int16 *block));
166 void mpeg_jrevdct_quick _ANSI_ARGS_((int16 *block));
167 void init_idctref _ANSI_ARGS_((void));
168
169 extern boolean pureDCT;
170
171 void
mpeg_jrevdct(DCTBLOCK data)172 mpeg_jrevdct(DCTBLOCK data)
173 {
174 if (pureDCT) reference_rev_dct(data);
175 else mpeg_jrevdct_quick(data);
176 }
177
178 /*
179 * Perform the inverse DCT on one block of coefficients.
180 */
181
182 void
mpeg_jrevdct_quick(DCTBLOCK data)183 mpeg_jrevdct_quick(DCTBLOCK data)
184 {
185 int32 tmp0, tmp1, tmp2, tmp3;
186 int32 tmp10, tmp11, tmp12, tmp13;
187 int32 z1, z2, z3, z4, z5;
188 int32 d0, d1, d2, d3, d4, d5, d6, d7;
189 register DCTELEM *dataptr;
190 int rowctr;
191 SHIFT_TEMPS
192
193 /* Pass 1: process rows. */
194 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
195 /* furthermore, we scale the results by 2**PASS1_BITS. */
196
197 dataptr = data;
198
199 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
200 /* Due to quantization, we will usually find that many of the input
201 * coefficients are zero, especially the AC terms. We can exploit this
202 * by short-circuiting the IDCT calculation for any row in which all
203 * the AC terms are zero. In that case each output is equal to the
204 * DC coefficient (with scale factor as needed).
205 * With typical images and quantization tables, half or more of the
206 * row DCT calculations can be simplified this way.
207 */
208
209 register int *idataptr = (int*)dataptr;
210 d0 = dataptr[0];
211 d1 = dataptr[1];
212 if ((d1 == 0) && (idataptr[1] | idataptr[2] | idataptr[3]) == 0) {
213 /* AC terms all zero */
214 if (d0) {
215 /* Compute a 32 bit value to assign. */
216 DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS);
217 register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
218
219 idataptr[0] = v;
220 idataptr[1] = v;
221 idataptr[2] = v;
222 idataptr[3] = v;
223 }
224
225 dataptr += DCTSIZE; /* advance pointer to next row */
226 continue;
227 }
228 d2 = dataptr[2];
229 d3 = dataptr[3];
230 d4 = dataptr[4];
231 d5 = dataptr[5];
232 d6 = dataptr[6];
233 d7 = dataptr[7];
234
235 /* Even part: reverse the even part of the forward DCT. */
236 /* The rotator is sqrt(2)*c(-6). */
237 {
238 if (d6) {
239 if (d4) {
240 if (d2) {
241 if (d0) {
242 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
243 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
244 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
245 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
246
247 tmp0 = (d0 + d4) << CONST_BITS;
248 tmp1 = (d0 - d4) << CONST_BITS;
249
250 tmp10 = tmp0 + tmp3;
251 tmp13 = tmp0 - tmp3;
252 tmp11 = tmp1 + tmp2;
253 tmp12 = tmp1 - tmp2;
254 } else {
255 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
256 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
257 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
258 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
259
260 tmp0 = d4 << CONST_BITS;
261
262 tmp10 = tmp0 + tmp3;
263 tmp13 = tmp0 - tmp3;
264 tmp11 = tmp2 - tmp0;
265 tmp12 = -(tmp0 + tmp2);
266 }
267 } else {
268 if (d0) {
269 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
270 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
271 tmp3 = MULTIPLY(d6, FIX_0_541196100);
272
273 tmp0 = (d0 + d4) << CONST_BITS;
274 tmp1 = (d0 - d4) << CONST_BITS;
275
276 tmp10 = tmp0 + tmp3;
277 tmp13 = tmp0 - tmp3;
278 tmp11 = tmp1 + tmp2;
279 tmp12 = tmp1 - tmp2;
280 } else {
281 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
282 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
283 tmp3 = MULTIPLY(d6, FIX_0_541196100);
284
285 tmp0 = d4 << CONST_BITS;
286
287 tmp10 = tmp0 + tmp3;
288 tmp13 = tmp0 - tmp3;
289 tmp11 = tmp2 - tmp0;
290 tmp12 = -(tmp0 + tmp2);
291 }
292 }
293 } else {
294 if (d2) {
295 if (d0) {
296 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
297 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
298 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
299 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
300
301 tmp0 = d0 << CONST_BITS;
302
303 tmp10 = tmp0 + tmp3;
304 tmp13 = tmp0 - tmp3;
305 tmp11 = tmp0 + tmp2;
306 tmp12 = tmp0 - tmp2;
307 } else {
308 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
309 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
310 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
311 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
312
313 tmp10 = tmp3;
314 tmp13 = -tmp3;
315 tmp11 = tmp2;
316 tmp12 = -tmp2;
317 }
318 } else {
319 if (d0) {
320 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
321 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
322 tmp3 = MULTIPLY(d6, FIX_0_541196100);
323
324 tmp0 = d0 << CONST_BITS;
325
326 tmp10 = tmp0 + tmp3;
327 tmp13 = tmp0 - tmp3;
328 tmp11 = tmp0 + tmp2;
329 tmp12 = tmp0 - tmp2;
330 } else {
331 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
332 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
333 tmp3 = MULTIPLY(d6, FIX_0_541196100);
334
335 tmp10 = tmp3;
336 tmp13 = -tmp3;
337 tmp11 = tmp2;
338 tmp12 = -tmp2;
339 }
340 }
341 }
342 } else {
343 if (d4) {
344 if (d2) {
345 if (d0) {
346 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
347 tmp2 = MULTIPLY(d2, FIX_0_541196100);
348 tmp3 = MULTIPLY(d2, FIX_1_306562965);
349
350 tmp0 = (d0 + d4) << CONST_BITS;
351 tmp1 = (d0 - d4) << CONST_BITS;
352
353 tmp10 = tmp0 + tmp3;
354 tmp13 = tmp0 - tmp3;
355 tmp11 = tmp1 + tmp2;
356 tmp12 = tmp1 - tmp2;
357 } else {
358 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
359 tmp2 = MULTIPLY(d2, FIX_0_541196100);
360 tmp3 = MULTIPLY(d2, FIX_1_306562965);
361
362 tmp0 = d4 << CONST_BITS;
363
364 tmp10 = tmp0 + tmp3;
365 tmp13 = tmp0 - tmp3;
366 tmp11 = tmp2 - tmp0;
367 tmp12 = -(tmp0 + tmp2);
368 }
369 } else {
370 if (d0) {
371 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
372 tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
373 tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
374 } else {
375 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
376 tmp10 = tmp13 = d4 << CONST_BITS;
377 tmp11 = tmp12 = -tmp10;
378 }
379 }
380 } else {
381 if (d2) {
382 if (d0) {
383 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
384 tmp2 = MULTIPLY(d2, FIX_0_541196100);
385 tmp3 = MULTIPLY(d2, FIX_1_306562965);
386
387 tmp0 = d0 << CONST_BITS;
388
389 tmp10 = tmp0 + tmp3;
390 tmp13 = tmp0 - tmp3;
391 tmp11 = tmp0 + tmp2;
392 tmp12 = tmp0 - tmp2;
393 } else {
394 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
395 tmp2 = MULTIPLY(d2, FIX_0_541196100);
396 tmp3 = MULTIPLY(d2, FIX_1_306562965);
397
398 tmp10 = tmp3;
399 tmp13 = -tmp3;
400 tmp11 = tmp2;
401 tmp12 = -tmp2;
402 }
403 } else {
404 if (d0) {
405 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
406 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
407 } else {
408 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
409 tmp10 = tmp13 = tmp11 = tmp12 = 0;
410 }
411 }
412 }
413 }
414
415 /* Odd part per figure 8; the matrix is unitary and hence its
416 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
417 */
418
419 if (d7) {
420 if (d5) {
421 if (d3) {
422 if (d1) {
423 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
424 z1 = d7 + d1;
425 z2 = d5 + d3;
426 z3 = d7 + d3;
427 z4 = d5 + d1;
428 z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
429
430 tmp0 = MULTIPLY(d7, FIX_0_298631336);
431 tmp1 = MULTIPLY(d5, FIX_2_053119869);
432 tmp2 = MULTIPLY(d3, FIX_3_072711026);
433 tmp3 = MULTIPLY(d1, FIX_1_501321110);
434 z1 = MULTIPLY(-z1, FIX_0_899976223);
435 z2 = MULTIPLY(-z2, FIX_2_562915447);
436 z3 = MULTIPLY(-z3, FIX_1_961570560);
437 z4 = MULTIPLY(-z4, FIX_0_390180644);
438
439 z3 += z5;
440 z4 += z5;
441
442 tmp0 += z1 + z3;
443 tmp1 += z2 + z4;
444 tmp2 += z2 + z3;
445 tmp3 += z1 + z4;
446 } else {
447 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
448 z2 = d5 + d3;
449 z3 = d7 + d3;
450 z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
451
452 tmp0 = MULTIPLY(d7, FIX_0_298631336);
453 tmp1 = MULTIPLY(d5, FIX_2_053119869);
454 tmp2 = MULTIPLY(d3, FIX_3_072711026);
455 z1 = MULTIPLY(-d7, FIX_0_899976223);
456 z2 = MULTIPLY(-z2, FIX_2_562915447);
457 z3 = MULTIPLY(-z3, FIX_1_961570560);
458 z4 = MULTIPLY(-d5, FIX_0_390180644);
459
460 z3 += z5;
461 z4 += z5;
462
463 tmp0 += z1 + z3;
464 tmp1 += z2 + z4;
465 tmp2 += z2 + z3;
466 tmp3 = z1 + z4;
467 }
468 } else {
469 if (d1) {
470 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
471 z1 = d7 + d1;
472 z4 = d5 + d1;
473 z5 = MULTIPLY(d7 + z4, FIX_1_175875602);
474
475 tmp0 = MULTIPLY(d7, FIX_0_298631336);
476 tmp1 = MULTIPLY(d5, FIX_2_053119869);
477 tmp3 = MULTIPLY(d1, FIX_1_501321110);
478 z1 = MULTIPLY(-z1, FIX_0_899976223);
479 z2 = MULTIPLY(-d5, FIX_2_562915447);
480 z3 = MULTIPLY(-d7, FIX_1_961570560);
481 z4 = MULTIPLY(-z4, FIX_0_390180644);
482
483 z3 += z5;
484 z4 += z5;
485
486 tmp0 += z1 + z3;
487 tmp1 += z2 + z4;
488 tmp2 = z2 + z3;
489 tmp3 += z1 + z4;
490 } else {
491 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
492 tmp0 = MULTIPLY(-d7, FIX_0_601344887);
493 z1 = MULTIPLY(-d7, FIX_0_899976223);
494 z3 = MULTIPLY(-d7, FIX_1_961570560);
495 tmp1 = MULTIPLY(-d5, FIX_0_509795579);
496 z2 = MULTIPLY(-d5, FIX_2_562915447);
497 z4 = MULTIPLY(-d5, FIX_0_390180644);
498 z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
499
500 z3 += z5;
501 z4 += z5;
502
503 tmp0 += z3;
504 tmp1 += z4;
505 tmp2 = z2 + z3;
506 tmp3 = z1 + z4;
507 }
508 }
509 } else {
510 if (d3) {
511 if (d1) {
512 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
513 z1 = d7 + d1;
514 z3 = d7 + d3;
515 z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
516
517 tmp0 = MULTIPLY(d7, FIX_0_298631336);
518 tmp2 = MULTIPLY(d3, FIX_3_072711026);
519 tmp3 = MULTIPLY(d1, FIX_1_501321110);
520 z1 = MULTIPLY(-z1, FIX_0_899976223);
521 z2 = MULTIPLY(-d3, FIX_2_562915447);
522 z3 = MULTIPLY(-z3, FIX_1_961570560);
523 z4 = MULTIPLY(-d1, FIX_0_390180644);
524
525 z3 += z5;
526 z4 += z5;
527
528 tmp0 += z1 + z3;
529 tmp1 = z2 + z4;
530 tmp2 += z2 + z3;
531 tmp3 += z1 + z4;
532 } else {
533 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
534 z3 = d7 + d3;
535
536 tmp0 = MULTIPLY(-d7, FIX_0_601344887);
537 z1 = MULTIPLY(-d7, FIX_0_899976223);
538 tmp2 = MULTIPLY(d3, FIX_0_509795579);
539 z2 = MULTIPLY(-d3, FIX_2_562915447);
540 z5 = MULTIPLY(z3, FIX_1_175875602);
541 z3 = MULTIPLY(-z3, FIX_0_785694958);
542
543 tmp0 += z3;
544 tmp1 = z2 + z5;
545 tmp2 += z3;
546 tmp3 = z1 + z5;
547 }
548 } else {
549 if (d1) {
550 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
551 z1 = d7 + d1;
552 z5 = MULTIPLY(z1, FIX_1_175875602);
553
554 z1 = MULTIPLY(z1, FIX_0_275899380);
555 z3 = MULTIPLY(-d7, FIX_1_961570560);
556 tmp0 = MULTIPLY(-d7, FIX_1_662939225);
557 z4 = MULTIPLY(-d1, FIX_0_390180644);
558 tmp3 = MULTIPLY(d1, FIX_1_111140466);
559
560 tmp0 += z1;
561 tmp1 = z4 + z5;
562 tmp2 = z3 + z5;
563 tmp3 += z1;
564 } else {
565 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
566 tmp0 = MULTIPLY(-d7, FIX_1_387039845);
567 tmp1 = MULTIPLY(d7, FIX_1_175875602);
568 tmp2 = MULTIPLY(-d7, FIX_0_785694958);
569 tmp3 = MULTIPLY(d7, FIX_0_275899380);
570 }
571 }
572 }
573 } else {
574 if (d5) {
575 if (d3) {
576 if (d1) {
577 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
578 z2 = d5 + d3;
579 z4 = d5 + d1;
580 z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
581
582 tmp1 = MULTIPLY(d5, FIX_2_053119869);
583 tmp2 = MULTIPLY(d3, FIX_3_072711026);
584 tmp3 = MULTIPLY(d1, FIX_1_501321110);
585 z1 = MULTIPLY(-d1, FIX_0_899976223);
586 z2 = MULTIPLY(-z2, FIX_2_562915447);
587 z3 = MULTIPLY(-d3, FIX_1_961570560);
588 z4 = MULTIPLY(-z4, FIX_0_390180644);
589
590 z3 += z5;
591 z4 += z5;
592
593 tmp0 = z1 + z3;
594 tmp1 += z2 + z4;
595 tmp2 += z2 + z3;
596 tmp3 += z1 + z4;
597 } else {
598 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
599 z2 = d5 + d3;
600
601 z5 = MULTIPLY(z2, FIX_1_175875602);
602 tmp1 = MULTIPLY(d5, FIX_1_662939225);
603 z4 = MULTIPLY(-d5, FIX_0_390180644);
604 z2 = MULTIPLY(-z2, FIX_1_387039845);
605 tmp2 = MULTIPLY(d3, FIX_1_111140466);
606 z3 = MULTIPLY(-d3, FIX_1_961570560);
607
608 tmp0 = z3 + z5;
609 tmp1 += z2;
610 tmp2 += z2;
611 tmp3 = z4 + z5;
612 }
613 } else {
614 if (d1) {
615 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
616 z4 = d5 + d1;
617
618 z5 = MULTIPLY(z4, FIX_1_175875602);
619 z1 = MULTIPLY(-d1, FIX_0_899976223);
620 tmp3 = MULTIPLY(d1, FIX_0_601344887);
621 tmp1 = MULTIPLY(-d5, FIX_0_509795579);
622 z2 = MULTIPLY(-d5, FIX_2_562915447);
623 z4 = MULTIPLY(z4, FIX_0_785694958);
624
625 tmp0 = z1 + z5;
626 tmp1 += z4;
627 tmp2 = z2 + z5;
628 tmp3 += z4;
629 } else {
630 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
631 tmp0 = MULTIPLY(d5, FIX_1_175875602);
632 tmp1 = MULTIPLY(d5, FIX_0_275899380);
633 tmp2 = MULTIPLY(-d5, FIX_1_387039845);
634 tmp3 = MULTIPLY(d5, FIX_0_785694958);
635 }
636 }
637 } else {
638 if (d3) {
639 if (d1) {
640 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
641 z5 = d1 + d3;
642 tmp3 = MULTIPLY(d1, FIX_0_211164243);
643 tmp2 = MULTIPLY(-d3, FIX_1_451774981);
644 z1 = MULTIPLY(d1, FIX_1_061594337);
645 z2 = MULTIPLY(-d3, FIX_2_172734803);
646 z4 = MULTIPLY(z5, FIX_0_785694958);
647 z5 = MULTIPLY(z5, FIX_1_175875602);
648
649 tmp0 = z1 - z4;
650 tmp1 = z2 + z4;
651 tmp2 += z5;
652 tmp3 += z5;
653 } else {
654 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
655 tmp0 = MULTIPLY(-d3, FIX_0_785694958);
656 tmp1 = MULTIPLY(-d3, FIX_1_387039845);
657 tmp2 = MULTIPLY(-d3, FIX_0_275899380);
658 tmp3 = MULTIPLY(d3, FIX_1_175875602);
659 }
660 } else {
661 if (d1) {
662 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
663 tmp0 = MULTIPLY(d1, FIX_0_275899380);
664 tmp1 = MULTIPLY(d1, FIX_0_785694958);
665 tmp2 = MULTIPLY(d1, FIX_1_175875602);
666 tmp3 = MULTIPLY(d1, FIX_1_387039845);
667 } else {
668 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
669 tmp0 = tmp1 = tmp2 = tmp3 = 0;
670 }
671 }
672 }
673 }
674 }
675 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
676
677 dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
678 dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
679 dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
680 dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
681 dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
682 dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
683 dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
684 dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
685
686 dataptr += DCTSIZE; /* advance pointer to next row */
687 }
688
689 /* Pass 2: process columns. */
690 /* Note that we must descale the results by a factor of 8 == 2**3, */
691 /* and also undo the PASS1_BITS scaling. */
692
693 dataptr = data;
694 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
695 /* Columns of zeroes can be exploited in the same way as we did with rows.
696 * However, the row calculation has created many nonzero AC terms, so the
697 * simplification applies less often (typically 5% to 10% of the time).
698 * On machines with very fast multiplication, it's possible that the
699 * test takes more time than it's worth. In that case this section
700 * may be commented out.
701 */
702
703 d0 = dataptr[DCTSIZE*0];
704 d1 = dataptr[DCTSIZE*1];
705 d2 = dataptr[DCTSIZE*2];
706 d3 = dataptr[DCTSIZE*3];
707 d4 = dataptr[DCTSIZE*4];
708 d5 = dataptr[DCTSIZE*5];
709 d6 = dataptr[DCTSIZE*6];
710 d7 = dataptr[DCTSIZE*7];
711
712 /* Even part: reverse the even part of the forward DCT. */
713 /* The rotator is sqrt(2)*c(-6). */
714 if (d6) {
715 if (d4) {
716 if (d2) {
717 if (d0) {
718 /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
719 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
720 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
721 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
722
723 tmp0 = (d0 + d4) << CONST_BITS;
724 tmp1 = (d0 - d4) << CONST_BITS;
725
726 tmp10 = tmp0 + tmp3;
727 tmp13 = tmp0 - tmp3;
728 tmp11 = tmp1 + tmp2;
729 tmp12 = tmp1 - tmp2;
730 } else {
731 /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
732 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
733 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
734 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
735
736 tmp0 = d4 << CONST_BITS;
737
738 tmp10 = tmp0 + tmp3;
739 tmp13 = tmp0 - tmp3;
740 tmp11 = tmp2 - tmp0;
741 tmp12 = -(tmp0 + tmp2);
742 }
743 } else {
744 if (d0) {
745 /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
746 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
747 tmp3 = MULTIPLY(d6, FIX_0_541196100);
748
749 tmp0 = (d0 + d4) << CONST_BITS;
750 tmp1 = (d0 - d4) << CONST_BITS;
751
752 tmp10 = tmp0 + tmp3;
753 tmp13 = tmp0 - tmp3;
754 tmp11 = tmp1 + tmp2;
755 tmp12 = tmp1 - tmp2;
756 } else {
757 /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
758 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
759 tmp3 = MULTIPLY(d6, FIX_0_541196100);
760
761 tmp0 = d4 << CONST_BITS;
762
763 tmp10 = tmp0 + tmp3;
764 tmp13 = tmp0 - tmp3;
765 tmp11 = tmp2 - tmp0;
766 tmp12 = -(tmp0 + tmp2);
767 }
768 }
769 } else {
770 if (d2) {
771 if (d0) {
772 /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
773 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
774 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
775 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
776
777 tmp0 = d0 << CONST_BITS;
778
779 tmp10 = tmp0 + tmp3;
780 tmp13 = tmp0 - tmp3;
781 tmp11 = tmp0 + tmp2;
782 tmp12 = tmp0 - tmp2;
783 } else {
784 /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
785 z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
786 tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
787 tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
788
789 tmp10 = tmp3;
790 tmp13 = -tmp3;
791 tmp11 = tmp2;
792 tmp12 = -tmp2;
793 }
794 } else {
795 if (d0) {
796 /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
797 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
798 tmp3 = MULTIPLY(d6, FIX_0_541196100);
799
800 tmp0 = d0 << CONST_BITS;
801
802 tmp10 = tmp0 + tmp3;
803 tmp13 = tmp0 - tmp3;
804 tmp11 = tmp0 + tmp2;
805 tmp12 = tmp0 - tmp2;
806 } else {
807 /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
808 tmp2 = MULTIPLY(-d6, FIX_1_306562965);
809 tmp3 = MULTIPLY(d6, FIX_0_541196100);
810
811 tmp10 = tmp3;
812 tmp13 = -tmp3;
813 tmp11 = tmp2;
814 tmp12 = -tmp2;
815 }
816 }
817 }
818 } else {
819 if (d4) {
820 if (d2) {
821 if (d0) {
822 /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
823 tmp2 = MULTIPLY(d2, FIX_0_541196100);
824 tmp3 = MULTIPLY(d2, FIX_1_306562965);
825
826 tmp0 = (d0 + d4) << CONST_BITS;
827 tmp1 = (d0 - d4) << CONST_BITS;
828
829 tmp10 = tmp0 + tmp3;
830 tmp13 = tmp0 - tmp3;
831 tmp11 = tmp1 + tmp2;
832 tmp12 = tmp1 - tmp2;
833 } else {
834 /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
835 tmp2 = MULTIPLY(d2, FIX_0_541196100);
836 tmp3 = MULTIPLY(d2, FIX_1_306562965);
837
838 tmp0 = d4 << CONST_BITS;
839
840 tmp10 = tmp0 + tmp3;
841 tmp13 = tmp0 - tmp3;
842 tmp11 = tmp2 - tmp0;
843 tmp12 = -(tmp0 + tmp2);
844 }
845 } else {
846 if (d0) {
847 /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
848 tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
849 tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
850 } else {
851 /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
852 tmp10 = tmp13 = d4 << CONST_BITS;
853 tmp11 = tmp12 = -tmp10;
854 }
855 }
856 } else {
857 if (d2) {
858 if (d0) {
859 /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
860 tmp2 = MULTIPLY(d2, FIX_0_541196100);
861 tmp3 = MULTIPLY(d2, FIX_1_306562965);
862
863 tmp0 = d0 << CONST_BITS;
864
865 tmp10 = tmp0 + tmp3;
866 tmp13 = tmp0 - tmp3;
867 tmp11 = tmp0 + tmp2;
868 tmp12 = tmp0 - tmp2;
869 } else {
870 /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
871 tmp2 = MULTIPLY(d2, FIX_0_541196100);
872 tmp3 = MULTIPLY(d2, FIX_1_306562965);
873
874 tmp10 = tmp3;
875 tmp13 = -tmp3;
876 tmp11 = tmp2;
877 tmp12 = -tmp2;
878 }
879 } else {
880 if (d0) {
881 /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
882 tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
883 } else {
884 /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
885 tmp10 = tmp13 = tmp11 = tmp12 = 0;
886 }
887 }
888 }
889 }
890
891 /* Odd part per figure 8; the matrix is unitary and hence its
892 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
893 */
894 if (d7) {
895 if (d5) {
896 if (d3) {
897 if (d1) {
898 /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
899 z1 = d7 + d1;
900 z2 = d5 + d3;
901 z3 = d7 + d3;
902 z4 = d5 + d1;
903 z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
904
905 tmp0 = MULTIPLY(d7, FIX_0_298631336);
906 tmp1 = MULTIPLY(d5, FIX_2_053119869);
907 tmp2 = MULTIPLY(d3, FIX_3_072711026);
908 tmp3 = MULTIPLY(d1, FIX_1_501321110);
909 z1 = MULTIPLY(-z1, FIX_0_899976223);
910 z2 = MULTIPLY(-z2, FIX_2_562915447);
911 z3 = MULTIPLY(-z3, FIX_1_961570560);
912 z4 = MULTIPLY(-z4, FIX_0_390180644);
913
914 z3 += z5;
915 z4 += z5;
916
917 tmp0 += z1 + z3;
918 tmp1 += z2 + z4;
919 tmp2 += z2 + z3;
920 tmp3 += z1 + z4;
921 } else {
922 /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
923 z1 = d7;
924 z2 = d5 + d3;
925 z3 = d7 + d3;
926 z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
927
928 tmp0 = MULTIPLY(d7, FIX_0_298631336);
929 tmp1 = MULTIPLY(d5, FIX_2_053119869);
930 tmp2 = MULTIPLY(d3, FIX_3_072711026);
931 z1 = MULTIPLY(-d7, FIX_0_899976223);
932 z2 = MULTIPLY(-z2, FIX_2_562915447);
933 z3 = MULTIPLY(-z3, FIX_1_961570560);
934 z4 = MULTIPLY(-d5, FIX_0_390180644);
935
936 z3 += z5;
937 z4 += z5;
938
939 tmp0 += z1 + z3;
940 tmp1 += z2 + z4;
941 tmp2 += z2 + z3;
942 tmp3 = z1 + z4;
943 }
944 } else {
945 if (d1) {
946 /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
947 z1 = d7 + d1;
948 z2 = d5;
949 z3 = d7;
950 z4 = d5 + d1;
951 z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
952
953 tmp0 = MULTIPLY(d7, FIX_0_298631336);
954 tmp1 = MULTIPLY(d5, FIX_2_053119869);
955 tmp3 = MULTIPLY(d1, FIX_1_501321110);
956 z1 = MULTIPLY(-z1, FIX_0_899976223);
957 z2 = MULTIPLY(-d5, FIX_2_562915447);
958 z3 = MULTIPLY(-d7, FIX_1_961570560);
959 z4 = MULTIPLY(-z4, FIX_0_390180644);
960
961 z3 += z5;
962 z4 += z5;
963
964 tmp0 += z1 + z3;
965 tmp1 += z2 + z4;
966 tmp2 = z2 + z3;
967 tmp3 += z1 + z4;
968 } else {
969 /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
970 tmp0 = MULTIPLY(-d7, FIX_0_601344887);
971 z1 = MULTIPLY(-d7, FIX_0_899976223);
972 z3 = MULTIPLY(-d7, FIX_1_961570560);
973 tmp1 = MULTIPLY(-d5, FIX_0_509795579);
974 z2 = MULTIPLY(-d5, FIX_2_562915447);
975 z4 = MULTIPLY(-d5, FIX_0_390180644);
976 z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
977
978 z3 += z5;
979 z4 += z5;
980
981 tmp0 += z3;
982 tmp1 += z4;
983 tmp2 = z2 + z3;
984 tmp3 = z1 + z4;
985 }
986 }
987 } else {
988 if (d3) {
989 if (d1) {
990 /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
991 z1 = d7 + d1;
992 z3 = d7 + d3;
993 z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
994
995 tmp0 = MULTIPLY(d7, FIX_0_298631336);
996 tmp2 = MULTIPLY(d3, FIX_3_072711026);
997 tmp3 = MULTIPLY(d1, FIX_1_501321110);
998 z1 = MULTIPLY(-z1, FIX_0_899976223);
999 z2 = MULTIPLY(-d3, FIX_2_562915447);
1000 z3 = MULTIPLY(-z3, FIX_1_961570560);
1001 z4 = MULTIPLY(-d1, FIX_0_390180644);
1002
1003 z3 += z5;
1004 z4 += z5;
1005
1006 tmp0 += z1 + z3;
1007 tmp1 = z2 + z4;
1008 tmp2 += z2 + z3;
1009 tmp3 += z1 + z4;
1010 } else {
1011 /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
1012 z3 = d7 + d3;
1013
1014 tmp0 = MULTIPLY(-d7, FIX_0_601344887);
1015 z1 = MULTIPLY(-d7, FIX_0_899976223);
1016 tmp2 = MULTIPLY(d3, FIX_0_509795579);
1017 z2 = MULTIPLY(-d3, FIX_2_562915447);
1018 z5 = MULTIPLY(z3, FIX_1_175875602);
1019 z3 = MULTIPLY(-z3, FIX_0_785694958);
1020
1021 tmp0 += z3;
1022 tmp1 = z2 + z5;
1023 tmp2 += z3;
1024 tmp3 = z1 + z5;
1025 }
1026 } else {
1027 if (d1) {
1028 /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
1029 z1 = d7 + d1;
1030 z5 = MULTIPLY(z1, FIX_1_175875602);
1031
1032 z1 = MULTIPLY(z1, FIX_0_275899380);
1033 z3 = MULTIPLY(-d7, FIX_1_961570560);
1034 tmp0 = MULTIPLY(-d7, FIX_1_662939225);
1035 z4 = MULTIPLY(-d1, FIX_0_390180644);
1036 tmp3 = MULTIPLY(d1, FIX_1_111140466);
1037
1038 tmp0 += z1;
1039 tmp1 = z4 + z5;
1040 tmp2 = z3 + z5;
1041 tmp3 += z1;
1042 } else {
1043 /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
1044 tmp0 = MULTIPLY(-d7, FIX_1_387039845);
1045 tmp1 = MULTIPLY(d7, FIX_1_175875602);
1046 tmp2 = MULTIPLY(-d7, FIX_0_785694958);
1047 tmp3 = MULTIPLY(d7, FIX_0_275899380);
1048 }
1049 }
1050 }
1051 } else {
1052 if (d5) {
1053 if (d3) {
1054 if (d1) {
1055 /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
1056 z2 = d5 + d3;
1057 z4 = d5 + d1;
1058 z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
1059
1060 tmp1 = MULTIPLY(d5, FIX_2_053119869);
1061 tmp2 = MULTIPLY(d3, FIX_3_072711026);
1062 tmp3 = MULTIPLY(d1, FIX_1_501321110);
1063 z1 = MULTIPLY(-d1, FIX_0_899976223);
1064 z2 = MULTIPLY(-z2, FIX_2_562915447);
1065 z3 = MULTIPLY(-d3, FIX_1_961570560);
1066 z4 = MULTIPLY(-z4, FIX_0_390180644);
1067
1068 z3 += z5;
1069 z4 += z5;
1070
1071 tmp0 = z1 + z3;
1072 tmp1 += z2 + z4;
1073 tmp2 += z2 + z3;
1074 tmp3 += z1 + z4;
1075 } else {
1076 /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
1077 z2 = d5 + d3;
1078
1079 z5 = MULTIPLY(z2, FIX_1_175875602);
1080 tmp1 = MULTIPLY(d5, FIX_1_662939225);
1081 z4 = MULTIPLY(-d5, FIX_0_390180644);
1082 z2 = MULTIPLY(-z2, FIX_1_387039845);
1083 tmp2 = MULTIPLY(d3, FIX_1_111140466);
1084 z3 = MULTIPLY(-d3, FIX_1_961570560);
1085
1086 tmp0 = z3 + z5;
1087 tmp1 += z2;
1088 tmp2 += z2;
1089 tmp3 = z4 + z5;
1090 }
1091 } else {
1092 if (d1) {
1093 /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
1094 z4 = d5 + d1;
1095
1096 z5 = MULTIPLY(z4, FIX_1_175875602);
1097 z1 = MULTIPLY(-d1, FIX_0_899976223);
1098 tmp3 = MULTIPLY(d1, FIX_0_601344887);
1099 tmp1 = MULTIPLY(-d5, FIX_0_509795579);
1100 z2 = MULTIPLY(-d5, FIX_2_562915447);
1101 z4 = MULTIPLY(z4, FIX_0_785694958);
1102
1103 tmp0 = z1 + z5;
1104 tmp1 += z4;
1105 tmp2 = z2 + z5;
1106 tmp3 += z4;
1107 } else {
1108 /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
1109 tmp0 = MULTIPLY(d5, FIX_1_175875602);
1110 tmp1 = MULTIPLY(d5, FIX_0_275899380);
1111 tmp2 = MULTIPLY(-d5, FIX_1_387039845);
1112 tmp3 = MULTIPLY(d5, FIX_0_785694958);
1113 }
1114 }
1115 } else {
1116 if (d3) {
1117 if (d1) {
1118 /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
1119 z5 = d1 + d3;
1120 tmp3 = MULTIPLY(d1, FIX_0_211164243);
1121 tmp2 = MULTIPLY(-d3, FIX_1_451774981);
1122 z1 = MULTIPLY(d1, FIX_1_061594337);
1123 z2 = MULTIPLY(-d3, FIX_2_172734803);
1124 z4 = MULTIPLY(z5, FIX_0_785694958);
1125 z5 = MULTIPLY(z5, FIX_1_175875602);
1126
1127 tmp0 = z1 - z4;
1128 tmp1 = z2 + z4;
1129 tmp2 += z5;
1130 tmp3 += z5;
1131 } else {
1132 /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
1133 tmp0 = MULTIPLY(-d3, FIX_0_785694958);
1134 tmp1 = MULTIPLY(-d3, FIX_1_387039845);
1135 tmp2 = MULTIPLY(-d3, FIX_0_275899380);
1136 tmp3 = MULTIPLY(d3, FIX_1_175875602);
1137 }
1138 } else {
1139 if (d1) {
1140 /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
1141 tmp0 = MULTIPLY(d1, FIX_0_275899380);
1142 tmp1 = MULTIPLY(d1, FIX_0_785694958);
1143 tmp2 = MULTIPLY(d1, FIX_1_175875602);
1144 tmp3 = MULTIPLY(d1, FIX_1_387039845);
1145 } else {
1146 /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
1147 tmp0 = tmp1 = tmp2 = tmp3 = 0;
1148 }
1149 }
1150 }
1151 }
1152
1153 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1154
1155 dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3,
1156 CONST_BITS+PASS1_BITS+3);
1157 dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3,
1158 CONST_BITS+PASS1_BITS+3);
1159 dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2,
1160 CONST_BITS+PASS1_BITS+3);
1161 dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2,
1162 CONST_BITS+PASS1_BITS+3);
1163 dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1,
1164 CONST_BITS+PASS1_BITS+3);
1165 dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1,
1166 CONST_BITS+PASS1_BITS+3);
1167 dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0,
1168 CONST_BITS+PASS1_BITS+3);
1169 dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0,
1170 CONST_BITS+PASS1_BITS+3);
1171
1172 dataptr++; /* advance pointer to next column */
1173 }
1174 }
1175
1176
1177 /* here is the reference one, in case of problems with the normal one */
1178
1179 /* idctref.c, Inverse Discrete Fourier Transform, double precision */
1180
1181 /* Copyright (C) 1994, MPEG Software Simulation Group. All Rights Reserved. */
1182
1183 /*
1184 * Disclaimer of Warranty
1185 *
1186 * These software programs are available to the user without any license fee or
1187 * royalty on an "as is" basis. The MPEG Software Simulation Group disclaims
1188 * any and all warranties, whether express, implied, or statuary, including any
1189 * implied warranties or merchantability or of fitness for a particular
1190 * purpose. In no event shall the copyright-holder be liable for any
1191 * incidental, punitive, or consequential damages of any kind whatsoever
1192 * arising from the use of these programs.
1193 *
1194 * This disclaimer of warranty extends to the user of these programs and user's
1195 * customers, employees, agents, transferees, successors, and assigns.
1196 *
1197 * The MPEG Software Simulation Group does not represent or warrant that the
1198 * programs furnished hereunder are free of infringement of any third-party
1199 * patents.
1200 *
1201 * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
1202 * are subject to royalty fees to patent holders. Many of these patents are
1203 * general enough such that they are unavoidable regardless of implementation
1204 * design.
1205 *
1206 */
1207
1208 /* Perform IEEE 1180 reference (64-bit floating point, separable 8x1
1209 * direct matrix multiply) Inverse Discrete Cosine Transform
1210 */
1211
1212
1213 /* Here we use math.h to generate constants. Compiler results may
1214 vary a little */
1215
1216 #ifndef PI
1217 #ifdef M_PI
1218 #define PI M_PI
1219 #else
1220 #define PI 3.14159265358979323846
1221 #endif
1222 #endif
1223
1224 /* cosine transform matrix for 8x1 IDCT */
1225 static double itrans_coef[8][8];
1226
1227 /* initialize DCT coefficient matrix */
1228
init_idctref()1229 void init_idctref()
1230 {
1231 int freq, time;
1232 double scale;
1233
1234 for (freq=0; freq < 8; freq++)
1235 {
1236 scale = (freq == 0) ? sqrt(0.125) : 0.5;
1237 for (time=0; time<8; time++)
1238 itrans_coef[freq][time] = scale*cos((PI/8.0)*freq*(time + 0.5));
1239 }
1240 }
1241
1242 /* perform IDCT matrix multiply for 8x8 coefficient block */
1243
reference_rev_dct(int16 * block)1244 void reference_rev_dct(int16 *block)
1245 {
1246 int i, j, k, v;
1247 double partial_product;
1248 double tmp[64];
1249
1250 for (i=0; i<8; i++)
1251 for (j=0; j<8; j++)
1252 {
1253 partial_product = 0.0;
1254
1255 for (k=0; k<8; k++)
1256 partial_product+= itrans_coef[k][j]*block[8*i+k];
1257
1258 tmp[8*i+j] = partial_product;
1259 }
1260
1261 /* Transpose operation is integrated into address mapping by switching
1262 loop order of i and j */
1263
1264 for (j=0; j<8; j++)
1265 for (i=0; i<8; i++)
1266 {
1267 partial_product = 0.0;
1268
1269 for (k=0; k<8; k++)
1270 partial_product+= itrans_coef[k][i]*tmp[8*k+j];
1271
1272 v = (int)floor(partial_product+0.5);
1273 block[8*i+j] = (v<-256) ? -256 : ((v>255) ? 255 : v);
1274 }
1275 }
1276