/*
 * jidctint.c
 *
 * Copyright (C) 1991-1998, Thomas G. Lane.
 * Modification developed 2002-2016 by Guido Vollbeding.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
 * This file contains a slow-but-accurate integer implementation of the
 * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
 * must also perform dequantization of the input coefficients.
 *
 * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
 * on each row (or vice versa, but it's more convenient to emit a row at
 * a time).  Direct algorithms are also available, but they are much more
 * complex and seem not to be any faster when reduced to code.
 *
 * This implementation is based on an algorithm described in
 *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
 *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
 *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
 * The primary algorithm described there uses 11 multiplies and 29 adds.
 * We use their alternate method with 12 multiplies and 32 adds.
 * The advantage of this method is that no data path contains more than one
 * multiplication; this allows a very simple and accurate implementation in
 * scaled fixed-point arithmetic, with a minimal number of shifts.
 *
 * We also provide IDCT routines with various output sample block sizes for
 * direct resolution reduction or enlargement and for direct resolving the
 * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
 * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 input DCT block.
 *
 * For N<8 we simply take the corresponding low-frequency coefficients of
 * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
 * to yield the downscaled outputs.
36 * This can be seen as direct low-pass downsampling from the DCT domain 37 * point of view rather than the usual spatial domain point of view, 38 * yielding significant computational savings and results at least 39 * as good as common bilinear (averaging) spatial downsampling. 40 * 41 * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as 42 * lower frequencies and higher frequencies assumed to be zero. 43 * It turns out that the computational effort is similar to the 8x8 IDCT 44 * regarding the output size. 45 * Furthermore, the scaling and descaling is the same for all IDCT sizes. 46 * 47 * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases 48 * since there would be too many additional constants to pre-calculate. 49 */ 50 51 #define JPEG_INTERNALS 52 #include "jinclude.h" 53 #include "jpeglib.h" 54 #include "jdct.h" /* Private declarations for DCT subsystem */ 55 56 #ifdef DCT_ISLOW_SUPPORTED 57 58 59 /* 60 * This module is specialized to the case DCTSIZE = 8. 61 */ 62 63 #if DCTSIZE != 8 64 Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */ 65 #endif 66 67 68 /* 69 * The poop on this scaling stuff is as follows: 70 * 71 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N) 72 * larger than the true IDCT outputs. The final outputs are therefore 73 * a factor of N larger than desired; since N=8 this can be cured by 74 * a simple right shift at the end of the algorithm. The advantage of 75 * this arrangement is that we save two multiplications per 1-D IDCT, 76 * because the y0 and y4 inputs need not be divided by sqrt(N). 77 * 78 * We have to do addition and subtraction of the integer inputs, which 79 * is no problem, and multiplication by fractional constants, which is 80 * a problem to do in integer arithmetic. We multiply all the constants 81 * by CONST_SCALE and convert them to integer constants (thus retaining 82 * CONST_BITS bits of precision in the constants). 
After doing a 83 * multiplication we have to divide the product by CONST_SCALE, with proper 84 * rounding, to produce the correct output. This division can be done 85 * cheaply as a right shift of CONST_BITS bits. We postpone shifting 86 * as long as possible so that partial sums can be added together with 87 * full fractional precision. 88 * 89 * The outputs of the first pass are scaled up by PASS1_BITS bits so that 90 * they are represented to better-than-integral precision. These outputs 91 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word 92 * with the recommended scaling. (To scale up 12-bit sample data further, an 93 * intermediate INT32 array would be needed.) 94 * 95 * To avoid overflow of the 32-bit intermediate results in pass 2, we must 96 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis 97 * shows that the values given below are the most effective. 98 */ 99 100 #if BITS_IN_JSAMPLE == 8 101 #define CONST_BITS 13 102 #define PASS1_BITS 2 103 #else 104 #define CONST_BITS 13 105 #define PASS1_BITS 1 /* lose a little precision to avoid overflow */ 106 #endif 107 108 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus 109 * causing a lot of useless floating-point operations at run time. 110 * To get around this we use the following pre-calculated constants. 111 * If you change CONST_BITS you may want to add appropriate values. 112 * (With a reasonable C compiler, you can just rely on the FIX() macro...) 
113 */ 114 115 #if CONST_BITS == 13 116 #define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */ 117 #define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */ 118 #define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */ 119 #define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */ 120 #define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */ 121 #define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */ 122 #define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */ 123 #define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */ 124 #define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */ 125 #define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */ 126 #define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */ 127 #define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */ 128 #else 129 #define FIX_0_298631336 FIX(0.298631336) 130 #define FIX_0_390180644 FIX(0.390180644) 131 #define FIX_0_541196100 FIX(0.541196100) 132 #define FIX_0_765366865 FIX(0.765366865) 133 #define FIX_0_899976223 FIX(0.899976223) 134 #define FIX_1_175875602 FIX(1.175875602) 135 #define FIX_1_501321110 FIX(1.501321110) 136 #define FIX_1_847759065 FIX(1.847759065) 137 #define FIX_1_961570560 FIX(1.961570560) 138 #define FIX_2_053119869 FIX(2.053119869) 139 #define FIX_2_562915447 FIX(2.562915447) 140 #define FIX_3_072711026 FIX(3.072711026) 141 #endif 142 143 144 /* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. 145 * For 8-bit samples with the recommended scaling, all the variable 146 * and constant values involved are no more than 16 bits wide, so a 147 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply. 148 * For 12-bit samples, a full 32-bit multiplication will be needed. 
149 */ 150 151 #if BITS_IN_JSAMPLE == 8 152 #define MULTIPLY(var,const) MULTIPLY16C16(var,const) 153 #else 154 #define MULTIPLY(var,const) ((var) * (const)) 155 #endif 156 157 158 /* Dequantize a coefficient by multiplying it by the multiplier-table 159 * entry; produce an int result. In this module, both inputs and result 160 * are 16 bits or less, so either int or short multiply will work. 161 */ 162 163 #define DEQUANTIZE(coef,quantval) (((ISLOW_MULT_TYPE) (coef)) * (quantval)) 164 165 166 /* 167 * Perform dequantization and inverse DCT on one block of coefficients. 168 * 169 * Optimized algorithm with 12 multiplications in the 1-D kernel. 170 * cK represents sqrt(2) * cos(K*pi/16). 171 */ 172 173 GLOBAL(void) 174 jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, 175 JCOEFPTR coef_block, 176 JSAMPARRAY output_buf, JDIMENSION output_col) 177 { 178 INT32 tmp0, tmp1, tmp2, tmp3; 179 INT32 tmp10, tmp11, tmp12, tmp13; 180 INT32 z1, z2, z3; 181 JCOEFPTR inptr; 182 ISLOW_MULT_TYPE * quantptr; 183 int * wsptr; 184 JSAMPROW outptr; 185 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 186 int ctr; 187 int workspace[DCTSIZE2]; /* buffers data between passes */ 188 SHIFT_TEMPS 189 190 /* Pass 1: process columns from input, store into work array. 191 * Note results are scaled up by sqrt(8) compared to a true IDCT; 192 * furthermore, we scale the results by 2**PASS1_BITS. 193 */ 194 195 inptr = coef_block; 196 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 197 wsptr = workspace; 198 for (ctr = DCTSIZE; ctr > 0; ctr--) { 199 /* Due to quantization, we will usually find that many of the input 200 * coefficients are zero, especially the AC terms. We can exploit this 201 * by short-circuiting the IDCT calculation for any column in which all 202 * the AC terms are zero. In that case each output is equal to the 203 * DC coefficient (with scale factor as needed). 
204 * With typical images and quantization tables, half or more of the 205 * column DCT calculations can be simplified this way. 206 */ 207 208 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && 209 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && 210 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && 211 inptr[DCTSIZE*7] == 0) { 212 /* AC terms all zero */ 213 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; 214 215 wsptr[DCTSIZE*0] = dcval; 216 wsptr[DCTSIZE*1] = dcval; 217 wsptr[DCTSIZE*2] = dcval; 218 wsptr[DCTSIZE*3] = dcval; 219 wsptr[DCTSIZE*4] = dcval; 220 wsptr[DCTSIZE*5] = dcval; 221 wsptr[DCTSIZE*6] = dcval; 222 wsptr[DCTSIZE*7] = dcval; 223 224 inptr++; /* advance pointers to next column */ 225 quantptr++; 226 wsptr++; 227 continue; 228 } 229 230 /* Even part: reverse the even part of the forward DCT. 231 * The rotator is c(-6). 232 */ 233 234 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 235 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 236 z2 <<= CONST_BITS; 237 z3 <<= CONST_BITS; 238 /* Add fudge factor here for final descale. */ 239 z2 += ONE << (CONST_BITS-PASS1_BITS-1); 240 241 tmp0 = z2 + z3; 242 tmp1 = z2 - z3; 243 244 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 245 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 246 247 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ 248 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ 249 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ 250 251 tmp10 = tmp0 + tmp2; 252 tmp13 = tmp0 - tmp2; 253 tmp11 = tmp1 + tmp3; 254 tmp12 = tmp1 - tmp3; 255 256 /* Odd part per figure 8; the matrix is unitary and hence its 257 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. 
258 */ 259 260 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 261 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 262 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 263 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 264 265 z2 = tmp0 + tmp2; 266 z3 = tmp1 + tmp3; 267 268 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */ 269 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */ 270 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */ 271 z2 += z1; 272 z3 += z1; 273 274 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ 275 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */ 276 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */ 277 tmp0 += z1 + z2; 278 tmp3 += z1 + z3; 279 280 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ 281 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */ 282 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */ 283 tmp1 += z1 + z3; 284 tmp2 += z1 + z2; 285 286 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 287 288 wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS); 289 wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS); 290 wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS); 291 wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS); 292 wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS); 293 wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS); 294 wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS); 295 wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS); 296 297 inptr++; /* advance pointers to next column */ 298 quantptr++; 299 wsptr++; 300 } 301 302 /* Pass 2: process rows from work array, store into output array. 303 * Note that we must descale the results by a factor of 8 == 2**3, 304 * and also undo the PASS1_BITS scaling. 
305 */ 306 307 wsptr = workspace; 308 for (ctr = 0; ctr < DCTSIZE; ctr++) { 309 outptr = output_buf[ctr] + output_col; 310 311 /* Add range center and fudge factor for final descale and range-limit. */ 312 z2 = (INT32) wsptr[0] + 313 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 314 (ONE << (PASS1_BITS+2))); 315 316 /* Rows of zeroes can be exploited in the same way as we did with columns. 317 * However, the column calculation has created many nonzero AC terms, so 318 * the simplification applies less often (typically 5% to 10% of the time). 319 * On machines with very fast multiplication, it's possible that the 320 * test takes more time than it's worth. In that case this section 321 * may be commented out. 322 */ 323 324 #ifndef NO_ZERO_ROW_TEST 325 if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 && 326 wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { 327 /* AC terms all zero */ 328 JSAMPLE dcval = range_limit[(int) RIGHT_SHIFT(z2, PASS1_BITS+3) 329 & RANGE_MASK]; 330 331 outptr[0] = dcval; 332 outptr[1] = dcval; 333 outptr[2] = dcval; 334 outptr[3] = dcval; 335 outptr[4] = dcval; 336 outptr[5] = dcval; 337 outptr[6] = dcval; 338 outptr[7] = dcval; 339 340 wsptr += DCTSIZE; /* advance pointer to next row */ 341 continue; 342 } 343 #endif 344 345 /* Even part: reverse the even part of the forward DCT. 346 * The rotator is c(-6). 347 */ 348 349 z3 = (INT32) wsptr[4]; 350 351 tmp0 = (z2 + z3) << CONST_BITS; 352 tmp1 = (z2 - z3) << CONST_BITS; 353 354 z2 = (INT32) wsptr[2]; 355 z3 = (INT32) wsptr[6]; 356 357 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ 358 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ 359 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ 360 361 tmp10 = tmp0 + tmp2; 362 tmp13 = tmp0 - tmp2; 363 tmp11 = tmp1 + tmp3; 364 tmp12 = tmp1 - tmp3; 365 366 /* Odd part per figure 8; the matrix is unitary and hence its 367 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. 
368 */ 369 370 tmp0 = (INT32) wsptr[7]; 371 tmp1 = (INT32) wsptr[5]; 372 tmp2 = (INT32) wsptr[3]; 373 tmp3 = (INT32) wsptr[1]; 374 375 z2 = tmp0 + tmp2; 376 z3 = tmp1 + tmp3; 377 378 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */ 379 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */ 380 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */ 381 z2 += z1; 382 z3 += z1; 383 384 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ 385 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */ 386 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */ 387 tmp0 += z1 + z2; 388 tmp3 += z1 + z3; 389 390 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ 391 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */ 392 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */ 393 tmp1 += z1 + z3; 394 tmp2 += z1 + z2; 395 396 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 397 398 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3, 399 CONST_BITS+PASS1_BITS+3) 400 & RANGE_MASK]; 401 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3, 402 CONST_BITS+PASS1_BITS+3) 403 & RANGE_MASK]; 404 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2, 405 CONST_BITS+PASS1_BITS+3) 406 & RANGE_MASK]; 407 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2, 408 CONST_BITS+PASS1_BITS+3) 409 & RANGE_MASK]; 410 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1, 411 CONST_BITS+PASS1_BITS+3) 412 & RANGE_MASK]; 413 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1, 414 CONST_BITS+PASS1_BITS+3) 415 & RANGE_MASK]; 416 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0, 417 CONST_BITS+PASS1_BITS+3) 418 & RANGE_MASK]; 419 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0, 420 CONST_BITS+PASS1_BITS+3) 421 & RANGE_MASK]; 422 423 wsptr += DCTSIZE; /* advance pointer to next row */ 424 } 425 } 426 427 #ifdef IDCT_SCALING_SUPPORTED 428 429 430 /* 431 * Perform dequantization and inverse DCT on one block of coefficients, 432 * producing a 
reduced-size 7x7 output block. 433 * 434 * Optimized algorithm with 12 multiplications in the 1-D kernel. 435 * cK represents sqrt(2) * cos(K*pi/14). 436 */ 437 438 GLOBAL(void) 439 jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 440 JCOEFPTR coef_block, 441 JSAMPARRAY output_buf, JDIMENSION output_col) 442 { 443 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13; 444 INT32 z1, z2, z3; 445 JCOEFPTR inptr; 446 ISLOW_MULT_TYPE * quantptr; 447 int * wsptr; 448 JSAMPROW outptr; 449 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 450 int ctr; 451 int workspace[7*7]; /* buffers data between passes */ 452 SHIFT_TEMPS 453 454 /* Pass 1: process columns from input, store into work array. */ 455 456 inptr = coef_block; 457 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 458 wsptr = workspace; 459 for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) { 460 /* Even part */ 461 462 tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 463 tmp13 <<= CONST_BITS; 464 /* Add fudge factor here for final descale. 
*/ 465 tmp13 += ONE << (CONST_BITS-PASS1_BITS-1); 466 467 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 468 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 469 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 470 471 tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ 472 tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ 473 tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ 474 tmp0 = z1 + z3; 475 z2 -= tmp0; 476 tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */ 477 tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ 478 tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ 479 tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ 480 481 /* Odd part */ 482 483 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 484 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 485 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 486 487 tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ 488 tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ 489 tmp0 = tmp1 - tmp2; 490 tmp1 += tmp2; 491 tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ 492 tmp1 += tmp2; 493 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ 494 tmp0 += z2; 495 tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ 496 497 /* Final output stage */ 498 499 wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); 500 wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); 501 wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); 502 wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); 503 wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); 504 wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); 505 wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS); 506 } 507 508 /* Pass 2: process 7 rows from work array, store into output array. 
*/ 509 510 wsptr = workspace; 511 for (ctr = 0; ctr < 7; ctr++) { 512 outptr = output_buf[ctr] + output_col; 513 514 /* Even part */ 515 516 /* Add range center and fudge factor for final descale and range-limit. */ 517 tmp13 = (INT32) wsptr[0] + 518 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 519 (ONE << (PASS1_BITS+2))); 520 tmp13 <<= CONST_BITS; 521 522 z1 = (INT32) wsptr[2]; 523 z2 = (INT32) wsptr[4]; 524 z3 = (INT32) wsptr[6]; 525 526 tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ 527 tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ 528 tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ 529 tmp0 = z1 + z3; 530 z2 -= tmp0; 531 tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */ 532 tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ 533 tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ 534 tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ 535 536 /* Odd part */ 537 538 z1 = (INT32) wsptr[1]; 539 z2 = (INT32) wsptr[3]; 540 z3 = (INT32) wsptr[5]; 541 542 tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ 543 tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ 544 tmp0 = tmp1 - tmp2; 545 tmp1 += tmp2; 546 tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ 547 tmp1 += tmp2; 548 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ 549 tmp0 += z2; 550 tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ 551 552 /* Final output stage */ 553 554 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 555 CONST_BITS+PASS1_BITS+3) 556 & RANGE_MASK]; 557 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 558 CONST_BITS+PASS1_BITS+3) 559 & RANGE_MASK]; 560 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, 561 CONST_BITS+PASS1_BITS+3) 562 & RANGE_MASK]; 563 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, 564 CONST_BITS+PASS1_BITS+3) 565 & RANGE_MASK]; 566 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, 567 CONST_BITS+PASS1_BITS+3) 568 & 
RANGE_MASK]; 569 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, 570 CONST_BITS+PASS1_BITS+3) 571 & RANGE_MASK]; 572 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13, 573 CONST_BITS+PASS1_BITS+3) 574 & RANGE_MASK]; 575 576 wsptr += 7; /* advance pointer to next row */ 577 } 578 } 579 580 581 /* 582 * Perform dequantization and inverse DCT on one block of coefficients, 583 * producing a reduced-size 6x6 output block. 584 * 585 * Optimized algorithm with 3 multiplications in the 1-D kernel. 586 * cK represents sqrt(2) * cos(K*pi/12). 587 */ 588 589 GLOBAL(void) 590 jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 591 JCOEFPTR coef_block, 592 JSAMPARRAY output_buf, JDIMENSION output_col) 593 { 594 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12; 595 INT32 z1, z2, z3; 596 JCOEFPTR inptr; 597 ISLOW_MULT_TYPE * quantptr; 598 int * wsptr; 599 JSAMPROW outptr; 600 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 601 int ctr; 602 int workspace[6*6]; /* buffers data between passes */ 603 SHIFT_TEMPS 604 605 /* Pass 1: process columns from input, store into work array. */ 606 607 inptr = coef_block; 608 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 609 wsptr = workspace; 610 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) { 611 /* Even part */ 612 613 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 614 tmp0 <<= CONST_BITS; 615 /* Add fudge factor here for final descale. 
*/ 616 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); 617 tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 618 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ 619 tmp1 = tmp0 + tmp10; 620 tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS); 621 tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 622 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ 623 tmp10 = tmp1 + tmp0; 624 tmp12 = tmp1 - tmp0; 625 626 /* Odd part */ 627 628 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 629 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 630 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 631 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ 632 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); 633 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); 634 tmp1 = (z1 - z2 - z3) << PASS1_BITS; 635 636 /* Final output stage */ 637 638 wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); 639 wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); 640 wsptr[6*1] = (int) (tmp11 + tmp1); 641 wsptr[6*4] = (int) (tmp11 - tmp1); 642 wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); 643 wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); 644 } 645 646 /* Pass 2: process 6 rows from work array, store into output array. */ 647 648 wsptr = workspace; 649 for (ctr = 0; ctr < 6; ctr++) { 650 outptr = output_buf[ctr] + output_col; 651 652 /* Even part */ 653 654 /* Add range center and fudge factor for final descale and range-limit. 
*/ 655 tmp0 = (INT32) wsptr[0] + 656 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 657 (ONE << (PASS1_BITS+2))); 658 tmp0 <<= CONST_BITS; 659 tmp2 = (INT32) wsptr[4]; 660 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ 661 tmp1 = tmp0 + tmp10; 662 tmp11 = tmp0 - tmp10 - tmp10; 663 tmp10 = (INT32) wsptr[2]; 664 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ 665 tmp10 = tmp1 + tmp0; 666 tmp12 = tmp1 - tmp0; 667 668 /* Odd part */ 669 670 z1 = (INT32) wsptr[1]; 671 z2 = (INT32) wsptr[3]; 672 z3 = (INT32) wsptr[5]; 673 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ 674 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); 675 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); 676 tmp1 = (z1 - z2 - z3) << CONST_BITS; 677 678 /* Final output stage */ 679 680 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 681 CONST_BITS+PASS1_BITS+3) 682 & RANGE_MASK]; 683 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 684 CONST_BITS+PASS1_BITS+3) 685 & RANGE_MASK]; 686 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, 687 CONST_BITS+PASS1_BITS+3) 688 & RANGE_MASK]; 689 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, 690 CONST_BITS+PASS1_BITS+3) 691 & RANGE_MASK]; 692 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, 693 CONST_BITS+PASS1_BITS+3) 694 & RANGE_MASK]; 695 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, 696 CONST_BITS+PASS1_BITS+3) 697 & RANGE_MASK]; 698 699 wsptr += 6; /* advance pointer to next row */ 700 } 701 } 702 703 704 /* 705 * Perform dequantization and inverse DCT on one block of coefficients, 706 * producing a reduced-size 5x5 output block. 707 * 708 * Optimized algorithm with 5 multiplications in the 1-D kernel. 709 * cK represents sqrt(2) * cos(K*pi/10). 
710 */ 711 712 GLOBAL(void) 713 jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 714 JCOEFPTR coef_block, 715 JSAMPARRAY output_buf, JDIMENSION output_col) 716 { 717 INT32 tmp0, tmp1, tmp10, tmp11, tmp12; 718 INT32 z1, z2, z3; 719 JCOEFPTR inptr; 720 ISLOW_MULT_TYPE * quantptr; 721 int * wsptr; 722 JSAMPROW outptr; 723 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 724 int ctr; 725 int workspace[5*5]; /* buffers data between passes */ 726 SHIFT_TEMPS 727 728 /* Pass 1: process columns from input, store into work array. */ 729 730 inptr = coef_block; 731 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 732 wsptr = workspace; 733 for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) { 734 /* Even part */ 735 736 tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 737 tmp12 <<= CONST_BITS; 738 /* Add fudge factor here for final descale. */ 739 tmp12 += ONE << (CONST_BITS-PASS1_BITS-1); 740 tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 741 tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 742 z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */ 743 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */ 744 z3 = tmp12 + z2; 745 tmp10 = z3 + z1; 746 tmp11 = z3 - z1; 747 tmp12 -= z2 << 2; 748 749 /* Odd part */ 750 751 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 752 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 753 754 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ 755 tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ 756 tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ 757 758 /* Final output stage */ 759 760 wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); 761 wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); 762 wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); 763 wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); 764 wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS); 765 
} 766 767 /* Pass 2: process 5 rows from work array, store into output array. */ 768 769 wsptr = workspace; 770 for (ctr = 0; ctr < 5; ctr++) { 771 outptr = output_buf[ctr] + output_col; 772 773 /* Even part */ 774 775 /* Add range center and fudge factor for final descale and range-limit. */ 776 tmp12 = (INT32) wsptr[0] + 777 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 778 (ONE << (PASS1_BITS+2))); 779 tmp12 <<= CONST_BITS; 780 tmp0 = (INT32) wsptr[2]; 781 tmp1 = (INT32) wsptr[4]; 782 z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */ 783 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */ 784 z3 = tmp12 + z2; 785 tmp10 = z3 + z1; 786 tmp11 = z3 - z1; 787 tmp12 -= z2 << 2; 788 789 /* Odd part */ 790 791 z2 = (INT32) wsptr[1]; 792 z3 = (INT32) wsptr[3]; 793 794 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ 795 tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ 796 tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ 797 798 /* Final output stage */ 799 800 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 801 CONST_BITS+PASS1_BITS+3) 802 & RANGE_MASK]; 803 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 804 CONST_BITS+PASS1_BITS+3) 805 & RANGE_MASK]; 806 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, 807 CONST_BITS+PASS1_BITS+3) 808 & RANGE_MASK]; 809 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, 810 CONST_BITS+PASS1_BITS+3) 811 & RANGE_MASK]; 812 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12, 813 CONST_BITS+PASS1_BITS+3) 814 & RANGE_MASK]; 815 816 wsptr += 5; /* advance pointer to next row */ 817 } 818 } 819 820 821 /* 822 * Perform dequantization and inverse DCT on one block of coefficients, 823 * producing a reduced-size 4x4 output block. 824 * 825 * Optimized algorithm with 3 multiplications in the 1-D kernel. 826 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. 
827 */ 828 829 GLOBAL(void) 830 jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 831 JCOEFPTR coef_block, 832 JSAMPARRAY output_buf, JDIMENSION output_col) 833 { 834 INT32 tmp0, tmp2, tmp10, tmp12; 835 INT32 z1, z2, z3; 836 JCOEFPTR inptr; 837 ISLOW_MULT_TYPE * quantptr; 838 int * wsptr; 839 JSAMPROW outptr; 840 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 841 int ctr; 842 int workspace[4*4]; /* buffers data between passes */ 843 SHIFT_TEMPS 844 845 /* Pass 1: process columns from input, store into work array. */ 846 847 inptr = coef_block; 848 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 849 wsptr = workspace; 850 for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) { 851 /* Even part */ 852 853 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 854 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 855 856 tmp10 = (tmp0 + tmp2) << PASS1_BITS; 857 tmp12 = (tmp0 - tmp2) << PASS1_BITS; 858 859 /* Odd part */ 860 /* Same rotation as in the even part of the 8x8 LL&M IDCT */ 861 862 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 863 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 864 865 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ 866 /* Add fudge factor here for final descale. */ 867 z1 += ONE << (CONST_BITS-PASS1_BITS-1); 868 tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */ 869 CONST_BITS-PASS1_BITS); 870 tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */ 871 CONST_BITS-PASS1_BITS); 872 873 /* Final output stage */ 874 875 wsptr[4*0] = (int) (tmp10 + tmp0); 876 wsptr[4*3] = (int) (tmp10 - tmp0); 877 wsptr[4*1] = (int) (tmp12 + tmp2); 878 wsptr[4*2] = (int) (tmp12 - tmp2); 879 } 880 881 /* Pass 2: process 4 rows from work array, store into output array. */ 882 883 wsptr = workspace; 884 for (ctr = 0; ctr < 4; ctr++) { 885 outptr = output_buf[ctr] + output_col; 886 887 /* Even part */ 888 889 /* Add range center and fudge factor for final descale and range-limit. 
*/ 890 tmp0 = (INT32) wsptr[0] + 891 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 892 (ONE << (PASS1_BITS+2))); 893 tmp2 = (INT32) wsptr[2]; 894 895 tmp10 = (tmp0 + tmp2) << CONST_BITS; 896 tmp12 = (tmp0 - tmp2) << CONST_BITS; 897 898 /* Odd part */ 899 /* Same rotation as in the even part of the 8x8 LL&M IDCT */ 900 901 z2 = (INT32) wsptr[1]; 902 z3 = (INT32) wsptr[3]; 903 904 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ 905 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ 906 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ 907 908 /* Final output stage */ 909 910 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 911 CONST_BITS+PASS1_BITS+3) 912 & RANGE_MASK]; 913 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 914 CONST_BITS+PASS1_BITS+3) 915 & RANGE_MASK]; 916 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, 917 CONST_BITS+PASS1_BITS+3) 918 & RANGE_MASK]; 919 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, 920 CONST_BITS+PASS1_BITS+3) 921 & RANGE_MASK]; 922 923 wsptr += 4; /* advance pointer to next row */ 924 } 925 } 926 927 928 /* 929 * Perform dequantization and inverse DCT on one block of coefficients, 930 * producing a reduced-size 3x3 output block. 931 * 932 * Optimized algorithm with 2 multiplications in the 1-D kernel. 933 * cK represents sqrt(2) * cos(K*pi/6). 934 */ 935 936 GLOBAL(void) 937 jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 938 JCOEFPTR coef_block, 939 JSAMPARRAY output_buf, JDIMENSION output_col) 940 { 941 INT32 tmp0, tmp2, tmp10, tmp12; 942 JCOEFPTR inptr; 943 ISLOW_MULT_TYPE * quantptr; 944 int * wsptr; 945 JSAMPROW outptr; 946 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 947 int ctr; 948 int workspace[3*3]; /* buffers data between passes */ 949 SHIFT_TEMPS 950 951 /* Pass 1: process columns from input, store into work array. 
*/ 952 953 inptr = coef_block; 954 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 955 wsptr = workspace; 956 for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) { 957 /* Even part */ 958 959 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 960 tmp0 <<= CONST_BITS; 961 /* Add fudge factor here for final descale. */ 962 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); 963 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 964 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ 965 tmp10 = tmp0 + tmp12; 966 tmp2 = tmp0 - tmp12 - tmp12; 967 968 /* Odd part */ 969 970 tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 971 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ 972 973 /* Final output stage */ 974 975 wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); 976 wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); 977 wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS); 978 } 979 980 /* Pass 2: process 3 rows from work array, store into output array. */ 981 982 wsptr = workspace; 983 for (ctr = 0; ctr < 3; ctr++) { 984 outptr = output_buf[ctr] + output_col; 985 986 /* Even part */ 987 988 /* Add range center and fudge factor for final descale and range-limit. 
*/ 989 tmp0 = (INT32) wsptr[0] + 990 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 991 (ONE << (PASS1_BITS+2))); 992 tmp0 <<= CONST_BITS; 993 tmp2 = (INT32) wsptr[2]; 994 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ 995 tmp10 = tmp0 + tmp12; 996 tmp2 = tmp0 - tmp12 - tmp12; 997 998 /* Odd part */ 999 1000 tmp12 = (INT32) wsptr[1]; 1001 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ 1002 1003 /* Final output stage */ 1004 1005 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 1006 CONST_BITS+PASS1_BITS+3) 1007 & RANGE_MASK]; 1008 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 1009 CONST_BITS+PASS1_BITS+3) 1010 & RANGE_MASK]; 1011 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2, 1012 CONST_BITS+PASS1_BITS+3) 1013 & RANGE_MASK]; 1014 1015 wsptr += 3; /* advance pointer to next row */ 1016 } 1017 } 1018 1019 1020 /* 1021 * Perform dequantization and inverse DCT on one block of coefficients, 1022 * producing a reduced-size 2x2 output block. 1023 * 1024 * Multiplication-less algorithm. 1025 */ 1026 1027 GLOBAL(void) 1028 jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1029 JCOEFPTR coef_block, 1030 JSAMPARRAY output_buf, JDIMENSION output_col) 1031 { 1032 DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; 1033 ISLOW_MULT_TYPE * quantptr; 1034 JSAMPROW outptr; 1035 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1036 ISHIFT_TEMPS 1037 1038 /* Pass 1: process columns from input. */ 1039 1040 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1041 1042 /* Column 0 */ 1043 tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]); 1044 tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]); 1045 /* Add range center and fudge factor for final descale and range-limit. 
*/ 1046 tmp4 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2); 1047 1048 tmp0 = tmp4 + tmp5; 1049 tmp2 = tmp4 - tmp5; 1050 1051 /* Column 1 */ 1052 tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]); 1053 tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]); 1054 1055 tmp1 = tmp4 + tmp5; 1056 tmp3 = tmp4 - tmp5; 1057 1058 /* Pass 2: process 2 rows, store into output array. */ 1059 1060 /* Row 0 */ 1061 outptr = output_buf[0] + output_col; 1062 1063 outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK]; 1064 outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK]; 1065 1066 /* Row 1 */ 1067 outptr = output_buf[1] + output_col; 1068 1069 outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK]; 1070 outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK]; 1071 } 1072 1073 1074 /* 1075 * Perform dequantization and inverse DCT on one block of coefficients, 1076 * producing a reduced-size 1x1 output block. 1077 * 1078 * We hardly need an inverse DCT routine for this: just take the 1079 * average pixel value, which is one-eighth of the DC coefficient. 1080 */ 1081 1082 GLOBAL(void) 1083 jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1084 JCOEFPTR coef_block, 1085 JSAMPARRAY output_buf, JDIMENSION output_col) 1086 { 1087 DCTELEM dcval; 1088 ISLOW_MULT_TYPE * quantptr; 1089 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1090 ISHIFT_TEMPS 1091 1092 /* 1x1 is trivial: just take the DC coefficient divided by 8. */ 1093 1094 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1095 1096 dcval = DEQUANTIZE(coef_block[0], quantptr[0]); 1097 /* Add range center and fudge factor for descale and range-limit. 
*/ 1098 dcval += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2); 1099 1100 output_buf[0][output_col] = 1101 range_limit[(int) IRIGHT_SHIFT(dcval, 3) & RANGE_MASK]; 1102 } 1103 1104 1105 /* 1106 * Perform dequantization and inverse DCT on one block of coefficients, 1107 * producing a 9x9 output block. 1108 * 1109 * Optimized algorithm with 10 multiplications in the 1-D kernel. 1110 * cK represents sqrt(2) * cos(K*pi/18). 1111 */ 1112 1113 GLOBAL(void) 1114 jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1115 JCOEFPTR coef_block, 1116 JSAMPARRAY output_buf, JDIMENSION output_col) 1117 { 1118 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14; 1119 INT32 z1, z2, z3, z4; 1120 JCOEFPTR inptr; 1121 ISLOW_MULT_TYPE * quantptr; 1122 int * wsptr; 1123 JSAMPROW outptr; 1124 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1125 int ctr; 1126 int workspace[8*9]; /* buffers data between passes */ 1127 SHIFT_TEMPS 1128 1129 /* Pass 1: process columns from input, store into work array. */ 1130 1131 inptr = coef_block; 1132 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1133 wsptr = workspace; 1134 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 1135 /* Even part */ 1136 1137 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1138 tmp0 <<= CONST_BITS; 1139 /* Add fudge factor here for final descale. 
*/ 1140 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); 1141 1142 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1143 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 1144 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 1145 1146 tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */ 1147 tmp1 = tmp0 + tmp3; 1148 tmp2 = tmp0 - tmp3 - tmp3; 1149 1150 tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */ 1151 tmp11 = tmp2 + tmp0; 1152 tmp14 = tmp2 - tmp0 - tmp0; 1153 1154 tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */ 1155 tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */ 1156 tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */ 1157 1158 tmp10 = tmp1 + tmp0 - tmp3; 1159 tmp12 = tmp1 - tmp0 + tmp2; 1160 tmp13 = tmp1 - tmp2 + tmp3; 1161 1162 /* Odd part */ 1163 1164 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1165 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1166 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 1167 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 1168 1169 z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */ 1170 1171 tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */ 1172 tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */ 1173 tmp0 = tmp2 + tmp3 - z2; 1174 tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */ 1175 tmp2 += z2 - tmp1; 1176 tmp3 += z2 + tmp1; 1177 tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */ 1178 1179 /* Final output stage */ 1180 1181 wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); 1182 wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); 1183 wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); 1184 wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); 1185 wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); 1186 wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); 1187 wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS); 1188 wsptr[8*5] = (int) 
RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS); 1189 wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS); 1190 } 1191 1192 /* Pass 2: process 9 rows from work array, store into output array. */ 1193 1194 wsptr = workspace; 1195 for (ctr = 0; ctr < 9; ctr++) { 1196 outptr = output_buf[ctr] + output_col; 1197 1198 /* Even part */ 1199 1200 /* Add range center and fudge factor for final descale and range-limit. */ 1201 tmp0 = (INT32) wsptr[0] + 1202 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 1203 (ONE << (PASS1_BITS+2))); 1204 tmp0 <<= CONST_BITS; 1205 1206 z1 = (INT32) wsptr[2]; 1207 z2 = (INT32) wsptr[4]; 1208 z3 = (INT32) wsptr[6]; 1209 1210 tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */ 1211 tmp1 = tmp0 + tmp3; 1212 tmp2 = tmp0 - tmp3 - tmp3; 1213 1214 tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */ 1215 tmp11 = tmp2 + tmp0; 1216 tmp14 = tmp2 - tmp0 - tmp0; 1217 1218 tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */ 1219 tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */ 1220 tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */ 1221 1222 tmp10 = tmp1 + tmp0 - tmp3; 1223 tmp12 = tmp1 - tmp0 + tmp2; 1224 tmp13 = tmp1 - tmp2 + tmp3; 1225 1226 /* Odd part */ 1227 1228 z1 = (INT32) wsptr[1]; 1229 z2 = (INT32) wsptr[3]; 1230 z3 = (INT32) wsptr[5]; 1231 z4 = (INT32) wsptr[7]; 1232 1233 z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */ 1234 1235 tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */ 1236 tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */ 1237 tmp0 = tmp2 + tmp3 - z2; 1238 tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */ 1239 tmp2 += z2 - tmp1; 1240 tmp3 += z2 + tmp1; 1241 tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */ 1242 1243 /* Final output stage */ 1244 1245 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 1246 CONST_BITS+PASS1_BITS+3) 1247 & RANGE_MASK]; 1248 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 1249 CONST_BITS+PASS1_BITS+3) 1250 & RANGE_MASK]; 1251 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 
+ tmp1, 1252 CONST_BITS+PASS1_BITS+3) 1253 & RANGE_MASK]; 1254 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, 1255 CONST_BITS+PASS1_BITS+3) 1256 & RANGE_MASK]; 1257 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, 1258 CONST_BITS+PASS1_BITS+3) 1259 & RANGE_MASK]; 1260 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, 1261 CONST_BITS+PASS1_BITS+3) 1262 & RANGE_MASK]; 1263 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3, 1264 CONST_BITS+PASS1_BITS+3) 1265 & RANGE_MASK]; 1266 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3, 1267 CONST_BITS+PASS1_BITS+3) 1268 & RANGE_MASK]; 1269 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14, 1270 CONST_BITS+PASS1_BITS+3) 1271 & RANGE_MASK]; 1272 1273 wsptr += 8; /* advance pointer to next row */ 1274 } 1275 } 1276 1277 1278 /* 1279 * Perform dequantization and inverse DCT on one block of coefficients, 1280 * producing a 10x10 output block. 1281 * 1282 * Optimized algorithm with 12 multiplications in the 1-D kernel. 1283 * cK represents sqrt(2) * cos(K*pi/20). 1284 */ 1285 1286 GLOBAL(void) 1287 jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1288 JCOEFPTR coef_block, 1289 JSAMPARRAY output_buf, JDIMENSION output_col) 1290 { 1291 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; 1292 INT32 tmp20, tmp21, tmp22, tmp23, tmp24; 1293 INT32 z1, z2, z3, z4, z5; 1294 JCOEFPTR inptr; 1295 ISLOW_MULT_TYPE * quantptr; 1296 int * wsptr; 1297 JSAMPROW outptr; 1298 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1299 int ctr; 1300 int workspace[8*10]; /* buffers data between passes */ 1301 SHIFT_TEMPS 1302 1303 /* Pass 1: process columns from input, store into work array. 
*/ 1304 1305 inptr = coef_block; 1306 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1307 wsptr = workspace; 1308 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 1309 /* Even part */ 1310 1311 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1312 z3 <<= CONST_BITS; 1313 /* Add fudge factor here for final descale. */ 1314 z3 += ONE << (CONST_BITS-PASS1_BITS-1); 1315 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 1316 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ 1317 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ 1318 tmp10 = z3 + z1; 1319 tmp11 = z3 - z2; 1320 1321 tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */ 1322 CONST_BITS-PASS1_BITS); 1323 1324 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1325 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 1326 1327 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ 1328 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ 1329 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ 1330 1331 tmp20 = tmp10 + tmp12; 1332 tmp24 = tmp10 - tmp12; 1333 tmp21 = tmp11 + tmp13; 1334 tmp23 = tmp11 - tmp13; 1335 1336 /* Odd part */ 1337 1338 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1339 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1340 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 1341 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 1342 1343 tmp11 = z2 + z4; 1344 tmp13 = z2 - z4; 1345 1346 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ 1347 z5 = z3 << CONST_BITS; 1348 1349 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ 1350 z4 = z5 + tmp12; 1351 1352 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ 1353 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ 1354 1355 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ 1356 z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1)); 1357 1358 tmp12 = (z1 - tmp13 - z3) << PASS1_BITS; 1359 1360 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* 
c3 */ 1361 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ 1362 1363 /* Final output stage */ 1364 1365 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 1366 wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 1367 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 1368 wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 1369 wsptr[8*2] = (int) (tmp22 + tmp12); 1370 wsptr[8*7] = (int) (tmp22 - tmp12); 1371 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); 1372 wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); 1373 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 1374 wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 1375 } 1376 1377 /* Pass 2: process 10 rows from work array, store into output array. */ 1378 1379 wsptr = workspace; 1380 for (ctr = 0; ctr < 10; ctr++) { 1381 outptr = output_buf[ctr] + output_col; 1382 1383 /* Even part */ 1384 1385 /* Add range center and fudge factor for final descale and range-limit. 
*/ 1386 z3 = (INT32) wsptr[0] + 1387 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 1388 (ONE << (PASS1_BITS+2))); 1389 z3 <<= CONST_BITS; 1390 z4 = (INT32) wsptr[4]; 1391 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ 1392 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ 1393 tmp10 = z3 + z1; 1394 tmp11 = z3 - z2; 1395 1396 tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */ 1397 1398 z2 = (INT32) wsptr[2]; 1399 z3 = (INT32) wsptr[6]; 1400 1401 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ 1402 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ 1403 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ 1404 1405 tmp20 = tmp10 + tmp12; 1406 tmp24 = tmp10 - tmp12; 1407 tmp21 = tmp11 + tmp13; 1408 tmp23 = tmp11 - tmp13; 1409 1410 /* Odd part */ 1411 1412 z1 = (INT32) wsptr[1]; 1413 z2 = (INT32) wsptr[3]; 1414 z3 = (INT32) wsptr[5]; 1415 z3 <<= CONST_BITS; 1416 z4 = (INT32) wsptr[7]; 1417 1418 tmp11 = z2 + z4; 1419 tmp13 = z2 - z4; 1420 1421 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ 1422 1423 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ 1424 z4 = z3 + tmp12; 1425 1426 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ 1427 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ 1428 1429 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ 1430 z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1)); 1431 1432 tmp12 = ((z1 - tmp13) << CONST_BITS) - z3; 1433 1434 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ 1435 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ 1436 1437 /* Final output stage */ 1438 1439 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 1440 CONST_BITS+PASS1_BITS+3) 1441 & RANGE_MASK]; 1442 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 1443 CONST_BITS+PASS1_BITS+3) 1444 & RANGE_MASK]; 1445 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 1446 CONST_BITS+PASS1_BITS+3) 1447 & RANGE_MASK]; 1448 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 1449 
CONST_BITS+PASS1_BITS+3) 1450 & RANGE_MASK]; 1451 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 1452 CONST_BITS+PASS1_BITS+3) 1453 & RANGE_MASK]; 1454 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 1455 CONST_BITS+PASS1_BITS+3) 1456 & RANGE_MASK]; 1457 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 1458 CONST_BITS+PASS1_BITS+3) 1459 & RANGE_MASK]; 1460 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 1461 CONST_BITS+PASS1_BITS+3) 1462 & RANGE_MASK]; 1463 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 1464 CONST_BITS+PASS1_BITS+3) 1465 & RANGE_MASK]; 1466 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 1467 CONST_BITS+PASS1_BITS+3) 1468 & RANGE_MASK]; 1469 1470 wsptr += 8; /* advance pointer to next row */ 1471 } 1472 } 1473 1474 1475 /* 1476 * Perform dequantization and inverse DCT on one block of coefficients, 1477 * producing a 11x11 output block. 1478 * 1479 * Optimized algorithm with 24 multiplications in the 1-D kernel. 1480 * cK represents sqrt(2) * cos(K*pi/22). 1481 */ 1482 1483 GLOBAL(void) 1484 jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1485 JCOEFPTR coef_block, 1486 JSAMPARRAY output_buf, JDIMENSION output_col) 1487 { 1488 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; 1489 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; 1490 INT32 z1, z2, z3, z4; 1491 JCOEFPTR inptr; 1492 ISLOW_MULT_TYPE * quantptr; 1493 int * wsptr; 1494 JSAMPROW outptr; 1495 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1496 int ctr; 1497 int workspace[8*11]; /* buffers data between passes */ 1498 SHIFT_TEMPS 1499 1500 /* Pass 1: process columns from input, store into work array. 
*/ 1501 1502 inptr = coef_block; 1503 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1504 wsptr = workspace; 1505 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 1506 /* Even part */ 1507 1508 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1509 tmp10 <<= CONST_BITS; 1510 /* Add fudge factor here for final descale. */ 1511 tmp10 += ONE << (CONST_BITS-PASS1_BITS-1); 1512 1513 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1514 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 1515 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 1516 1517 tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */ 1518 tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */ 1519 z4 = z1 + z3; 1520 tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */ 1521 z4 -= z2; 1522 tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */ 1523 tmp21 = tmp20 + tmp23 + tmp25 - 1524 MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */ 1525 tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */ 1526 tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */ 1527 tmp24 += tmp25; 1528 tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */ 1529 tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */ 1530 MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */ 1531 tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */ 1532 1533 /* Odd part */ 1534 1535 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1536 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1537 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 1538 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 1539 1540 tmp11 = z1 + z2; 1541 tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */ 1542 tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */ 1543 tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */ 1544 tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */ 1545 tmp10 = tmp11 + tmp12 + tmp13 - 1546 MULTIPLY(z1, FIX(0.923107866)); /* 
c7+c5+c3-c1-2*c9 */ 1547 z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */ 1548 tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */ 1549 tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */ 1550 z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */ 1551 tmp11 += z1; 1552 tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */ 1553 tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */ 1554 MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */ 1555 MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */ 1556 1557 /* Final output stage */ 1558 1559 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 1560 wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 1561 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 1562 wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 1563 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 1564 wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 1565 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); 1566 wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); 1567 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 1568 wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 1569 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS); 1570 } 1571 1572 /* Pass 2: process 11 rows from work array, store into output array. */ 1573 1574 wsptr = workspace; 1575 for (ctr = 0; ctr < 11; ctr++) { 1576 outptr = output_buf[ctr] + output_col; 1577 1578 /* Even part */ 1579 1580 /* Add range center and fudge factor for final descale and range-limit. 
*/ 1581 tmp10 = (INT32) wsptr[0] + 1582 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 1583 (ONE << (PASS1_BITS+2))); 1584 tmp10 <<= CONST_BITS; 1585 1586 z1 = (INT32) wsptr[2]; 1587 z2 = (INT32) wsptr[4]; 1588 z3 = (INT32) wsptr[6]; 1589 1590 tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */ 1591 tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */ 1592 z4 = z1 + z3; 1593 tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */ 1594 z4 -= z2; 1595 tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */ 1596 tmp21 = tmp20 + tmp23 + tmp25 - 1597 MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */ 1598 tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */ 1599 tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */ 1600 tmp24 += tmp25; 1601 tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */ 1602 tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */ 1603 MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */ 1604 tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */ 1605 1606 /* Odd part */ 1607 1608 z1 = (INT32) wsptr[1]; 1609 z2 = (INT32) wsptr[3]; 1610 z3 = (INT32) wsptr[5]; 1611 z4 = (INT32) wsptr[7]; 1612 1613 tmp11 = z1 + z2; 1614 tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */ 1615 tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */ 1616 tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */ 1617 tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */ 1618 tmp10 = tmp11 + tmp12 + tmp13 - 1619 MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */ 1620 z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */ 1621 tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */ 1622 tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */ 1623 z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */ 1624 tmp11 += z1; 1625 tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */ 1626 tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */ 1627 MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 
*/ 1628 MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */ 1629 1630 /* Final output stage */ 1631 1632 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 1633 CONST_BITS+PASS1_BITS+3) 1634 & RANGE_MASK]; 1635 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 1636 CONST_BITS+PASS1_BITS+3) 1637 & RANGE_MASK]; 1638 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 1639 CONST_BITS+PASS1_BITS+3) 1640 & RANGE_MASK]; 1641 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 1642 CONST_BITS+PASS1_BITS+3) 1643 & RANGE_MASK]; 1644 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 1645 CONST_BITS+PASS1_BITS+3) 1646 & RANGE_MASK]; 1647 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 1648 CONST_BITS+PASS1_BITS+3) 1649 & RANGE_MASK]; 1650 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 1651 CONST_BITS+PASS1_BITS+3) 1652 & RANGE_MASK]; 1653 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 1654 CONST_BITS+PASS1_BITS+3) 1655 & RANGE_MASK]; 1656 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 1657 CONST_BITS+PASS1_BITS+3) 1658 & RANGE_MASK]; 1659 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 1660 CONST_BITS+PASS1_BITS+3) 1661 & RANGE_MASK]; 1662 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25, 1663 CONST_BITS+PASS1_BITS+3) 1664 & RANGE_MASK]; 1665 1666 wsptr += 8; /* advance pointer to next row */ 1667 } 1668 } 1669 1670 1671 /* 1672 * Perform dequantization and inverse DCT on one block of coefficients, 1673 * producing a 12x12 output block. 1674 * 1675 * Optimized algorithm with 15 multiplications in the 1-D kernel. 1676 * cK represents sqrt(2) * cos(K*pi/24). 
1677 */ 1678 1679 GLOBAL(void) 1680 jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1681 JCOEFPTR coef_block, 1682 JSAMPARRAY output_buf, JDIMENSION output_col) 1683 { 1684 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; 1685 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; 1686 INT32 z1, z2, z3, z4; 1687 JCOEFPTR inptr; 1688 ISLOW_MULT_TYPE * quantptr; 1689 int * wsptr; 1690 JSAMPROW outptr; 1691 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1692 int ctr; 1693 int workspace[8*12]; /* buffers data between passes */ 1694 SHIFT_TEMPS 1695 1696 /* Pass 1: process columns from input, store into work array. */ 1697 1698 inptr = coef_block; 1699 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1700 wsptr = workspace; 1701 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 1702 /* Even part */ 1703 1704 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1705 z3 <<= CONST_BITS; 1706 /* Add fudge factor here for final descale. */ 1707 z3 += ONE << (CONST_BITS-PASS1_BITS-1); 1708 1709 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 1710 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ 1711 1712 tmp10 = z3 + z4; 1713 tmp11 = z3 - z4; 1714 1715 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1716 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ 1717 z1 <<= CONST_BITS; 1718 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 1719 z2 <<= CONST_BITS; 1720 1721 tmp12 = z1 - z2; 1722 1723 tmp21 = z3 + tmp12; 1724 tmp24 = z3 - tmp12; 1725 1726 tmp12 = z4 + z2; 1727 1728 tmp20 = tmp10 + tmp12; 1729 tmp25 = tmp10 - tmp12; 1730 1731 tmp12 = z4 - z1 - z2; 1732 1733 tmp22 = tmp11 + tmp12; 1734 tmp23 = tmp11 - tmp12; 1735 1736 /* Odd part */ 1737 1738 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1739 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1740 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 1741 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 1742 1743 tmp11 = MULTIPLY(z2, 
FIX(1.306562965)); /* c3 */ 1744 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ 1745 1746 tmp10 = z1 + z3; 1747 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ 1748 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ 1749 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ 1750 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ 1751 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ 1752 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ 1753 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ 1754 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ 1755 1756 z1 -= z4; 1757 z2 -= z3; 1758 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ 1759 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ 1760 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ 1761 1762 /* Final output stage */ 1763 1764 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 1765 wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 1766 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 1767 wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 1768 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 1769 wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 1770 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); 1771 wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); 1772 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 1773 wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 1774 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); 1775 wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); 1776 } 1777 1778 /* Pass 2: process 12 rows from work array, store into output array. 
*/ 1779 1780 wsptr = workspace; 1781 for (ctr = 0; ctr < 12; ctr++) { 1782 outptr = output_buf[ctr] + output_col; 1783 1784 /* Even part */ 1785 1786 /* Add range center and fudge factor for final descale and range-limit. */ 1787 z3 = (INT32) wsptr[0] + 1788 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 1789 (ONE << (PASS1_BITS+2))); 1790 z3 <<= CONST_BITS; 1791 1792 z4 = (INT32) wsptr[4]; 1793 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ 1794 1795 tmp10 = z3 + z4; 1796 tmp11 = z3 - z4; 1797 1798 z1 = (INT32) wsptr[2]; 1799 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ 1800 z1 <<= CONST_BITS; 1801 z2 = (INT32) wsptr[6]; 1802 z2 <<= CONST_BITS; 1803 1804 tmp12 = z1 - z2; 1805 1806 tmp21 = z3 + tmp12; 1807 tmp24 = z3 - tmp12; 1808 1809 tmp12 = z4 + z2; 1810 1811 tmp20 = tmp10 + tmp12; 1812 tmp25 = tmp10 - tmp12; 1813 1814 tmp12 = z4 - z1 - z2; 1815 1816 tmp22 = tmp11 + tmp12; 1817 tmp23 = tmp11 - tmp12; 1818 1819 /* Odd part */ 1820 1821 z1 = (INT32) wsptr[1]; 1822 z2 = (INT32) wsptr[3]; 1823 z3 = (INT32) wsptr[5]; 1824 z4 = (INT32) wsptr[7]; 1825 1826 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ 1827 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ 1828 1829 tmp10 = z1 + z3; 1830 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ 1831 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ 1832 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ 1833 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ 1834 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ 1835 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ 1836 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ 1837 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ 1838 1839 z1 -= z4; 1840 z2 -= z3; 1841 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ 1842 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ 1843 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ 1844 1845 /* Final output stage */ 1846 
1847 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 1848 CONST_BITS+PASS1_BITS+3) 1849 & RANGE_MASK]; 1850 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 1851 CONST_BITS+PASS1_BITS+3) 1852 & RANGE_MASK]; 1853 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 1854 CONST_BITS+PASS1_BITS+3) 1855 & RANGE_MASK]; 1856 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 1857 CONST_BITS+PASS1_BITS+3) 1858 & RANGE_MASK]; 1859 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 1860 CONST_BITS+PASS1_BITS+3) 1861 & RANGE_MASK]; 1862 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 1863 CONST_BITS+PASS1_BITS+3) 1864 & RANGE_MASK]; 1865 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 1866 CONST_BITS+PASS1_BITS+3) 1867 & RANGE_MASK]; 1868 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 1869 CONST_BITS+PASS1_BITS+3) 1870 & RANGE_MASK]; 1871 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 1872 CONST_BITS+PASS1_BITS+3) 1873 & RANGE_MASK]; 1874 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 1875 CONST_BITS+PASS1_BITS+3) 1876 & RANGE_MASK]; 1877 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, 1878 CONST_BITS+PASS1_BITS+3) 1879 & RANGE_MASK]; 1880 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, 1881 CONST_BITS+PASS1_BITS+3) 1882 & RANGE_MASK]; 1883 1884 wsptr += 8; /* advance pointer to next row */ 1885 } 1886 } 1887 1888 1889 /* 1890 * Perform dequantization and inverse DCT on one block of coefficients, 1891 * producing a 13x13 output block. 1892 * 1893 * Optimized algorithm with 29 multiplications in the 1-D kernel. 1894 * cK represents sqrt(2) * cos(K*pi/26). 
1895 */ 1896 1897 GLOBAL(void) 1898 jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1899 JCOEFPTR coef_block, 1900 JSAMPARRAY output_buf, JDIMENSION output_col) 1901 { 1902 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; 1903 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; 1904 INT32 z1, z2, z3, z4; 1905 JCOEFPTR inptr; 1906 ISLOW_MULT_TYPE * quantptr; 1907 int * wsptr; 1908 JSAMPROW outptr; 1909 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1910 int ctr; 1911 int workspace[8*13]; /* buffers data between passes */ 1912 SHIFT_TEMPS 1913 1914 /* Pass 1: process columns from input, store into work array. */ 1915 1916 inptr = coef_block; 1917 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1918 wsptr = workspace; 1919 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 1920 /* Even part */ 1921 1922 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1923 z1 <<= CONST_BITS; 1924 /* Add fudge factor here for final descale. */ 1925 z1 += ONE << (CONST_BITS-PASS1_BITS-1); 1926 1927 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1928 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 1929 z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 1930 1931 tmp10 = z3 + z4; 1932 tmp11 = z3 - z4; 1933 1934 tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */ 1935 tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */ 1936 1937 tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */ 1938 tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */ 1939 1940 tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */ 1941 tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */ 1942 1943 tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */ 1944 tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */ 1945 1946 tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */ 1947 tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */ 1948 1949 tmp23 = 
MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */ 1950 tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */ 1951 1952 tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */ 1953 1954 /* Odd part */ 1955 1956 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1957 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1958 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 1959 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 1960 1961 tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */ 1962 tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */ 1963 tmp15 = z1 + z4; 1964 tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */ 1965 tmp10 = tmp11 + tmp12 + tmp13 - 1966 MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */ 1967 tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */ 1968 tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */ 1969 tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */ 1970 tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */ 1971 tmp11 += tmp14; 1972 tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */ 1973 tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */ 1974 tmp12 += tmp14; 1975 tmp13 += tmp14; 1976 tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */ 1977 tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */ 1978 MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */ 1979 z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */ 1980 tmp14 += z1; 1981 tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */ 1982 MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */ 1983 1984 /* Final output stage */ 1985 1986 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 1987 wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 1988 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 1989 wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 1990 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, 
CONST_BITS-PASS1_BITS); 1991 wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 1992 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); 1993 wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); 1994 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 1995 wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 1996 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); 1997 wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); 1998 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS); 1999 } 2000 2001 /* Pass 2: process 13 rows from work array, store into output array. */ 2002 2003 wsptr = workspace; 2004 for (ctr = 0; ctr < 13; ctr++) { 2005 outptr = output_buf[ctr] + output_col; 2006 2007 /* Even part */ 2008 2009 /* Add range center and fudge factor for final descale and range-limit. */ 2010 z1 = (INT32) wsptr[0] + 2011 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 2012 (ONE << (PASS1_BITS+2))); 2013 z1 <<= CONST_BITS; 2014 2015 z2 = (INT32) wsptr[2]; 2016 z3 = (INT32) wsptr[4]; 2017 z4 = (INT32) wsptr[6]; 2018 2019 tmp10 = z3 + z4; 2020 tmp11 = z3 - z4; 2021 2022 tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */ 2023 tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */ 2024 2025 tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */ 2026 tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */ 2027 2028 tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */ 2029 tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */ 2030 2031 tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */ 2032 tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */ 2033 2034 tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */ 2035 tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */ 2036 2037 tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */ 2038 
tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */ 2039 2040 tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */ 2041 2042 /* Odd part */ 2043 2044 z1 = (INT32) wsptr[1]; 2045 z2 = (INT32) wsptr[3]; 2046 z3 = (INT32) wsptr[5]; 2047 z4 = (INT32) wsptr[7]; 2048 2049 tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */ 2050 tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */ 2051 tmp15 = z1 + z4; 2052 tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */ 2053 tmp10 = tmp11 + tmp12 + tmp13 - 2054 MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */ 2055 tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */ 2056 tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */ 2057 tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */ 2058 tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */ 2059 tmp11 += tmp14; 2060 tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */ 2061 tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */ 2062 tmp12 += tmp14; 2063 tmp13 += tmp14; 2064 tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */ 2065 tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */ 2066 MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */ 2067 z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */ 2068 tmp14 += z1; 2069 tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */ 2070 MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */ 2071 2072 /* Final output stage */ 2073 2074 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 2075 CONST_BITS+PASS1_BITS+3) 2076 & RANGE_MASK]; 2077 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 2078 CONST_BITS+PASS1_BITS+3) 2079 & RANGE_MASK]; 2080 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 2081 CONST_BITS+PASS1_BITS+3) 2082 & RANGE_MASK]; 2083 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 2084 CONST_BITS+PASS1_BITS+3) 2085 & RANGE_MASK]; 2086 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 2087 CONST_BITS+PASS1_BITS+3) 2088 & 
RANGE_MASK]; 2089 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 2090 CONST_BITS+PASS1_BITS+3) 2091 & RANGE_MASK]; 2092 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 2093 CONST_BITS+PASS1_BITS+3) 2094 & RANGE_MASK]; 2095 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 2096 CONST_BITS+PASS1_BITS+3) 2097 & RANGE_MASK]; 2098 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 2099 CONST_BITS+PASS1_BITS+3) 2100 & RANGE_MASK]; 2101 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 2102 CONST_BITS+PASS1_BITS+3) 2103 & RANGE_MASK]; 2104 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, 2105 CONST_BITS+PASS1_BITS+3) 2106 & RANGE_MASK]; 2107 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, 2108 CONST_BITS+PASS1_BITS+3) 2109 & RANGE_MASK]; 2110 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26, 2111 CONST_BITS+PASS1_BITS+3) 2112 & RANGE_MASK]; 2113 2114 wsptr += 8; /* advance pointer to next row */ 2115 } 2116 } 2117 2118 2119 /* 2120 * Perform dequantization and inverse DCT on one block of coefficients, 2121 * producing a 14x14 output block. 2122 * 2123 * Optimized algorithm with 20 multiplications in the 1-D kernel. 2124 * cK represents sqrt(2) * cos(K*pi/28). 2125 */ 2126 2127 GLOBAL(void) 2128 jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 2129 JCOEFPTR coef_block, 2130 JSAMPARRAY output_buf, JDIMENSION output_col) 2131 { 2132 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; 2133 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; 2134 INT32 z1, z2, z3, z4; 2135 JCOEFPTR inptr; 2136 ISLOW_MULT_TYPE * quantptr; 2137 int * wsptr; 2138 JSAMPROW outptr; 2139 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 2140 int ctr; 2141 int workspace[8*14]; /* buffers data between passes */ 2142 SHIFT_TEMPS 2143 2144 /* Pass 1: process columns from input, store into work array. 
*/ 2145 2146 inptr = coef_block; 2147 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 2148 wsptr = workspace; 2149 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 2150 /* Even part */ 2151 2152 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 2153 z1 <<= CONST_BITS; 2154 /* Add fudge factor here for final descale. */ 2155 z1 += ONE << (CONST_BITS-PASS1_BITS-1); 2156 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 2157 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ 2158 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ 2159 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ 2160 2161 tmp10 = z1 + z2; 2162 tmp11 = z1 + z3; 2163 tmp12 = z1 - z4; 2164 2165 tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */ 2166 CONST_BITS-PASS1_BITS); 2167 2168 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 2169 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 2170 2171 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ 2172 2173 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ 2174 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ 2175 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ 2176 MULTIPLY(z2, FIX(1.378756276)); /* c2 */ 2177 2178 tmp20 = tmp10 + tmp13; 2179 tmp26 = tmp10 - tmp13; 2180 tmp21 = tmp11 + tmp14; 2181 tmp25 = tmp11 - tmp14; 2182 tmp22 = tmp12 + tmp15; 2183 tmp24 = tmp12 - tmp15; 2184 2185 /* Odd part */ 2186 2187 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 2188 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 2189 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 2190 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 2191 tmp13 = z4 << CONST_BITS; 2192 2193 tmp14 = z1 + z3; 2194 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ 2195 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ 2196 tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ 2197 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ 2198 tmp16 = tmp14 - 
MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ 2199 z1 -= z2; 2200 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */ 2201 tmp16 += tmp15; 2202 z1 += z4; 2203 z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */ 2204 tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ 2205 tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ 2206 z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ 2207 tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ 2208 tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ 2209 2210 tmp13 = (z1 - z3) << PASS1_BITS; 2211 2212 /* Final output stage */ 2213 2214 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 2215 wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 2216 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 2217 wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 2218 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 2219 wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 2220 wsptr[8*3] = (int) (tmp23 + tmp13); 2221 wsptr[8*10] = (int) (tmp23 - tmp13); 2222 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 2223 wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 2224 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); 2225 wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); 2226 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); 2227 wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); 2228 } 2229 2230 /* Pass 2: process 14 rows from work array, store into output array. */ 2231 2232 wsptr = workspace; 2233 for (ctr = 0; ctr < 14; ctr++) { 2234 outptr = output_buf[ctr] + output_col; 2235 2236 /* Even part */ 2237 2238 /* Add range center and fudge factor for final descale and range-limit. 
*/ 2239 z1 = (INT32) wsptr[0] + 2240 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 2241 (ONE << (PASS1_BITS+2))); 2242 z1 <<= CONST_BITS; 2243 z4 = (INT32) wsptr[4]; 2244 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ 2245 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ 2246 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ 2247 2248 tmp10 = z1 + z2; 2249 tmp11 = z1 + z3; 2250 tmp12 = z1 - z4; 2251 2252 tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */ 2253 2254 z1 = (INT32) wsptr[2]; 2255 z2 = (INT32) wsptr[6]; 2256 2257 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ 2258 2259 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ 2260 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ 2261 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ 2262 MULTIPLY(z2, FIX(1.378756276)); /* c2 */ 2263 2264 tmp20 = tmp10 + tmp13; 2265 tmp26 = tmp10 - tmp13; 2266 tmp21 = tmp11 + tmp14; 2267 tmp25 = tmp11 - tmp14; 2268 tmp22 = tmp12 + tmp15; 2269 tmp24 = tmp12 - tmp15; 2270 2271 /* Odd part */ 2272 2273 z1 = (INT32) wsptr[1]; 2274 z2 = (INT32) wsptr[3]; 2275 z3 = (INT32) wsptr[5]; 2276 z4 = (INT32) wsptr[7]; 2277 z4 <<= CONST_BITS; 2278 2279 tmp14 = z1 + z3; 2280 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ 2281 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ 2282 tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ 2283 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ 2284 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ 2285 z1 -= z2; 2286 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */ 2287 tmp16 += tmp15; 2288 tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */ 2289 tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ 2290 tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ 2291 tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ 2292 tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ 2293 tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); 
/* c1+c11-c5 */ 2294 2295 tmp13 = ((z1 - z3) << CONST_BITS) + z4; 2296 2297 /* Final output stage */ 2298 2299 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 2300 CONST_BITS+PASS1_BITS+3) 2301 & RANGE_MASK]; 2302 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 2303 CONST_BITS+PASS1_BITS+3) 2304 & RANGE_MASK]; 2305 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 2306 CONST_BITS+PASS1_BITS+3) 2307 & RANGE_MASK]; 2308 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 2309 CONST_BITS+PASS1_BITS+3) 2310 & RANGE_MASK]; 2311 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 2312 CONST_BITS+PASS1_BITS+3) 2313 & RANGE_MASK]; 2314 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 2315 CONST_BITS+PASS1_BITS+3) 2316 & RANGE_MASK]; 2317 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 2318 CONST_BITS+PASS1_BITS+3) 2319 & RANGE_MASK]; 2320 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 2321 CONST_BITS+PASS1_BITS+3) 2322 & RANGE_MASK]; 2323 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 2324 CONST_BITS+PASS1_BITS+3) 2325 & RANGE_MASK]; 2326 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 2327 CONST_BITS+PASS1_BITS+3) 2328 & RANGE_MASK]; 2329 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, 2330 CONST_BITS+PASS1_BITS+3) 2331 & RANGE_MASK]; 2332 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, 2333 CONST_BITS+PASS1_BITS+3) 2334 & RANGE_MASK]; 2335 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, 2336 CONST_BITS+PASS1_BITS+3) 2337 & RANGE_MASK]; 2338 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, 2339 CONST_BITS+PASS1_BITS+3) 2340 & RANGE_MASK]; 2341 2342 wsptr += 8; /* advance pointer to next row */ 2343 } 2344 } 2345 2346 2347 /* 2348 * Perform dequantization and inverse DCT on one block of coefficients, 2349 * producing a 15x15 output block. 2350 * 2351 * Optimized algorithm with 22 multiplications in the 1-D kernel. 
2352 * cK represents sqrt(2) * cos(K*pi/30). 2353 */ 2354 2355 GLOBAL(void) 2356 jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 2357 JCOEFPTR coef_block, 2358 JSAMPARRAY output_buf, JDIMENSION output_col) 2359 { 2360 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; 2361 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; 2362 INT32 z1, z2, z3, z4; 2363 JCOEFPTR inptr; 2364 ISLOW_MULT_TYPE * quantptr; 2365 int * wsptr; 2366 JSAMPROW outptr; 2367 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 2368 int ctr; 2369 int workspace[8*15]; /* buffers data between passes */ 2370 SHIFT_TEMPS 2371 2372 /* Pass 1: process columns from input, store into work array. */ 2373 2374 inptr = coef_block; 2375 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 2376 wsptr = workspace; 2377 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 2378 /* Even part */ 2379 2380 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 2381 z1 <<= CONST_BITS; 2382 /* Add fudge factor here for final descale. 
*/ 2383 z1 += ONE << (CONST_BITS-PASS1_BITS-1); 2384 2385 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 2386 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 2387 z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 2388 2389 tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */ 2390 tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */ 2391 2392 tmp12 = z1 - tmp10; 2393 tmp13 = z1 + tmp11; 2394 z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */ 2395 2396 z4 = z2 - z3; 2397 z3 += z2; 2398 tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */ 2399 tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */ 2400 z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */ 2401 2402 tmp20 = tmp13 + tmp10 + tmp11; 2403 tmp23 = tmp12 - tmp10 + tmp11 + z2; 2404 2405 tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */ 2406 tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */ 2407 2408 tmp25 = tmp13 - tmp10 - tmp11; 2409 tmp26 = tmp12 + tmp10 - tmp11 - z2; 2410 2411 tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */ 2412 tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */ 2413 2414 tmp21 = tmp12 + tmp10 + tmp11; 2415 tmp24 = tmp13 - tmp10 + tmp11; 2416 tmp11 += tmp11; 2417 tmp22 = z1 + tmp11; /* c10 = c6-c12 */ 2418 tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */ 2419 2420 /* Odd part */ 2421 2422 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 2423 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 2424 z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 2425 z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */ 2426 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 2427 2428 tmp13 = z2 - z4; 2429 tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */ 2430 tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */ 2431 tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */ 2432 2433 tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */ 2434 tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */ 2435 z2 = z1 - z4; 2436 tmp12 = z3 + 
MULTIPLY(z2, FIX(1.406466353)); /* c1 */ 2437 2438 tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */ 2439 tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */ 2440 tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */ 2441 z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */ 2442 tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */ 2443 tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */ 2444 2445 /* Final output stage */ 2446 2447 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 2448 wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 2449 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 2450 wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 2451 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 2452 wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 2453 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); 2454 wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); 2455 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 2456 wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 2457 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); 2458 wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); 2459 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); 2460 wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); 2461 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS); 2462 } 2463 2464 /* Pass 2: process 15 rows from work array, store into output array. */ 2465 2466 wsptr = workspace; 2467 for (ctr = 0; ctr < 15; ctr++) { 2468 outptr = output_buf[ctr] + output_col; 2469 2470 /* Even part */ 2471 2472 /* Add range center and fudge factor for final descale and range-limit. 
*/ 2473 z1 = (INT32) wsptr[0] + 2474 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 2475 (ONE << (PASS1_BITS+2))); 2476 z1 <<= CONST_BITS; 2477 2478 z2 = (INT32) wsptr[2]; 2479 z3 = (INT32) wsptr[4]; 2480 z4 = (INT32) wsptr[6]; 2481 2482 tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */ 2483 tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */ 2484 2485 tmp12 = z1 - tmp10; 2486 tmp13 = z1 + tmp11; 2487 z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */ 2488 2489 z4 = z2 - z3; 2490 z3 += z2; 2491 tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */ 2492 tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */ 2493 z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */ 2494 2495 tmp20 = tmp13 + tmp10 + tmp11; 2496 tmp23 = tmp12 - tmp10 + tmp11 + z2; 2497 2498 tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */ 2499 tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */ 2500 2501 tmp25 = tmp13 - tmp10 - tmp11; 2502 tmp26 = tmp12 + tmp10 - tmp11 - z2; 2503 2504 tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */ 2505 tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */ 2506 2507 tmp21 = tmp12 + tmp10 + tmp11; 2508 tmp24 = tmp13 - tmp10 + tmp11; 2509 tmp11 += tmp11; 2510 tmp22 = z1 + tmp11; /* c10 = c6-c12 */ 2511 tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */ 2512 2513 /* Odd part */ 2514 2515 z1 = (INT32) wsptr[1]; 2516 z2 = (INT32) wsptr[3]; 2517 z4 = (INT32) wsptr[5]; 2518 z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */ 2519 z4 = (INT32) wsptr[7]; 2520 2521 tmp13 = z2 - z4; 2522 tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */ 2523 tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */ 2524 tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */ 2525 2526 tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */ 2527 tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */ 2528 z2 = z1 - z4; 2529 tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */ 2530 2531 tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */ 2532 tmp16 = tmp12 - 
MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */ 2533 tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */ 2534 z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */ 2535 tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */ 2536 tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */ 2537 2538 /* Final output stage */ 2539 2540 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 2541 CONST_BITS+PASS1_BITS+3) 2542 & RANGE_MASK]; 2543 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 2544 CONST_BITS+PASS1_BITS+3) 2545 & RANGE_MASK]; 2546 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 2547 CONST_BITS+PASS1_BITS+3) 2548 & RANGE_MASK]; 2549 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 2550 CONST_BITS+PASS1_BITS+3) 2551 & RANGE_MASK]; 2552 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 2553 CONST_BITS+PASS1_BITS+3) 2554 & RANGE_MASK]; 2555 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 2556 CONST_BITS+PASS1_BITS+3) 2557 & RANGE_MASK]; 2558 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 2559 CONST_BITS+PASS1_BITS+3) 2560 & RANGE_MASK]; 2561 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 2562 CONST_BITS+PASS1_BITS+3) 2563 & RANGE_MASK]; 2564 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 2565 CONST_BITS+PASS1_BITS+3) 2566 & RANGE_MASK]; 2567 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 2568 CONST_BITS+PASS1_BITS+3) 2569 & RANGE_MASK]; 2570 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, 2571 CONST_BITS+PASS1_BITS+3) 2572 & RANGE_MASK]; 2573 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, 2574 CONST_BITS+PASS1_BITS+3) 2575 & RANGE_MASK]; 2576 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, 2577 CONST_BITS+PASS1_BITS+3) 2578 & RANGE_MASK]; 2579 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, 2580 CONST_BITS+PASS1_BITS+3) 2581 & RANGE_MASK]; 2582 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27, 2583 
CONST_BITS+PASS1_BITS+3) 2584 & RANGE_MASK]; 2585 2586 wsptr += 8; /* advance pointer to next row */ 2587 } 2588 } 2589 2590 2591 /* 2592 * Perform dequantization and inverse DCT on one block of coefficients, 2593 * producing a 16x16 output block. 2594 * 2595 * Optimized algorithm with 28 multiplications in the 1-D kernel. 2596 * cK represents sqrt(2) * cos(K*pi/32). 2597 */ 2598 2599 GLOBAL(void) 2600 jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 2601 JCOEFPTR coef_block, 2602 JSAMPARRAY output_buf, JDIMENSION output_col) 2603 { 2604 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13; 2605 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; 2606 INT32 z1, z2, z3, z4; 2607 JCOEFPTR inptr; 2608 ISLOW_MULT_TYPE * quantptr; 2609 int * wsptr; 2610 JSAMPROW outptr; 2611 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 2612 int ctr; 2613 int workspace[8*16]; /* buffers data between passes */ 2614 SHIFT_TEMPS 2615 2616 /* Pass 1: process columns from input, store into work array. */ 2617 2618 inptr = coef_block; 2619 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 2620 wsptr = workspace; 2621 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 2622 /* Even part */ 2623 2624 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 2625 tmp0 <<= CONST_BITS; 2626 /* Add fudge factor here for final descale. 
*/ 2627 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); 2628 2629 z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 2630 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ 2631 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ 2632 2633 tmp10 = tmp0 + tmp1; 2634 tmp11 = tmp0 - tmp1; 2635 tmp12 = tmp0 + tmp2; 2636 tmp13 = tmp0 - tmp2; 2637 2638 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 2639 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 2640 z3 = z1 - z2; 2641 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ 2642 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ 2643 2644 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ 2645 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ 2646 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ 2647 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ 2648 2649 tmp20 = tmp10 + tmp0; 2650 tmp27 = tmp10 - tmp0; 2651 tmp21 = tmp12 + tmp1; 2652 tmp26 = tmp12 - tmp1; 2653 tmp22 = tmp13 + tmp2; 2654 tmp25 = tmp13 - tmp2; 2655 tmp23 = tmp11 + tmp3; 2656 tmp24 = tmp11 - tmp3; 2657 2658 /* Odd part */ 2659 2660 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 2661 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 2662 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 2663 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 2664 2665 tmp11 = z1 + z3; 2666 2667 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ 2668 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ 2669 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ 2670 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ 2671 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ 2672 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ 2673 tmp0 = tmp1 + tmp2 + tmp3 - 2674 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ 2675 tmp13 = tmp10 + tmp11 + tmp12 - 2676 MULTIPLY(z1, FIX(1.835730603)); /* 
c9+c11+c13-c15 */ 2677 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ 2678 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ 2679 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ 2680 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ 2681 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ 2682 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ 2683 z2 += z4; 2684 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ 2685 tmp1 += z1; 2686 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ 2687 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ 2688 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ 2689 tmp12 += z2; 2690 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ 2691 tmp2 += z2; 2692 tmp3 += z2; 2693 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ 2694 tmp10 += z2; 2695 tmp11 += z2; 2696 2697 /* Final output stage */ 2698 2699 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS); 2700 wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS); 2701 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS); 2702 wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS); 2703 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS); 2704 wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS); 2705 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS); 2706 wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS); 2707 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS); 2708 wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS); 2709 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS); 2710 wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS); 2711 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS); 2712 wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS); 2713 wsptr[8*7] 
= (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS); 2714 wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS); 2715 } 2716 2717 /* Pass 2: process 16 rows from work array, store into output array. */ 2718 2719 wsptr = workspace; 2720 for (ctr = 0; ctr < 16; ctr++) { 2721 outptr = output_buf[ctr] + output_col; 2722 2723 /* Even part */ 2724 2725 /* Add range center and fudge factor for final descale and range-limit. */ 2726 tmp0 = (INT32) wsptr[0] + 2727 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 2728 (ONE << (PASS1_BITS+2))); 2729 tmp0 <<= CONST_BITS; 2730 2731 z1 = (INT32) wsptr[4]; 2732 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ 2733 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ 2734 2735 tmp10 = tmp0 + tmp1; 2736 tmp11 = tmp0 - tmp1; 2737 tmp12 = tmp0 + tmp2; 2738 tmp13 = tmp0 - tmp2; 2739 2740 z1 = (INT32) wsptr[2]; 2741 z2 = (INT32) wsptr[6]; 2742 z3 = z1 - z2; 2743 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ 2744 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ 2745 2746 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ 2747 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ 2748 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ 2749 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ 2750 2751 tmp20 = tmp10 + tmp0; 2752 tmp27 = tmp10 - tmp0; 2753 tmp21 = tmp12 + tmp1; 2754 tmp26 = tmp12 - tmp1; 2755 tmp22 = tmp13 + tmp2; 2756 tmp25 = tmp13 - tmp2; 2757 tmp23 = tmp11 + tmp3; 2758 tmp24 = tmp11 - tmp3; 2759 2760 /* Odd part */ 2761 2762 z1 = (INT32) wsptr[1]; 2763 z2 = (INT32) wsptr[3]; 2764 z3 = (INT32) wsptr[5]; 2765 z4 = (INT32) wsptr[7]; 2766 2767 tmp11 = z1 + z3; 2768 2769 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ 2770 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ 2771 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ 2772 tmp10 = MULTIPLY(z1 - z4, 
FIX(0.897167586)); /* c9 */ 2773 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ 2774 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ 2775 tmp0 = tmp1 + tmp2 + tmp3 - 2776 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ 2777 tmp13 = tmp10 + tmp11 + tmp12 - 2778 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ 2779 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ 2780 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ 2781 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ 2782 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ 2783 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ 2784 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ 2785 z2 += z4; 2786 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ 2787 tmp1 += z1; 2788 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ 2789 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ 2790 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ 2791 tmp12 += z2; 2792 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ 2793 tmp2 += z2; 2794 tmp3 += z2; 2795 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ 2796 tmp10 += z2; 2797 tmp11 += z2; 2798 2799 /* Final output stage */ 2800 2801 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0, 2802 CONST_BITS+PASS1_BITS+3) 2803 & RANGE_MASK]; 2804 outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0, 2805 CONST_BITS+PASS1_BITS+3) 2806 & RANGE_MASK]; 2807 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1, 2808 CONST_BITS+PASS1_BITS+3) 2809 & RANGE_MASK]; 2810 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1, 2811 CONST_BITS+PASS1_BITS+3) 2812 & RANGE_MASK]; 2813 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2, 2814 CONST_BITS+PASS1_BITS+3) 2815 & RANGE_MASK]; 2816 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2, 2817 CONST_BITS+PASS1_BITS+3) 2818 & RANGE_MASK]; 2819 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3, 2820 
CONST_BITS+PASS1_BITS+3) 2821 & RANGE_MASK]; 2822 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3, 2823 CONST_BITS+PASS1_BITS+3) 2824 & RANGE_MASK]; 2825 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10, 2826 CONST_BITS+PASS1_BITS+3) 2827 & RANGE_MASK]; 2828 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10, 2829 CONST_BITS+PASS1_BITS+3) 2830 & RANGE_MASK]; 2831 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11, 2832 CONST_BITS+PASS1_BITS+3) 2833 & RANGE_MASK]; 2834 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11, 2835 CONST_BITS+PASS1_BITS+3) 2836 & RANGE_MASK]; 2837 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12, 2838 CONST_BITS+PASS1_BITS+3) 2839 & RANGE_MASK]; 2840 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12, 2841 CONST_BITS+PASS1_BITS+3) 2842 & RANGE_MASK]; 2843 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13, 2844 CONST_BITS+PASS1_BITS+3) 2845 & RANGE_MASK]; 2846 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13, 2847 CONST_BITS+PASS1_BITS+3) 2848 & RANGE_MASK]; 2849 2850 wsptr += 8; /* advance pointer to next row */ 2851 } 2852 } 2853 2854 2855 /* 2856 * Perform dequantization and inverse DCT on one block of coefficients, 2857 * producing a 16x8 output block. 2858 * 2859 * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows). 2860 */ 2861 2862 GLOBAL(void) 2863 jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 2864 JCOEFPTR coef_block, 2865 JSAMPARRAY output_buf, JDIMENSION output_col) 2866 { 2867 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13; 2868 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; 2869 INT32 z1, z2, z3, z4; 2870 JCOEFPTR inptr; 2871 ISLOW_MULT_TYPE * quantptr; 2872 int * wsptr; 2873 JSAMPROW outptr; 2874 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 2875 int ctr; 2876 int workspace[8*8]; /* buffers data between passes */ 2877 SHIFT_TEMPS 2878 2879 /* Pass 1: process columns from input, store into work array. 
2880 * Note results are scaled up by sqrt(8) compared to a true IDCT; 2881 * furthermore, we scale the results by 2**PASS1_BITS. 2882 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16). 2883 */ 2884 2885 inptr = coef_block; 2886 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 2887 wsptr = workspace; 2888 for (ctr = DCTSIZE; ctr > 0; ctr--) { 2889 /* Due to quantization, we will usually find that many of the input 2890 * coefficients are zero, especially the AC terms. We can exploit this 2891 * by short-circuiting the IDCT calculation for any column in which all 2892 * the AC terms are zero. In that case each output is equal to the 2893 * DC coefficient (with scale factor as needed). 2894 * With typical images and quantization tables, half or more of the 2895 * column DCT calculations can be simplified this way. 2896 */ 2897 2898 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && 2899 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && 2900 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && 2901 inptr[DCTSIZE*7] == 0) { 2902 /* AC terms all zero */ 2903 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; 2904 2905 wsptr[DCTSIZE*0] = dcval; 2906 wsptr[DCTSIZE*1] = dcval; 2907 wsptr[DCTSIZE*2] = dcval; 2908 wsptr[DCTSIZE*3] = dcval; 2909 wsptr[DCTSIZE*4] = dcval; 2910 wsptr[DCTSIZE*5] = dcval; 2911 wsptr[DCTSIZE*6] = dcval; 2912 wsptr[DCTSIZE*7] = dcval; 2913 2914 inptr++; /* advance pointers to next column */ 2915 quantptr++; 2916 wsptr++; 2917 continue; 2918 } 2919 2920 /* Even part: reverse the even part of the forward DCT. 2921 * The rotator is c(-6). 2922 */ 2923 2924 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 2925 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 2926 z2 <<= CONST_BITS; 2927 z3 <<= CONST_BITS; 2928 /* Add fudge factor here for final descale. 
*/ 2929 z2 += ONE << (CONST_BITS-PASS1_BITS-1); 2930 2931 tmp0 = z2 + z3; 2932 tmp1 = z2 - z3; 2933 2934 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 2935 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 2936 2937 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ 2938 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ 2939 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ 2940 2941 tmp10 = tmp0 + tmp2; 2942 tmp13 = tmp0 - tmp2; 2943 tmp11 = tmp1 + tmp3; 2944 tmp12 = tmp1 - tmp3; 2945 2946 /* Odd part per figure 8; the matrix is unitary and hence its 2947 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. 2948 */ 2949 2950 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 2951 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 2952 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 2953 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 2954 2955 z2 = tmp0 + tmp2; 2956 z3 = tmp1 + tmp3; 2957 2958 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */ 2959 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */ 2960 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */ 2961 z2 += z1; 2962 z3 += z1; 2963 2964 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ 2965 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */ 2966 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */ 2967 tmp0 += z1 + z2; 2968 tmp3 += z1 + z3; 2969 2970 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ 2971 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */ 2972 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */ 2973 tmp1 += z1 + z3; 2974 tmp2 += z1 + z2; 2975 2976 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 2977 2978 wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS); 2979 wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS); 2980 wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS); 2981 wsptr[DCTSIZE*6] = (int) 
RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS); 2982 wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS); 2983 wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS); 2984 wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS); 2985 wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS); 2986 2987 inptr++; /* advance pointers to next column */ 2988 quantptr++; 2989 wsptr++; 2990 } 2991 2992 /* Pass 2: process 8 rows from work array, store into output array. 2993 * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32). 2994 */ 2995 2996 wsptr = workspace; 2997 for (ctr = 0; ctr < 8; ctr++) { 2998 outptr = output_buf[ctr] + output_col; 2999 3000 /* Even part */ 3001 3002 /* Add range center and fudge factor for final descale and range-limit. */ 3003 tmp0 = (INT32) wsptr[0] + 3004 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 3005 (ONE << (PASS1_BITS+2))); 3006 tmp0 <<= CONST_BITS; 3007 3008 z1 = (INT32) wsptr[4]; 3009 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ 3010 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ 3011 3012 tmp10 = tmp0 + tmp1; 3013 tmp11 = tmp0 - tmp1; 3014 tmp12 = tmp0 + tmp2; 3015 tmp13 = tmp0 - tmp2; 3016 3017 z1 = (INT32) wsptr[2]; 3018 z2 = (INT32) wsptr[6]; 3019 z3 = z1 - z2; 3020 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ 3021 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ 3022 3023 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ 3024 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ 3025 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ 3026 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ 3027 3028 tmp20 = tmp10 + tmp0; 3029 tmp27 = tmp10 - tmp0; 3030 tmp21 = tmp12 + tmp1; 3031 tmp26 = tmp12 - tmp1; 3032 tmp22 = tmp13 + tmp2; 3033 tmp25 = tmp13 - tmp2; 3034 tmp23 = tmp11 + tmp3; 3035 tmp24 = tmp11 - 
tmp3; 3036 3037 /* Odd part */ 3038 3039 z1 = (INT32) wsptr[1]; 3040 z2 = (INT32) wsptr[3]; 3041 z3 = (INT32) wsptr[5]; 3042 z4 = (INT32) wsptr[7]; 3043 3044 tmp11 = z1 + z3; 3045 3046 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ 3047 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ 3048 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ 3049 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ 3050 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ 3051 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ 3052 tmp0 = tmp1 + tmp2 + tmp3 - 3053 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ 3054 tmp13 = tmp10 + tmp11 + tmp12 - 3055 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ 3056 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ 3057 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ 3058 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ 3059 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ 3060 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ 3061 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ 3062 z2 += z4; 3063 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ 3064 tmp1 += z1; 3065 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ 3066 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ 3067 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ 3068 tmp12 += z2; 3069 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ 3070 tmp2 += z2; 3071 tmp3 += z2; 3072 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ 3073 tmp10 += z2; 3074 tmp11 += z2; 3075 3076 /* Final output stage */ 3077 3078 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0, 3079 CONST_BITS+PASS1_BITS+3) 3080 & RANGE_MASK]; 3081 outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0, 3082 CONST_BITS+PASS1_BITS+3) 3083 & RANGE_MASK]; 3084 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1, 3085 CONST_BITS+PASS1_BITS+3) 3086 & RANGE_MASK]; 3087 outptr[14] = range_limit[(int) 
RIGHT_SHIFT(tmp21 - tmp1, 3088 CONST_BITS+PASS1_BITS+3) 3089 & RANGE_MASK]; 3090 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2, 3091 CONST_BITS+PASS1_BITS+3) 3092 & RANGE_MASK]; 3093 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2, 3094 CONST_BITS+PASS1_BITS+3) 3095 & RANGE_MASK]; 3096 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3, 3097 CONST_BITS+PASS1_BITS+3) 3098 & RANGE_MASK]; 3099 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3, 3100 CONST_BITS+PASS1_BITS+3) 3101 & RANGE_MASK]; 3102 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10, 3103 CONST_BITS+PASS1_BITS+3) 3104 & RANGE_MASK]; 3105 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10, 3106 CONST_BITS+PASS1_BITS+3) 3107 & RANGE_MASK]; 3108 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11, 3109 CONST_BITS+PASS1_BITS+3) 3110 & RANGE_MASK]; 3111 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11, 3112 CONST_BITS+PASS1_BITS+3) 3113 & RANGE_MASK]; 3114 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12, 3115 CONST_BITS+PASS1_BITS+3) 3116 & RANGE_MASK]; 3117 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12, 3118 CONST_BITS+PASS1_BITS+3) 3119 & RANGE_MASK]; 3120 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13, 3121 CONST_BITS+PASS1_BITS+3) 3122 & RANGE_MASK]; 3123 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13, 3124 CONST_BITS+PASS1_BITS+3) 3125 & RANGE_MASK]; 3126 3127 wsptr += 8; /* advance pointer to next row */ 3128 } 3129 } 3130 3131 3132 /* 3133 * Perform dequantization and inverse DCT on one block of coefficients, 3134 * producing a 14x7 output block. 3135 * 3136 * 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows). 
3137 */ 3138 3139 GLOBAL(void) 3140 jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 3141 JCOEFPTR coef_block, 3142 JSAMPARRAY output_buf, JDIMENSION output_col) 3143 { 3144 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; 3145 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; 3146 INT32 z1, z2, z3, z4; 3147 JCOEFPTR inptr; 3148 ISLOW_MULT_TYPE * quantptr; 3149 int * wsptr; 3150 JSAMPROW outptr; 3151 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 3152 int ctr; 3153 int workspace[8*7]; /* buffers data between passes */ 3154 SHIFT_TEMPS 3155 3156 /* Pass 1: process columns from input, store into work array. 3157 * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14). 3158 */ 3159 3160 inptr = coef_block; 3161 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 3162 wsptr = workspace; 3163 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 3164 /* Even part */ 3165 3166 tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 3167 tmp23 <<= CONST_BITS; 3168 /* Add fudge factor here for final descale. 
*/ 3169 tmp23 += ONE << (CONST_BITS-PASS1_BITS-1); 3170 3171 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 3172 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 3173 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 3174 3175 tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ 3176 tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ 3177 tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ 3178 tmp10 = z1 + z3; 3179 z2 -= tmp10; 3180 tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */ 3181 tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ 3182 tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ 3183 tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ 3184 3185 /* Odd part */ 3186 3187 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 3188 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 3189 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 3190 3191 tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ 3192 tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ 3193 tmp10 = tmp11 - tmp12; 3194 tmp11 += tmp12; 3195 tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ 3196 tmp11 += tmp12; 3197 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ 3198 tmp10 += z2; 3199 tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ 3200 3201 /* Final output stage */ 3202 3203 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 3204 wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 3205 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 3206 wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 3207 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 3208 wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 3209 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS); 3210 } 3211 3212 /* Pass 2: process 7 rows from work array, store into 
output array. 3213 * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28). 3214 */ 3215 3216 wsptr = workspace; 3217 for (ctr = 0; ctr < 7; ctr++) { 3218 outptr = output_buf[ctr] + output_col; 3219 3220 /* Even part */ 3221 3222 /* Add range center and fudge factor for final descale and range-limit. */ 3223 z1 = (INT32) wsptr[0] + 3224 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 3225 (ONE << (PASS1_BITS+2))); 3226 z1 <<= CONST_BITS; 3227 z4 = (INT32) wsptr[4]; 3228 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ 3229 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ 3230 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ 3231 3232 tmp10 = z1 + z2; 3233 tmp11 = z1 + z3; 3234 tmp12 = z1 - z4; 3235 3236 tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */ 3237 3238 z1 = (INT32) wsptr[2]; 3239 z2 = (INT32) wsptr[6]; 3240 3241 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ 3242 3243 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ 3244 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ 3245 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ 3246 MULTIPLY(z2, FIX(1.378756276)); /* c2 */ 3247 3248 tmp20 = tmp10 + tmp13; 3249 tmp26 = tmp10 - tmp13; 3250 tmp21 = tmp11 + tmp14; 3251 tmp25 = tmp11 - tmp14; 3252 tmp22 = tmp12 + tmp15; 3253 tmp24 = tmp12 - tmp15; 3254 3255 /* Odd part */ 3256 3257 z1 = (INT32) wsptr[1]; 3258 z2 = (INT32) wsptr[3]; 3259 z3 = (INT32) wsptr[5]; 3260 z4 = (INT32) wsptr[7]; 3261 z4 <<= CONST_BITS; 3262 3263 tmp14 = z1 + z3; 3264 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ 3265 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ 3266 tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ 3267 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ 3268 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ 3269 z1 -= z2; 3270 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */ 3271 tmp16 += tmp15; 3272 tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */ 3273 tmp11 += tmp13 - 
MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ 3274 tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ 3275 tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ 3276 tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ 3277 tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ 3278 3279 tmp13 = ((z1 - z3) << CONST_BITS) + z4; 3280 3281 /* Final output stage */ 3282 3283 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 3284 CONST_BITS+PASS1_BITS+3) 3285 & RANGE_MASK]; 3286 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 3287 CONST_BITS+PASS1_BITS+3) 3288 & RANGE_MASK]; 3289 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 3290 CONST_BITS+PASS1_BITS+3) 3291 & RANGE_MASK]; 3292 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 3293 CONST_BITS+PASS1_BITS+3) 3294 & RANGE_MASK]; 3295 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 3296 CONST_BITS+PASS1_BITS+3) 3297 & RANGE_MASK]; 3298 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 3299 CONST_BITS+PASS1_BITS+3) 3300 & RANGE_MASK]; 3301 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 3302 CONST_BITS+PASS1_BITS+3) 3303 & RANGE_MASK]; 3304 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 3305 CONST_BITS+PASS1_BITS+3) 3306 & RANGE_MASK]; 3307 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 3308 CONST_BITS+PASS1_BITS+3) 3309 & RANGE_MASK]; 3310 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 3311 CONST_BITS+PASS1_BITS+3) 3312 & RANGE_MASK]; 3313 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, 3314 CONST_BITS+PASS1_BITS+3) 3315 & RANGE_MASK]; 3316 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, 3317 CONST_BITS+PASS1_BITS+3) 3318 & RANGE_MASK]; 3319 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, 3320 CONST_BITS+PASS1_BITS+3) 3321 & RANGE_MASK]; 3322 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, 3323 CONST_BITS+PASS1_BITS+3) 3324 & RANGE_MASK]; 3325 3326 
wsptr += 8; /* advance pointer to next row */ 3327 } 3328 } 3329 3330 3331 /* 3332 * Perform dequantization and inverse DCT on one block of coefficients, 3333 * producing a 12x6 output block. 3334 * 3335 * 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows). 3336 */ 3337 3338 GLOBAL(void) 3339 jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 3340 JCOEFPTR coef_block, 3341 JSAMPARRAY output_buf, JDIMENSION output_col) 3342 { 3343 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; 3344 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; 3345 INT32 z1, z2, z3, z4; 3346 JCOEFPTR inptr; 3347 ISLOW_MULT_TYPE * quantptr; 3348 int * wsptr; 3349 JSAMPROW outptr; 3350 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 3351 int ctr; 3352 int workspace[8*6]; /* buffers data between passes */ 3353 SHIFT_TEMPS 3354 3355 /* Pass 1: process columns from input, store into work array. 3356 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). 3357 */ 3358 3359 inptr = coef_block; 3360 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 3361 wsptr = workspace; 3362 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 3363 /* Even part */ 3364 3365 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 3366 tmp10 <<= CONST_BITS; 3367 /* Add fudge factor here for final descale. 
*/ 3368 tmp10 += ONE << (CONST_BITS-PASS1_BITS-1); 3369 tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 3370 tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */ 3371 tmp11 = tmp10 + tmp20; 3372 tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS); 3373 tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 3374 tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */ 3375 tmp20 = tmp11 + tmp10; 3376 tmp22 = tmp11 - tmp10; 3377 3378 /* Odd part */ 3379 3380 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 3381 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 3382 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 3383 tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ 3384 tmp10 = tmp11 + ((z1 + z2) << CONST_BITS); 3385 tmp12 = tmp11 + ((z3 - z2) << CONST_BITS); 3386 tmp11 = (z1 - z2 - z3) << PASS1_BITS; 3387 3388 /* Final output stage */ 3389 3390 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 3391 wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 3392 wsptr[8*1] = (int) (tmp21 + tmp11); 3393 wsptr[8*4] = (int) (tmp21 - tmp11); 3394 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 3395 wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 3396 } 3397 3398 /* Pass 2: process 6 rows from work array, store into output array. 3399 * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24). 3400 */ 3401 3402 wsptr = workspace; 3403 for (ctr = 0; ctr < 6; ctr++) { 3404 outptr = output_buf[ctr] + output_col; 3405 3406 /* Even part */ 3407 3408 /* Add range center and fudge factor for final descale and range-limit. 
*/ 3409 z3 = (INT32) wsptr[0] + 3410 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 3411 (ONE << (PASS1_BITS+2))); 3412 z3 <<= CONST_BITS; 3413 3414 z4 = (INT32) wsptr[4]; 3415 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ 3416 3417 tmp10 = z3 + z4; 3418 tmp11 = z3 - z4; 3419 3420 z1 = (INT32) wsptr[2]; 3421 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ 3422 z1 <<= CONST_BITS; 3423 z2 = (INT32) wsptr[6]; 3424 z2 <<= CONST_BITS; 3425 3426 tmp12 = z1 - z2; 3427 3428 tmp21 = z3 + tmp12; 3429 tmp24 = z3 - tmp12; 3430 3431 tmp12 = z4 + z2; 3432 3433 tmp20 = tmp10 + tmp12; 3434 tmp25 = tmp10 - tmp12; 3435 3436 tmp12 = z4 - z1 - z2; 3437 3438 tmp22 = tmp11 + tmp12; 3439 tmp23 = tmp11 - tmp12; 3440 3441 /* Odd part */ 3442 3443 z1 = (INT32) wsptr[1]; 3444 z2 = (INT32) wsptr[3]; 3445 z3 = (INT32) wsptr[5]; 3446 z4 = (INT32) wsptr[7]; 3447 3448 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ 3449 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ 3450 3451 tmp10 = z1 + z3; 3452 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ 3453 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ 3454 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ 3455 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ 3456 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ 3457 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ 3458 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ 3459 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ 3460 3461 z1 -= z4; 3462 z2 -= z3; 3463 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ 3464 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ 3465 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ 3466 3467 /* Final output stage */ 3468 3469 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 3470 CONST_BITS+PASS1_BITS+3) 3471 & RANGE_MASK]; 3472 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 3473 CONST_BITS+PASS1_BITS+3) 3474 & RANGE_MASK]; 
3475 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 3476 CONST_BITS+PASS1_BITS+3) 3477 & RANGE_MASK]; 3478 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 3479 CONST_BITS+PASS1_BITS+3) 3480 & RANGE_MASK]; 3481 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 3482 CONST_BITS+PASS1_BITS+3) 3483 & RANGE_MASK]; 3484 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 3485 CONST_BITS+PASS1_BITS+3) 3486 & RANGE_MASK]; 3487 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 3488 CONST_BITS+PASS1_BITS+3) 3489 & RANGE_MASK]; 3490 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 3491 CONST_BITS+PASS1_BITS+3) 3492 & RANGE_MASK]; 3493 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 3494 CONST_BITS+PASS1_BITS+3) 3495 & RANGE_MASK]; 3496 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 3497 CONST_BITS+PASS1_BITS+3) 3498 & RANGE_MASK]; 3499 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, 3500 CONST_BITS+PASS1_BITS+3) 3501 & RANGE_MASK]; 3502 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, 3503 CONST_BITS+PASS1_BITS+3) 3504 & RANGE_MASK]; 3505 3506 wsptr += 8; /* advance pointer to next row */ 3507 } 3508 } 3509 3510 3511 /* 3512 * Perform dequantization and inverse DCT on one block of coefficients, 3513 * producing a 10x5 output block. 3514 * 3515 * 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows). 
3516 */ 3517 3518 GLOBAL(void) 3519 jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 3520 JCOEFPTR coef_block, 3521 JSAMPARRAY output_buf, JDIMENSION output_col) 3522 { 3523 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; 3524 INT32 tmp20, tmp21, tmp22, tmp23, tmp24; 3525 INT32 z1, z2, z3, z4; 3526 JCOEFPTR inptr; 3527 ISLOW_MULT_TYPE * quantptr; 3528 int * wsptr; 3529 JSAMPROW outptr; 3530 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 3531 int ctr; 3532 int workspace[8*5]; /* buffers data between passes */ 3533 SHIFT_TEMPS 3534 3535 /* Pass 1: process columns from input, store into work array. 3536 * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10). 3537 */ 3538 3539 inptr = coef_block; 3540 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 3541 wsptr = workspace; 3542 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 3543 /* Even part */ 3544 3545 tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 3546 tmp12 <<= CONST_BITS; 3547 /* Add fudge factor here for final descale. 
*/ 3548 tmp12 += ONE << (CONST_BITS-PASS1_BITS-1); 3549 tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 3550 tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 3551 z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */ 3552 z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */ 3553 z3 = tmp12 + z2; 3554 tmp10 = z3 + z1; 3555 tmp11 = z3 - z1; 3556 tmp12 -= z2 << 2; 3557 3558 /* Odd part */ 3559 3560 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 3561 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 3562 3563 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ 3564 tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ 3565 tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ 3566 3567 /* Final output stage */ 3568 3569 wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS); 3570 wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS); 3571 wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS); 3572 wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS); 3573 wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS); 3574 } 3575 3576 /* Pass 2: process 5 rows from work array, store into output array. 3577 * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20). 3578 */ 3579 3580 wsptr = workspace; 3581 for (ctr = 0; ctr < 5; ctr++) { 3582 outptr = output_buf[ctr] + output_col; 3583 3584 /* Even part */ 3585 3586 /* Add range center and fudge factor for final descale and range-limit. 
*/ 3587 z3 = (INT32) wsptr[0] + 3588 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 3589 (ONE << (PASS1_BITS+2))); 3590 z3 <<= CONST_BITS; 3591 z4 = (INT32) wsptr[4]; 3592 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ 3593 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ 3594 tmp10 = z3 + z1; 3595 tmp11 = z3 - z2; 3596 3597 tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */ 3598 3599 z2 = (INT32) wsptr[2]; 3600 z3 = (INT32) wsptr[6]; 3601 3602 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ 3603 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ 3604 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ 3605 3606 tmp20 = tmp10 + tmp12; 3607 tmp24 = tmp10 - tmp12; 3608 tmp21 = tmp11 + tmp13; 3609 tmp23 = tmp11 - tmp13; 3610 3611 /* Odd part */ 3612 3613 z1 = (INT32) wsptr[1]; 3614 z2 = (INT32) wsptr[3]; 3615 z3 = (INT32) wsptr[5]; 3616 z3 <<= CONST_BITS; 3617 z4 = (INT32) wsptr[7]; 3618 3619 tmp11 = z2 + z4; 3620 tmp13 = z2 - z4; 3621 3622 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ 3623 3624 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ 3625 z4 = z3 + tmp12; 3626 3627 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ 3628 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ 3629 3630 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ 3631 z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1)); 3632 3633 tmp12 = ((z1 - tmp13) << CONST_BITS) - z3; 3634 3635 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ 3636 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ 3637 3638 /* Final output stage */ 3639 3640 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 3641 CONST_BITS+PASS1_BITS+3) 3642 & RANGE_MASK]; 3643 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 3644 CONST_BITS+PASS1_BITS+3) 3645 & RANGE_MASK]; 3646 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 3647 CONST_BITS+PASS1_BITS+3) 3648 & RANGE_MASK]; 3649 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 3650 
CONST_BITS+PASS1_BITS+3) 3651 & RANGE_MASK]; 3652 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 3653 CONST_BITS+PASS1_BITS+3) 3654 & RANGE_MASK]; 3655 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 3656 CONST_BITS+PASS1_BITS+3) 3657 & RANGE_MASK]; 3658 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 3659 CONST_BITS+PASS1_BITS+3) 3660 & RANGE_MASK]; 3661 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 3662 CONST_BITS+PASS1_BITS+3) 3663 & RANGE_MASK]; 3664 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 3665 CONST_BITS+PASS1_BITS+3) 3666 & RANGE_MASK]; 3667 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 3668 CONST_BITS+PASS1_BITS+3) 3669 & RANGE_MASK]; 3670 3671 wsptr += 8; /* advance pointer to next row */ 3672 } 3673 } 3674 3675 3676 /* 3677 * Perform dequantization and inverse DCT on one block of coefficients, 3678 * producing an 8x4 output block. 3679 * 3680 * 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows). * * cinfo is used only to obtain the sample range-limit table. * compptr->dct_table supplies the multipliers handed to DEQUANTIZE. * Only rows 0..3 of each of the 8 coefficient columns of coef_block are read. * Output goes to output_buf[0..3], 8 samples per row, starting at * column output_col. * NOTE(review): intermediates are left-shifted and are presumably allowed * to be negative; ISO C leaves such shifts undefined -- confirm that the * targeted compilers shift arithmetically. 3681 */ 3682 3683 GLOBAL(void) 3684 jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 3685 JCOEFPTR coef_block, 3686 JSAMPARRAY output_buf, JDIMENSION output_col) 3687 { 3688 INT32 tmp0, tmp1, tmp2, tmp3; 3689 INT32 tmp10, tmp11, tmp12, tmp13; 3690 INT32 z1, z2, z3; 3691 JCOEFPTR inptr; 3692 ISLOW_MULT_TYPE * quantptr; 3693 int * wsptr; 3694 JSAMPROW outptr; 3695 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 3696 int ctr; 3697 int workspace[8*4]; /* buffers data between passes: 4 rows of 8 ints */ 3698 SHIFT_TEMPS 3699 3700 /* Pass 1: process columns from input, store into work array. 3701 * 4-point IDCT kernel, 3702 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
3703 */ 3704 3705 inptr = coef_block; 3706 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 3707 wsptr = workspace; 3708 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 3709 /* Even part: sum/difference only, so scale up by PASS1_BITS directly */ 3710 3711 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 3712 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 3713 3714 tmp10 = (tmp0 + tmp2) << PASS1_BITS; 3715 tmp12 = (tmp0 - tmp2) << PASS1_BITS; 3716 3717 /* Odd part */ 3718 /* Same rotation as in the even part of the 8x8 LL&M IDCT */ 3719 3720 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 3721 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 3722 3723 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ 3724 /* Add fudge factor here for final descale. */ 3725 z1 += ONE << (CONST_BITS-PASS1_BITS-1); 3726 tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */ 3727 CONST_BITS-PASS1_BITS); 3728 tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */ 3729 CONST_BITS-PASS1_BITS); /* tmp0/tmp2 are descaled here, before being combined with tmp10/tmp12 */ 3730 3731 /* Final output stage */ 3732 3733 wsptr[8*0] = (int) (tmp10 + tmp0); 3734 wsptr[8*3] = (int) (tmp10 - tmp0); 3735 wsptr[8*1] = (int) (tmp12 + tmp2); 3736 wsptr[8*2] = (int) (tmp12 - tmp2); 3737 } 3738 3739 /* Pass 2: process rows from work array, store into output array. 3740 * Note that we must descale the results by a factor of 8 == 2**3, 3741 * and also undo the PASS1_BITS scaling. 3742 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16). 3743 */ 3744 3745 wsptr = workspace; 3746 for (ctr = 0; ctr < 4; ctr++) { 3747 outptr = output_buf[ctr] + output_col; 3748 3749 /* Even part: reverse the even part of the forward DCT. 3750 * The rotator is c(-6). 3751 */ 3752 3753 /* Add range center and fudge factor for final descale and range-limit.
*/ 3754 z2 = (INT32) wsptr[0] + 3755 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 3756 (ONE << (PASS1_BITS+2))); /* z2 feeds tmp0 and tmp1, so this bias reaches all 8 outputs of the row */ 3757 z3 = (INT32) wsptr[4]; 3758 3759 tmp0 = (z2 + z3) << CONST_BITS; 3760 tmp1 = (z2 - z3) << CONST_BITS; 3761 3762 z2 = (INT32) wsptr[2]; 3763 z3 = (INT32) wsptr[6]; 3764 3765 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ 3766 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ 3767 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ 3768 3769 tmp10 = tmp0 + tmp2; 3770 tmp13 = tmp0 - tmp2; 3771 tmp11 = tmp1 + tmp3; 3772 tmp12 = tmp1 - tmp3; 3773 3774 /* Odd part per figure 8; the matrix is unitary and hence its 3775 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. 3776 */ 3777 3778 tmp0 = (INT32) wsptr[7]; 3779 tmp1 = (INT32) wsptr[5]; 3780 tmp2 = (INT32) wsptr[3]; 3781 tmp3 = (INT32) wsptr[1]; 3782 3783 z2 = tmp0 + tmp2; 3784 z3 = tmp1 + tmp3; 3785 3786 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */ 3787 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */ 3788 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */ 3789 z2 += z1; 3790 z3 += z1; 3791 3792 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ 3793 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */ 3794 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */ 3795 tmp0 += z1 + z2; 3796 tmp3 += z1 + z3; 3797 3798 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ 3799 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */ 3800 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */ 3801 tmp1 += z1 + z3; 3802 tmp2 += z1 + z2; 3803 3804 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 3805 3806 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3, 3807 CONST_BITS+PASS1_BITS+3) 3808 & RANGE_MASK]; 3809 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3, 3810 CONST_BITS+PASS1_BITS+3) 3811 & RANGE_MASK]; 3812 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2, 3813 CONST_BITS+PASS1_BITS+3) 3814 & RANGE_MASK]; 3815
outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2, 3816 CONST_BITS+PASS1_BITS+3) 3817 & RANGE_MASK]; 3818 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1, 3819 CONST_BITS+PASS1_BITS+3) 3820 & RANGE_MASK]; 3821 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1, 3822 CONST_BITS+PASS1_BITS+3) 3823 & RANGE_MASK]; 3824 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0, 3825 CONST_BITS+PASS1_BITS+3) 3826 & RANGE_MASK]; 3827 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0, 3828 CONST_BITS+PASS1_BITS+3) 3829 & RANGE_MASK]; 3830 3831 wsptr += DCTSIZE; /* advance pointer to next row */ 3832 } 3833 } 3834 3835 3836 /* 3837 * Perform dequantization and inverse DCT on one block of coefficients, 3838 * producing a reduced-size 6x3 output block. 3839 * 3840 * 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows). * * cinfo is used only to obtain the sample range-limit table. * compptr->dct_table supplies the multipliers handed to DEQUANTIZE. * Only rows 0..2 of the first 6 coefficient columns of coef_block are read. * Output goes to output_buf[0..2], 6 samples per row, starting at * column output_col. 3841 */ 3842 3843 GLOBAL(void) 3844 jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 3845 JCOEFPTR coef_block, 3846 JSAMPARRAY output_buf, JDIMENSION output_col) 3847 { 3848 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12; 3849 INT32 z1, z2, z3; 3850 JCOEFPTR inptr; 3851 ISLOW_MULT_TYPE * quantptr; 3852 int * wsptr; 3853 JSAMPROW outptr; 3854 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 3855 int ctr; 3856 int workspace[6*3]; /* buffers data between passes: 3 rows of 6 ints */ 3857 SHIFT_TEMPS 3858 3859 /* Pass 1: process columns from input, store into work array. 3860 * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6). 3861 */ 3862 3863 inptr = coef_block; 3864 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 3865 wsptr = workspace; 3866 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) { 3867 /* Even part */ 3868 3869 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 3870 tmp0 <<= CONST_BITS; 3871 /* Add fudge factor here for final descale.
*/ 3872 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); 3873 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 3874 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ 3875 tmp10 = tmp0 + tmp12; 3876 tmp2 = tmp0 - tmp12 - tmp12; /* i.e. tmp0 - 2*tmp12; becomes the middle workspace row */ 3877 3878 /* Odd part: only coefficient row 1 contributes */ 3879 3880 tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 3881 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ 3882 3883 /* Final output stage */ 3884 3885 wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); 3886 wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); 3887 wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS); 3888 } 3889 3890 /* Pass 2: process 3 rows from work array, store into output array. 3891 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). 3892 */ 3893 3894 wsptr = workspace; 3895 for (ctr = 0; ctr < 3; ctr++) { 3896 outptr = output_buf[ctr] + output_col; 3897 3898 /* Even part */ 3899 3900 /* Add range center and fudge factor for final descale and range-limit.
*/ 3901 tmp0 = (INT32) wsptr[0] + 3902 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 3903 (ONE << (PASS1_BITS+2))); 3904 tmp0 <<= CONST_BITS; 3905 tmp2 = (INT32) wsptr[4]; 3906 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ 3907 tmp1 = tmp0 + tmp10; 3908 tmp11 = tmp0 - tmp10 - tmp10; 3909 tmp10 = (INT32) wsptr[2]; 3910 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ 3911 tmp10 = tmp1 + tmp0; 3912 tmp12 = tmp1 - tmp0; 3913 3914 /* Odd part */ 3915 3916 z1 = (INT32) wsptr[1]; 3917 z2 = (INT32) wsptr[3]; 3918 z3 = (INT32) wsptr[5]; 3919 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ 3920 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); 3921 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); 3922 tmp1 = (z1 - z2 - z3) << CONST_BITS; /* tmp1 is reused: its c5 product is already folded into tmp0 and tmp2 */ 3923 3924 /* Final output stage */ 3925 3926 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 3927 CONST_BITS+PASS1_BITS+3) 3928 & RANGE_MASK]; 3929 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 3930 CONST_BITS+PASS1_BITS+3) 3931 & RANGE_MASK]; 3932 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, 3933 CONST_BITS+PASS1_BITS+3) 3934 & RANGE_MASK]; 3935 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, 3936 CONST_BITS+PASS1_BITS+3) 3937 & RANGE_MASK]; 3938 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, 3939 CONST_BITS+PASS1_BITS+3) 3940 & RANGE_MASK]; 3941 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, 3942 CONST_BITS+PASS1_BITS+3) 3943 & RANGE_MASK]; 3944 3945 wsptr += 6; /* advance pointer to next row */ 3946 } 3947 } 3948 3949 3950 /* 3951 * Perform dequantization and inverse DCT on one block of coefficients, 3952 * producing a 4x2 output block. 3953 * 3954 * 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
3955 */ /* jpeg_idct_4x2: cinfo is used only to obtain the sample range-limit table; compptr->dct_table supplies the multipliers handed to DEQUANTIZE; only rows 0..1 of the first 4 coefficient columns of coef_block are read; output goes to output_buf[0..1], 4 samples per row, starting at column output_col. */ 3956 3957 GLOBAL(void) 3958 jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 3959 JCOEFPTR coef_block, 3960 JSAMPARRAY output_buf, JDIMENSION output_col) 3961 { 3962 INT32 tmp0, tmp2, tmp10, tmp12; 3963 INT32 z1, z2, z3; 3964 JCOEFPTR inptr; 3965 ISLOW_MULT_TYPE * quantptr; 3966 INT32 * wsptr; 3967 JSAMPROW outptr; 3968 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 3969 int ctr; 3970 INT32 workspace[4*2]; /* buffers data between passes: 2 rows of 4 INT32s */ 3971 SHIFT_TEMPS 3972 3973 /* Pass 1: process columns from input, store into work array. The 2-point kernel is a plain sum/difference, so the workspace values carry no PASS1_BITS scaling. */ 3974 3975 inptr = coef_block; 3976 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 3977 wsptr = workspace; 3978 for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) { 3979 /* Even part */ 3980 3981 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 3982 3983 /* Odd part */ 3984 3985 tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 3986 3987 /* Final output stage */ 3988 3989 wsptr[4*0] = tmp10 + tmp0; 3990 wsptr[4*1] = tmp10 - tmp0; 3991 } 3992 3993 /* Pass 2: process 2 rows from work array, store into output array. 3994 * 4-point IDCT kernel, 3995 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. 3996 */ 3997 3998 wsptr = workspace; 3999 for (ctr = 0; ctr < 2; ctr++) { 4000 outptr = output_buf[ctr] + output_col; 4001 4002 /* Even part */ 4003 4004 /* Add range center and fudge factor for final descale and range-limit.
*/ 4005 tmp0 = wsptr[0] + ((((INT32) RANGE_CENTER) << 3) + (ONE << 2)); /* shifts are 3 and 2 (no PASS1_BITS term) because pass 1 stored unscaled values; the final descale is CONST_BITS+3 accordingly */ 4006 tmp2 = wsptr[2]; 4007 4008 tmp10 = (tmp0 + tmp2) << CONST_BITS; 4009 tmp12 = (tmp0 - tmp2) << CONST_BITS; 4010 4011 /* Odd part */ 4012 /* Same rotation as in the even part of the 8x8 LL&M IDCT */ 4013 4014 z2 = wsptr[1]; 4015 z3 = wsptr[3]; 4016 4017 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ 4018 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ 4019 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ 4020 4021 /* Final output stage */ 4022 4023 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 4024 CONST_BITS+3) 4025 & RANGE_MASK]; 4026 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 4027 CONST_BITS+3) 4028 & RANGE_MASK]; 4029 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, 4030 CONST_BITS+3) 4031 & RANGE_MASK]; 4032 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, 4033 CONST_BITS+3) 4034 & RANGE_MASK]; 4035 4036 wsptr += 4; /* advance pointer to next row */ 4037 } 4038 } 4039 4040 4041 /* 4042 * Perform dequantization and inverse DCT on one block of coefficients, 4043 * producing a 2x1 output block. 4044 * 4045 * 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows). * * cinfo is used only to obtain the sample range-limit table. * Only coef_block[0] and coef_block[1] are read, each dequantized with * the matching entry of compptr->dct_table. * Two samples are written to output_buf[0], starting at column * output_col. No workspace is used. 4046 */ 4047 4048 GLOBAL(void) 4049 jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 4050 JCOEFPTR coef_block, 4051 JSAMPARRAY output_buf, JDIMENSION output_col) 4052 { 4053 DCTELEM tmp0, tmp1; 4054 ISLOW_MULT_TYPE * quantptr; 4055 JSAMPROW outptr; 4056 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 4057 ISHIFT_TEMPS 4058 4059 /* Pass 1: empty. */ 4060 4061 /* Pass 2: process 1 row from input, store into output array. */ 4062 4063 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 4064 outptr = output_buf[0] + output_col; 4065 4066 /* Even part */ 4067 4068 tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]); 4069 /* Add range center and fudge factor for final descale and range-limit.
*/ 4070 tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2); 4071 4072 /* Odd part */ 4073 4074 tmp1 = DEQUANTIZE(coef_block[1], quantptr[1]); 4075 4076 /* Final output stage: sum and difference of the two terms, descaled by 3 bits */ 4077 4078 outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK]; 4079 outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK]; 4080 } 4081 4082 4083 /* 4084 * Perform dequantization and inverse DCT on one block of coefficients, 4085 * producing an 8x16 output block. 4086 * 4087 * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows). * * cinfo is used only to obtain the sample range-limit table. * compptr->dct_table supplies the multipliers handed to DEQUANTIZE. * All 8 rows of all 8 coefficient columns of coef_block are read. * Output goes to output_buf[0..15], 8 samples per row, starting at * column output_col. * NOTE(review): intermediates are left-shifted and are presumably allowed * to be negative; ISO C leaves such shifts undefined -- confirm that the * targeted compilers shift arithmetically. 4088 */ 4089 4090 GLOBAL(void) 4091 jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 4092 JCOEFPTR coef_block, 4093 JSAMPARRAY output_buf, JDIMENSION output_col) 4094 { 4095 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13; 4096 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; 4097 INT32 z1, z2, z3, z4; 4098 JCOEFPTR inptr; 4099 ISLOW_MULT_TYPE * quantptr; 4100 int * wsptr; 4101 JSAMPROW outptr; 4102 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 4103 int ctr; 4104 int workspace[8*16]; /* buffers data between passes: 16 rows of 8 ints */ 4105 SHIFT_TEMPS 4106 4107 /* Pass 1: process columns from input, store into work array. 4108 * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32). 4109 */ 4110 4111 inptr = coef_block; 4112 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 4113 wsptr = workspace; 4114 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 4115 /* Even part */ 4116 4117 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 4118 tmp0 <<= CONST_BITS; 4119 /* Add fudge factor here for final descale.
*/ 4120 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); 4121 4122 z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 4123 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ 4124 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ 4125 4126 tmp10 = tmp0 + tmp1; 4127 tmp11 = tmp0 - tmp1; 4128 tmp12 = tmp0 + tmp2; 4129 tmp13 = tmp0 - tmp2; 4130 4131 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 4132 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 4133 z3 = z1 - z2; 4134 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ 4135 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ 4136 4137 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ 4138 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ 4139 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ 4140 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ 4141 4142 tmp20 = tmp10 + tmp0; 4143 tmp27 = tmp10 - tmp0; 4144 tmp21 = tmp12 + tmp1; 4145 tmp26 = tmp12 - tmp1; 4146 tmp22 = tmp13 + tmp2; 4147 tmp25 = tmp13 - tmp2; 4148 tmp23 = tmp11 + tmp3; 4149 tmp24 = tmp11 - tmp3; 4150 4151 /* Odd part: input rows 1, 3, 5 and 7 all contribute; tmp0..tmp3 and tmp10..tmp13 are reused as the eight odd terms */ 4152 4153 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 4154 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 4155 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 4156 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 4157 4158 tmp11 = z1 + z3; 4159 4160 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ 4161 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ 4162 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ 4163 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ 4164 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ 4165 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ 4166 tmp0 = tmp1 + tmp2 + tmp3 - 4167 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ 4168 tmp13 = tmp10 + tmp11 + tmp12 - 4169 MULTIPLY(z1, FIX(1.835730603)); /*
c9+c11+c13-c15 */ 4170 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ 4171 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ 4172 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ 4173 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ 4174 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ 4175 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ 4176 z2 += z4; /* from here until it is reassigned, z2 holds the sum of input rows 3 and 7 */ 4177 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ 4178 tmp1 += z1; 4179 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ 4180 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ 4181 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ 4182 tmp12 += z2; 4183 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ 4184 tmp2 += z2; 4185 tmp3 += z2; 4186 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ 4187 tmp10 += z2; 4188 tmp11 += z2; 4189 4190 /* Final output stage: workspace row k gets even+odd, row 15-k gets even-odd */ 4191 4192 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS); 4193 wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS); 4194 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS); 4195 wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS); 4196 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS); 4197 wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS); 4198 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS); 4199 wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS); 4200 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS); 4201 wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS); 4202 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS); 4203 wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS); 4204 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS); 4205 wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS); 4206 wsptr[8*7]
= (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS); 4207 wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS); 4208 } 4209 4210 /* Pass 2: process rows from work array, store into output array. 4211 * Note that we must descale the results by a factor of 8 == 2**3, 4212 * and also undo the PASS1_BITS scaling. 4213 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16). 4214 */ 4215 4216 wsptr = workspace; 4217 for (ctr = 0; ctr < 16; ctr++) { 4218 outptr = output_buf[ctr] + output_col; 4219 4220 /* Even part: reverse the even part of the forward DCT. 4221 * The rotator is c(-6). 4222 */ 4223 4224 /* Add range center and fudge factor for final descale and range-limit. */ 4225 z2 = (INT32) wsptr[0] + 4226 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 4227 (ONE << (PASS1_BITS+2))); /* z2 feeds tmp0 and tmp1, so this bias reaches all 8 outputs of the row */ 4228 z3 = (INT32) wsptr[4]; 4229 4230 tmp0 = (z2 + z3) << CONST_BITS; 4231 tmp1 = (z2 - z3) << CONST_BITS; 4232 4233 z2 = (INT32) wsptr[2]; 4234 z3 = (INT32) wsptr[6]; 4235 4236 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ 4237 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ 4238 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ 4239 4240 tmp10 = tmp0 + tmp2; 4241 tmp13 = tmp0 - tmp2; 4242 tmp11 = tmp1 + tmp3; 4243 tmp12 = tmp1 - tmp3; 4244 4245 /* Odd part per figure 8; the matrix is unitary and hence its 4246 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
4247 */ 4248 4249 tmp0 = (INT32) wsptr[7]; 4250 tmp1 = (INT32) wsptr[5]; 4251 tmp2 = (INT32) wsptr[3]; 4252 tmp3 = (INT32) wsptr[1]; 4253 4254 z2 = tmp0 + tmp2; 4255 z3 = tmp1 + tmp3; 4256 4257 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */ 4258 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */ 4259 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */ 4260 z2 += z1; 4261 z3 += z1; 4262 4263 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ 4264 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */ 4265 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */ 4266 tmp0 += z1 + z2; 4267 tmp3 += z1 + z3; 4268 4269 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ 4270 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */ 4271 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */ 4272 tmp1 += z1 + z3; 4273 tmp2 += z1 + z2; 4274 4275 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 4276 4277 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3, 4278 CONST_BITS+PASS1_BITS+3) 4279 & RANGE_MASK]; 4280 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3, 4281 CONST_BITS+PASS1_BITS+3) 4282 & RANGE_MASK]; 4283 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2, 4284 CONST_BITS+PASS1_BITS+3) 4285 & RANGE_MASK]; 4286 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2, 4287 CONST_BITS+PASS1_BITS+3) 4288 & RANGE_MASK]; 4289 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1, 4290 CONST_BITS+PASS1_BITS+3) 4291 & RANGE_MASK]; 4292 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1, 4293 CONST_BITS+PASS1_BITS+3) 4294 & RANGE_MASK]; 4295 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0, 4296 CONST_BITS+PASS1_BITS+3) 4297 & RANGE_MASK]; 4298 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0, 4299 CONST_BITS+PASS1_BITS+3) 4300 & RANGE_MASK]; 4301 4302 wsptr += DCTSIZE; /* advance pointer to next row */ 4303 } 4304 } 4305 4306 4307 /* 4308 * Perform dequantization and inverse DCT on one block of
coefficients, 4309 * producing a 7x14 output block. 4310 * 4311 * 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows). 4312 */ /* jpeg_idct_7x14: cinfo is used only to obtain the sample range-limit table; compptr->dct_table supplies the multipliers handed to DEQUANTIZE; all 8 rows of the first 7 coefficient columns of coef_block are read; output goes to output_buf[0..13], 7 samples per row, starting at column output_col. */ 4313 4314 GLOBAL(void) 4315 jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 4316 JCOEFPTR coef_block, 4317 JSAMPARRAY output_buf, JDIMENSION output_col) 4318 { 4319 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; 4320 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; 4321 INT32 z1, z2, z3, z4; 4322 JCOEFPTR inptr; 4323 ISLOW_MULT_TYPE * quantptr; 4324 int * wsptr; 4325 JSAMPROW outptr; 4326 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 4327 int ctr; 4328 int workspace[7*14]; /* buffers data between passes: 14 rows of 7 ints */ 4329 SHIFT_TEMPS 4330 4331 /* Pass 1: process columns from input, store into work array. 4332 * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28). 4333 */ 4334 4335 inptr = coef_block; 4336 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 4337 wsptr = workspace; 4338 for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) { 4339 /* Even part */ 4340 4341 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 4342 z1 <<= CONST_BITS; 4343 /* Add fudge factor here for final descale.
*/ 4344 z1 += ONE << (CONST_BITS-PASS1_BITS-1); 4345 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 4346 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ 4347 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ 4348 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ 4349 4350 tmp10 = z1 + z2; 4351 tmp11 = z1 + z3; 4352 tmp12 = z1 - z4; 4353 4354 tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */ 4355 CONST_BITS-PASS1_BITS); /* descaled right away: its odd-part partner tmp13 is later built directly at PASS1_BITS scale */ 4356 4357 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 4358 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 4359 4360 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ 4361 4362 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ 4363 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ 4364 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ 4365 MULTIPLY(z2, FIX(1.378756276)); /* c2 */ 4366 4367 tmp20 = tmp10 + tmp13; 4368 tmp26 = tmp10 - tmp13; 4369 tmp21 = tmp11 + tmp14; 4370 tmp25 = tmp11 - tmp14; 4371 tmp22 = tmp12 + tmp15; 4372 tmp24 = tmp12 - tmp15; 4373 4374 /* Odd part: tmp10..tmp16 are reused as the seven odd terms */ 4375 4376 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 4377 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 4378 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 4379 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 4380 tmp13 = z4 << CONST_BITS; 4381 4382 tmp14 = z1 + z3; 4383 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ 4384 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ 4385 tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ 4386 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ 4387 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ 4388 z1 -= z2; 4389 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */ 4390 tmp16 += tmp15; 4391 z1 += z4; 4392 z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */ 4393 tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ 4394 tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));
/* c3+c5-c13 */ 4395 z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ 4396 tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ /* written with ten decimals; it agrees with 1.405321284 + 0.752406978 - 0.467085129 from the neighbouring constants */ 4397 tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ 4398 4399 tmp13 = (z1 - z3) << PASS1_BITS; /* tmp13 reused: no multiplication involved, so it is scaled by PASS1_BITS only */ 4400 4401 /* Final output stage: workspace row k gets even+odd, row 13-k gets even-odd */ 4402 4403 wsptr[7*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 4404 wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 4405 wsptr[7*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 4406 wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 4407 wsptr[7*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 4408 wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 4409 wsptr[7*3] = (int) (tmp23 + tmp13); /* no shift: tmp23 was descaled above and tmp13 was built at PASS1_BITS scale */ 4410 wsptr[7*10] = (int) (tmp23 - tmp13); 4411 wsptr[7*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 4412 wsptr[7*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 4413 wsptr[7*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); 4414 wsptr[7*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); 4415 wsptr[7*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); 4416 wsptr[7*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); 4417 } 4418 4419 /* Pass 2: process 14 rows from work array, store into output array. 4420 * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14). 4421 */ 4422 4423 wsptr = workspace; 4424 for (ctr = 0; ctr < 14; ctr++) { 4425 outptr = output_buf[ctr] + output_col; 4426 4427 /* Even part */ 4428 4429 /* Add range center and fudge factor for final descale and range-limit.
*/ 4430 tmp23 = (INT32) wsptr[0] + 4431 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 4432 (ONE << (PASS1_BITS+2))); 4433 tmp23 <<= CONST_BITS; 4434 4435 z1 = (INT32) wsptr[2]; 4436 z2 = (INT32) wsptr[4]; 4437 z3 = (INT32) wsptr[6]; 4438 4439 tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ 4440 tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ 4441 tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ 4442 tmp10 = z1 + z3; 4443 z2 -= tmp10; 4444 tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */ 4445 tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ 4446 tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ 4447 tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ 4448 4449 /* Odd part */ 4450 4451 z1 = (INT32) wsptr[1]; 4452 z2 = (INT32) wsptr[3]; 4453 z3 = (INT32) wsptr[5]; 4454 4455 tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ 4456 tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ 4457 tmp10 = tmp11 - tmp12; 4458 tmp11 += tmp12; 4459 tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ 4460 tmp11 += tmp12; 4461 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ 4462 tmp10 += z2; 4463 tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ 4464 4465 /* Final output stage: the centre sample outptr[3] takes the even term tmp23 alone */ 4466 4467 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 4468 CONST_BITS+PASS1_BITS+3) 4469 & RANGE_MASK]; 4470 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 4471 CONST_BITS+PASS1_BITS+3) 4472 & RANGE_MASK]; 4473 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 4474 CONST_BITS+PASS1_BITS+3) 4475 & RANGE_MASK]; 4476 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 4477 CONST_BITS+PASS1_BITS+3) 4478 & RANGE_MASK]; 4479 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 4480 CONST_BITS+PASS1_BITS+3) 4481 & RANGE_MASK]; 4482 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 4483 CONST_BITS+PASS1_BITS+3) 4484 & RANGE_MASK]; 4485 outptr[3] =
range_limit[(int) RIGHT_SHIFT(tmp23, 4486 CONST_BITS+PASS1_BITS+3) 4487 & RANGE_MASK]; 4488 4489 wsptr += 7; /* advance pointer to next row */ 4490 } 4491 } 4492 4493 4494 /* 4495 * Perform dequantization and inverse DCT on one block of coefficients, 4496 * producing a 6x12 output block. 4497 * 4498 * 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows). * * cinfo is used only to obtain the sample range-limit table. * compptr->dct_table supplies the multipliers handed to DEQUANTIZE. * All 8 rows of the first 6 coefficient columns of coef_block are read. * Output goes to output_buf[0..11], 6 samples per row, starting at * column output_col. * NOTE(review): intermediates are left-shifted and are presumably allowed * to be negative; ISO C leaves such shifts undefined -- confirm that the * targeted compilers shift arithmetically. 4499 */ 4500 4501 GLOBAL(void) 4502 jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 4503 JCOEFPTR coef_block, 4504 JSAMPARRAY output_buf, JDIMENSION output_col) 4505 { 4506 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; 4507 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; 4508 INT32 z1, z2, z3, z4; 4509 JCOEFPTR inptr; 4510 ISLOW_MULT_TYPE * quantptr; 4511 int * wsptr; 4512 JSAMPROW outptr; 4513 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 4514 int ctr; 4515 int workspace[6*12]; /* buffers data between passes: 12 rows of 6 ints */ 4516 SHIFT_TEMPS 4517 4518 /* Pass 1: process columns from input, store into work array. 4519 * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24). 4520 */ 4521 4522 inptr = coef_block; 4523 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 4524 wsptr = workspace; 4525 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) { 4526 /* Even part */ 4527 4528 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 4529 z3 <<= CONST_BITS; 4530 /* Add fudge factor here for final descale.
*/ 4531 z3 += ONE << (CONST_BITS-PASS1_BITS-1); 4532 4533 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 4534 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ 4535 4536 tmp10 = z3 + z4; 4537 tmp11 = z3 - z4; 4538 4539 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 4540 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ 4541 z1 <<= CONST_BITS; /* z4 already holds the c2 product of the unshifted z1 */ 4542 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 4543 z2 <<= CONST_BITS; 4544 4545 tmp12 = z1 - z2; /* tmp12 is a scratch value, reassigned twice more below for other even-part combinations */ 4546 4547 tmp21 = z3 + tmp12; 4548 tmp24 = z3 - tmp12; 4549 4550 tmp12 = z4 + z2; 4551 4552 tmp20 = tmp10 + tmp12; 4553 tmp25 = tmp10 - tmp12; 4554 4555 tmp12 = z4 - z1 - z2; 4556 4557 tmp22 = tmp11 + tmp12; 4558 tmp23 = tmp11 - tmp12; 4559 4560 /* Odd part: tmp10..tmp15 are reused as the six odd terms */ 4561 4562 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 4563 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 4564 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 4565 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 4566 4567 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ 4568 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ 4569 4570 tmp10 = z1 + z3; 4571 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ 4572 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ 4573 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ 4574 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ 4575 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ 4576 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ 4577 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ 4578 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ 4579 4580 z1 -= z4; /* tmp11 and tmp14 are rebuilt from scratch below; their earlier values were consumed above */ 4581 z2 -= z3; 4582 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ 4583 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ 4584 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ 4585 4586 /* Final output stage: workspace row k gets even+odd, row 11-k gets even-odd */ 4587 4588 wsptr[6*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 4589
wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 4590 wsptr[6*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 4591 wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 4592 wsptr[6*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 4593 wsptr[6*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 4594 wsptr[6*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); 4595 wsptr[6*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); 4596 wsptr[6*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 4597 wsptr[6*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 4598 wsptr[6*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); 4599 wsptr[6*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); 4600 } 4601 4602 /* Pass 2: process 12 rows from work array, store into output array. 4603 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). 4604 */ 4605 4606 wsptr = workspace; 4607 for (ctr = 0; ctr < 12; ctr++) { 4608 outptr = output_buf[ctr] + output_col; 4609 4610 /* Even part */ 4611 4612 /* Add range center and fudge factor for final descale and range-limit.
*/ 4613 tmp10 = (INT32) wsptr[0] + 4614 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 4615 (ONE << (PASS1_BITS+2))); 4616 tmp10 <<= CONST_BITS; 4617 tmp12 = (INT32) wsptr[4]; 4618 tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */ 4619 tmp11 = tmp10 + tmp20; 4620 tmp21 = tmp10 - tmp20 - tmp20; 4621 tmp20 = (INT32) wsptr[2]; 4622 tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */ 4623 tmp20 = tmp11 + tmp10; 4624 tmp22 = tmp11 - tmp10; 4625 4626 /* Odd part */ 4627 4628 z1 = (INT32) wsptr[1]; 4629 z2 = (INT32) wsptr[3]; 4630 z3 = (INT32) wsptr[5]; 4631 tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ 4632 tmp10 = tmp11 + ((z1 + z2) << CONST_BITS); 4633 tmp12 = tmp11 + ((z3 - z2) << CONST_BITS); 4634 tmp11 = (z1 - z2 - z3) << CONST_BITS; /* tmp11 is reused: its c5 product is already folded into tmp10 and tmp12 */ 4635 4636 /* Final output stage */ 4637 4638 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 4639 CONST_BITS+PASS1_BITS+3) 4640 & RANGE_MASK]; 4641 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 4642 CONST_BITS+PASS1_BITS+3) 4643 & RANGE_MASK]; 4644 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 4645 CONST_BITS+PASS1_BITS+3) 4646 & RANGE_MASK]; 4647 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 4648 CONST_BITS+PASS1_BITS+3) 4649 & RANGE_MASK]; 4650 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 4651 CONST_BITS+PASS1_BITS+3) 4652 & RANGE_MASK]; 4653 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 4654 CONST_BITS+PASS1_BITS+3) 4655 & RANGE_MASK]; 4656 4657 wsptr += 6; /* advance pointer to next row */ 4658 } 4659 } 4660 4661 4662 /* 4663 * Perform dequantization and inverse DCT on one block of coefficients, 4664 * producing a 5x10 output block. 4665 * 4666 * 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows).
4667 */ 4668 4669 GLOBAL(void) 4670 jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 4671 JCOEFPTR coef_block, 4672 JSAMPARRAY output_buf, JDIMENSION output_col) 4673 { 4674 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; 4675 INT32 tmp20, tmp21, tmp22, tmp23, tmp24; 4676 INT32 z1, z2, z3, z4, z5; 4677 JCOEFPTR inptr; 4678 ISLOW_MULT_TYPE * quantptr; 4679 int * wsptr; 4680 JSAMPROW outptr; 4681 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 4682 int ctr; 4683 int workspace[5*10]; /* buffers data between passes */ 4684 SHIFT_TEMPS 4685 4686 /* Pass 1: process columns from input, store into work array. 4687 * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20). 4688 */ 4689 4690 inptr = coef_block; 4691 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 4692 wsptr = workspace; 4693 for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) { 4694 /* Even part */ 4695 4696 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 4697 z3 <<= CONST_BITS; 4698 /* Add fudge factor here for final descale. 
*/ 4699 z3 += ONE << (CONST_BITS-PASS1_BITS-1); 4700 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 4701 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ 4702 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ 4703 tmp10 = z3 + z1; 4704 tmp11 = z3 - z2; 4705 4706 tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */ 4707 CONST_BITS-PASS1_BITS); 4708 4709 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 4710 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 4711 4712 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ 4713 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ 4714 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ 4715 4716 tmp20 = tmp10 + tmp12; 4717 tmp24 = tmp10 - tmp12; 4718 tmp21 = tmp11 + tmp13; 4719 tmp23 = tmp11 - tmp13; 4720 4721 /* Odd part */ 4722 4723 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 4724 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 4725 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 4726 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 4727 4728 tmp11 = z2 + z4; 4729 tmp13 = z2 - z4; 4730 4731 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ 4732 z5 = z3 << CONST_BITS; 4733 4734 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ 4735 z4 = z5 + tmp12; 4736 4737 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ 4738 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ 4739 4740 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ 4741 z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1)); 4742 4743 tmp12 = (z1 - tmp13 - z3) << PASS1_BITS; 4744 4745 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ 4746 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ 4747 4748 /* Final output stage */ 4749 4750 wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 4751 wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 4752 wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 4753 
wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 4754 wsptr[5*2] = (int) (tmp22 + tmp12); 4755 wsptr[5*7] = (int) (tmp22 - tmp12); 4756 wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); 4757 wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); 4758 wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 4759 wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 4760 } 4761 4762 /* Pass 2: process 10 rows from work array, store into output array. 4763 * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10). 4764 */ 4765 4766 wsptr = workspace; 4767 for (ctr = 0; ctr < 10; ctr++) { 4768 outptr = output_buf[ctr] + output_col; 4769 4770 /* Even part */ 4771 4772 /* Add range center and fudge factor for final descale and range-limit. */ 4773 tmp12 = (INT32) wsptr[0] + 4774 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 4775 (ONE << (PASS1_BITS+2))); 4776 tmp12 <<= CONST_BITS; 4777 tmp13 = (INT32) wsptr[2]; 4778 tmp14 = (INT32) wsptr[4]; 4779 z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */ 4780 z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */ 4781 z3 = tmp12 + z2; 4782 tmp10 = z3 + z1; 4783 tmp11 = z3 - z1; 4784 tmp12 -= z2 << 2; 4785 4786 /* Odd part */ 4787 4788 z2 = (INT32) wsptr[1]; 4789 z3 = (INT32) wsptr[3]; 4790 4791 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ 4792 tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ 4793 tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ 4794 4795 /* Final output stage */ 4796 4797 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp13, 4798 CONST_BITS+PASS1_BITS+3) 4799 & RANGE_MASK]; 4800 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp13, 4801 CONST_BITS+PASS1_BITS+3) 4802 & RANGE_MASK]; 4803 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp14, 4804 CONST_BITS+PASS1_BITS+3) 4805 & RANGE_MASK]; 4806 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp14, 4807 
CONST_BITS+PASS1_BITS+3) 4808 & RANGE_MASK]; 4809 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12, 4810 CONST_BITS+PASS1_BITS+3) 4811 & RANGE_MASK]; 4812 4813 wsptr += 5; /* advance pointer to next row */ 4814 } 4815 } 4816 4817 4818 /* 4819 * Perform dequantization and inverse DCT on one block of coefficients, 4820 * producing a 4x8 output block. 4821 * 4822 * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows). 4823 */ 4824 4825 GLOBAL(void) 4826 jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 4827 JCOEFPTR coef_block, 4828 JSAMPARRAY output_buf, JDIMENSION output_col) 4829 { 4830 INT32 tmp0, tmp1, tmp2, tmp3; 4831 INT32 tmp10, tmp11, tmp12, tmp13; 4832 INT32 z1, z2, z3; 4833 JCOEFPTR inptr; 4834 ISLOW_MULT_TYPE * quantptr; 4835 int * wsptr; 4836 JSAMPROW outptr; 4837 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 4838 int ctr; 4839 int workspace[4*8]; /* buffers data between passes */ 4840 SHIFT_TEMPS 4841 4842 /* Pass 1: process columns from input, store into work array. 4843 * Note results are scaled up by sqrt(8) compared to a true IDCT; 4844 * furthermore, we scale the results by 2**PASS1_BITS. 4845 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16). 4846 */ 4847 4848 inptr = coef_block; 4849 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 4850 wsptr = workspace; 4851 for (ctr = 4; ctr > 0; ctr--) { 4852 /* Due to quantization, we will usually find that many of the input 4853 * coefficients are zero, especially the AC terms. We can exploit this 4854 * by short-circuiting the IDCT calculation for any column in which all 4855 * the AC terms are zero. In that case each output is equal to the 4856 * DC coefficient (with scale factor as needed). 4857 * With typical images and quantization tables, half or more of the 4858 * column DCT calculations can be simplified this way. 
4859 */ 4860 4861 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && 4862 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && 4863 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && 4864 inptr[DCTSIZE*7] == 0) { 4865 /* AC terms all zero */ 4866 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; 4867 4868 wsptr[4*0] = dcval; 4869 wsptr[4*1] = dcval; 4870 wsptr[4*2] = dcval; 4871 wsptr[4*3] = dcval; 4872 wsptr[4*4] = dcval; 4873 wsptr[4*5] = dcval; 4874 wsptr[4*6] = dcval; 4875 wsptr[4*7] = dcval; 4876 4877 inptr++; /* advance pointers to next column */ 4878 quantptr++; 4879 wsptr++; 4880 continue; 4881 } 4882 4883 /* Even part: reverse the even part of the forward DCT. 4884 * The rotator is c(-6). 4885 */ 4886 4887 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 4888 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 4889 z2 <<= CONST_BITS; 4890 z3 <<= CONST_BITS; 4891 /* Add fudge factor here for final descale. */ 4892 z2 += ONE << (CONST_BITS-PASS1_BITS-1); 4893 4894 tmp0 = z2 + z3; 4895 tmp1 = z2 - z3; 4896 4897 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 4898 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 4899 4900 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ 4901 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ 4902 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ 4903 4904 tmp10 = tmp0 + tmp2; 4905 tmp13 = tmp0 - tmp2; 4906 tmp11 = tmp1 + tmp3; 4907 tmp12 = tmp1 - tmp3; 4908 4909 /* Odd part per figure 8; the matrix is unitary and hence its 4910 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. 
4911 */ 4912 4913 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 4914 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 4915 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 4916 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 4917 4918 z2 = tmp0 + tmp2; 4919 z3 = tmp1 + tmp3; 4920 4921 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */ 4922 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */ 4923 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */ 4924 z2 += z1; 4925 z3 += z1; 4926 4927 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ 4928 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */ 4929 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */ 4930 tmp0 += z1 + z2; 4931 tmp3 += z1 + z3; 4932 4933 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ 4934 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */ 4935 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */ 4936 tmp1 += z1 + z3; 4937 tmp2 += z1 + z2; 4938 4939 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 4940 4941 wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS); 4942 wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS); 4943 wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS); 4944 wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS); 4945 wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS); 4946 wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS); 4947 wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS); 4948 wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS); 4949 4950 inptr++; /* advance pointers to next column */ 4951 quantptr++; 4952 wsptr++; 4953 } 4954 4955 /* Pass 2: process 8 rows from work array, store into output array. 4956 * 4-point IDCT kernel, 4957 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. 
4958 */ 4959 4960 wsptr = workspace; 4961 for (ctr = 0; ctr < 8; ctr++) { 4962 outptr = output_buf[ctr] + output_col; 4963 4964 /* Even part */ 4965 4966 /* Add range center and fudge factor for final descale and range-limit. */ 4967 tmp0 = (INT32) wsptr[0] + 4968 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 4969 (ONE << (PASS1_BITS+2))); 4970 tmp2 = (INT32) wsptr[2]; 4971 4972 tmp10 = (tmp0 + tmp2) << CONST_BITS; 4973 tmp12 = (tmp0 - tmp2) << CONST_BITS; 4974 4975 /* Odd part */ 4976 /* Same rotation as in the even part of the 8x8 LL&M IDCT */ 4977 4978 z2 = (INT32) wsptr[1]; 4979 z3 = (INT32) wsptr[3]; 4980 4981 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ 4982 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ 4983 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ 4984 4985 /* Final output stage */ 4986 4987 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 4988 CONST_BITS+PASS1_BITS+3) 4989 & RANGE_MASK]; 4990 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 4991 CONST_BITS+PASS1_BITS+3) 4992 & RANGE_MASK]; 4993 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, 4994 CONST_BITS+PASS1_BITS+3) 4995 & RANGE_MASK]; 4996 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, 4997 CONST_BITS+PASS1_BITS+3) 4998 & RANGE_MASK]; 4999 5000 wsptr += 4; /* advance pointer to next row */ 5001 } 5002 } 5003 5004 5005 /* 5006 * Perform dequantization and inverse DCT on one block of coefficients, 5007 * producing a reduced-size 3x6 output block. 5008 * 5009 * 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows). 
5010 */ 5011 5012 GLOBAL(void) 5013 jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 5014 JCOEFPTR coef_block, 5015 JSAMPARRAY output_buf, JDIMENSION output_col) 5016 { 5017 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12; 5018 INT32 z1, z2, z3; 5019 JCOEFPTR inptr; 5020 ISLOW_MULT_TYPE * quantptr; 5021 int * wsptr; 5022 JSAMPROW outptr; 5023 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 5024 int ctr; 5025 int workspace[3*6]; /* buffers data between passes */ 5026 SHIFT_TEMPS 5027 5028 /* Pass 1: process columns from input, store into work array. 5029 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). 5030 */ 5031 5032 inptr = coef_block; 5033 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 5034 wsptr = workspace; 5035 for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) { 5036 /* Even part */ 5037 5038 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 5039 tmp0 <<= CONST_BITS; 5040 /* Add fudge factor here for final descale. */ 5041 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); 5042 tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 5043 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ 5044 tmp1 = tmp0 + tmp10; 5045 tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS); 5046 tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 5047 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ 5048 tmp10 = tmp1 + tmp0; 5049 tmp12 = tmp1 - tmp0; 5050 5051 /* Odd part */ 5052 5053 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 5054 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 5055 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 5056 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ 5057 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); 5058 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); 5059 tmp1 = (z1 - z2 - z3) << PASS1_BITS; 5060 5061 /* Final output stage */ 5062 5063 wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); 5064 wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, 
CONST_BITS-PASS1_BITS); 5065 wsptr[3*1] = (int) (tmp11 + tmp1); 5066 wsptr[3*4] = (int) (tmp11 - tmp1); 5067 wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); 5068 wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); 5069 } 5070 5071 /* Pass 2: process 6 rows from work array, store into output array. 5072 * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6). 5073 */ 5074 5075 wsptr = workspace; 5076 for (ctr = 0; ctr < 6; ctr++) { 5077 outptr = output_buf[ctr] + output_col; 5078 5079 /* Even part */ 5080 5081 /* Add range center and fudge factor for final descale and range-limit. */ 5082 tmp0 = (INT32) wsptr[0] + 5083 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 5084 (ONE << (PASS1_BITS+2))); 5085 tmp0 <<= CONST_BITS; 5086 tmp2 = (INT32) wsptr[2]; 5087 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ 5088 tmp10 = tmp0 + tmp12; 5089 tmp2 = tmp0 - tmp12 - tmp12; 5090 5091 /* Odd part */ 5092 5093 tmp12 = (INT32) wsptr[1]; 5094 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ 5095 5096 /* Final output stage */ 5097 5098 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 5099 CONST_BITS+PASS1_BITS+3) 5100 & RANGE_MASK]; 5101 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 5102 CONST_BITS+PASS1_BITS+3) 5103 & RANGE_MASK]; 5104 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2, 5105 CONST_BITS+PASS1_BITS+3) 5106 & RANGE_MASK]; 5107 5108 wsptr += 3; /* advance pointer to next row */ 5109 } 5110 } 5111 5112 5113 /* 5114 * Perform dequantization and inverse DCT on one block of coefficients, 5115 * producing a 2x4 output block. 5116 * 5117 * 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows). 
5118 */ 5119 5120 GLOBAL(void) 5121 jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 5122 JCOEFPTR coef_block, 5123 JSAMPARRAY output_buf, JDIMENSION output_col) 5124 { 5125 INT32 tmp0, tmp2, tmp10, tmp12; 5126 INT32 z1, z2, z3; 5127 JCOEFPTR inptr; 5128 ISLOW_MULT_TYPE * quantptr; 5129 INT32 * wsptr; 5130 JSAMPROW outptr; 5131 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 5132 int ctr; 5133 INT32 workspace[2*4]; /* buffers data between passes */ 5134 SHIFT_TEMPS 5135 5136 /* Pass 1: process columns from input, store into work array. 5137 * 4-point IDCT kernel, 5138 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. 5139 */ 5140 5141 inptr = coef_block; 5142 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 5143 wsptr = workspace; 5144 for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) { 5145 /* Even part */ 5146 5147 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 5148 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 5149 5150 tmp10 = (tmp0 + tmp2) << CONST_BITS; 5151 tmp12 = (tmp0 - tmp2) << CONST_BITS; 5152 5153 /* Odd part */ 5154 /* Same rotation as in the even part of the 8x8 LL&M IDCT */ 5155 5156 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 5157 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 5158 5159 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ 5160 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ 5161 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ 5162 5163 /* Final output stage */ 5164 5165 wsptr[2*0] = tmp10 + tmp0; 5166 wsptr[2*3] = tmp10 - tmp0; 5167 wsptr[2*1] = tmp12 + tmp2; 5168 wsptr[2*2] = tmp12 - tmp2; 5169 } 5170 5171 /* Pass 2: process 4 rows from work array, store into output array. */ 5172 5173 wsptr = workspace; 5174 for (ctr = 0; ctr < 4; ctr++) { 5175 outptr = output_buf[ctr] + output_col; 5176 5177 /* Even part */ 5178 5179 /* Add range center and fudge factor for final descale and range-limit. 
*/ 5180 tmp10 = wsptr[0] + 5181 ((((INT32) RANGE_CENTER) << (CONST_BITS+3)) + 5182 (ONE << (CONST_BITS+2))); 5183 5184 /* Odd part */ 5185 5186 tmp0 = wsptr[1]; 5187 5188 /* Final output stage */ 5189 5190 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS+3) 5191 & RANGE_MASK]; 5192 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS+3) 5193 & RANGE_MASK]; 5194 5195 wsptr += 2; /* advance pointer to next row */ 5196 } 5197 } 5198 5199 5200 /* 5201 * Perform dequantization and inverse DCT on one block of coefficients, 5202 * producing a 1x2 output block. 5203 * 5204 * 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows). 5205 */ 5206 5207 GLOBAL(void) 5208 jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 5209 JCOEFPTR coef_block, 5210 JSAMPARRAY output_buf, JDIMENSION output_col) 5211 { 5212 DCTELEM tmp0, tmp1; 5213 ISLOW_MULT_TYPE * quantptr; 5214 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 5215 ISHIFT_TEMPS 5216 5217 /* Process 1 column from input, store into output array. */ 5218 5219 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 5220 5221 /* Even part */ 5222 5223 tmp0 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]); 5224 /* Add range center and fudge factor for final descale and range-limit. */ 5225 tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2); 5226 5227 /* Odd part */ 5228 5229 tmp1 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]); 5230 5231 /* Final output stage */ 5232 5233 output_buf[0][output_col] = 5234 range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK]; 5235 output_buf[1][output_col] = 5236 range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK]; 5237 } 5238 5239 #endif /* IDCT_SCALING_SUPPORTED */ 5240 #endif /* DCT_ISLOW_SUPPORTED */ 5241