1 /*
2 * jcdctmgr.c
3 *
4 * Copyright (C) 1994-1996, Thomas G. Lane.
5 * Modified 2003-2013 by Guido Vollbeding.
6 * This file is part of the Independent JPEG Group's software.
7 * For conditions of distribution and use, see the accompanying README file.
8 *
9 * This file contains the forward-DCT management logic.
10 * This code selects a particular DCT implementation to be used,
11 * and it performs related housekeeping chores including coefficient
12 * quantization.
13 */
14
15 #define JPEG_INTERNALS
16 #include "jinclude.h"
17 #include "jpeglib.h"
18 #include "jdct.h" /* Private declarations for DCT subsystem */
19
20
21 /* Private subobject for this module */
22
23 typedef struct {
24 struct jpeg_forward_dct pub; /* public fields */
25
26 /* Pointer to the DCT routine actually in use */
27 forward_DCT_method_ptr do_dct[MAX_COMPONENTS];
28
29 #ifdef DCT_FLOAT_SUPPORTED
30 /* Same as above for the floating-point case. */
31 float_DCT_method_ptr do_float_dct[MAX_COMPONENTS];
32 #endif
33 } my_fdct_controller;
34
35 typedef my_fdct_controller * my_fdct_ptr;
36
37
38 /* The allocated post-DCT divisor tables -- big enough for any
39 * supported variant and not identical to the quant table entries,
40 * because of scaling (especially for an unnormalized DCT) --
41 * are pointed to by dct_table in the per-component comp_info
42 * structures. Each table is given in normal array order.
43 */
44
45 typedef union {
46 DCTELEM int_array[DCTSIZE2];
47 #ifdef DCT_FLOAT_SUPPORTED
48 FAST_FLOAT float_array[DCTSIZE2];
49 #endif
50 } divisor_table;
51
52
53 /* The current scaled-DCT routines require ISLOW-style divisor tables,
54 * so be sure to compile that code if either ISLOW or SCALING is requested.
55 */
56 #ifdef DCT_ISLOW_SUPPORTED
57 #define PROVIDE_ISLOW_TABLES
58 #else
59 #ifdef DCT_SCALING_SUPPORTED
60 #define PROVIDE_ISLOW_TABLES
61 #endif
62 #endif
63
64
65 /*
66 * Perform forward DCT on one or more blocks of a component.
67 *
68 * The input samples are taken from the sample_data[] array starting at
69 * position start_row/start_col, and moving to the right for any additional
70 * blocks. The quantized coefficients are returned in coef_blocks[].
71 */
72
73 METHODDEF(void)
forward_DCT(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY sample_data,JBLOCKROW coef_blocks,JDIMENSION start_row,JDIMENSION start_col,JDIMENSION num_blocks)74 forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
75 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
76 JDIMENSION start_row, JDIMENSION start_col,
77 JDIMENSION num_blocks)
78 /* This version is used for integer DCT implementations. */
79 {
80 /* This routine is heavily used, so it's worth coding it tightly. */
81 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
82 forward_DCT_method_ptr do_dct = fdct->do_dct[compptr->component_index];
83 DCTELEM * divisors = (DCTELEM *) compptr->dct_table;
84 DCTELEM workspace[DCTSIZE2]; /* work area for FDCT subroutine */
85 JDIMENSION bi;
86
87 sample_data += start_row; /* fold in the vertical offset once */
88
89 for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
90 /* Perform the DCT */
91 (*do_dct) (workspace, sample_data, start_col);
92
93 /* Quantize/descale the coefficients, and store into coef_blocks[] */
94 { register DCTELEM temp, qval;
95 register int i;
96 register JCOEFPTR output_ptr = coef_blocks[bi];
97
98 for (i = 0; i < DCTSIZE2; i++) {
99 qval = divisors[i];
100 temp = workspace[i];
101 /* Divide the coefficient value by qval, ensuring proper rounding.
102 * Since C does not specify the direction of rounding for negative
103 * quotients, we have to force the dividend positive for portability.
104 *
105 * In most files, at least half of the output values will be zero
106 * (at default quantization settings, more like three-quarters...)
107 * so we should ensure that this case is fast. On many machines,
108 * a comparison is enough cheaper than a divide to make a special test
109 * a win. Since both inputs will be nonnegative, we need only test
110 * for a < b to discover whether a/b is 0.
111 * If your machine's division is fast enough, define FAST_DIVIDE.
112 */
113 #ifdef FAST_DIVIDE
114 #define DIVIDE_BY(a,b) a /= b
115 #else
116 #define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0
117 #endif
118 if (temp < 0) {
119 temp = -temp;
120 temp += qval>>1; /* for rounding */
121 DIVIDE_BY(temp, qval);
122 temp = -temp;
123 } else {
124 temp += qval>>1; /* for rounding */
125 DIVIDE_BY(temp, qval);
126 }
127 output_ptr[i] = (JCOEF) temp;
128 }
129 }
130 }
131 }
132
133
134 #ifdef DCT_FLOAT_SUPPORTED
135
136 METHODDEF(void)
forward_DCT_float(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY sample_data,JBLOCKROW coef_blocks,JDIMENSION start_row,JDIMENSION start_col,JDIMENSION num_blocks)137 forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
138 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
139 JDIMENSION start_row, JDIMENSION start_col,
140 JDIMENSION num_blocks)
141 /* This version is used for floating-point DCT implementations. */
142 {
143 /* This routine is heavily used, so it's worth coding it tightly. */
144 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
145 float_DCT_method_ptr do_dct = fdct->do_float_dct[compptr->component_index];
146 FAST_FLOAT * divisors = (FAST_FLOAT *) compptr->dct_table;
147 FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
148 JDIMENSION bi;
149
150 sample_data += start_row; /* fold in the vertical offset once */
151
152 for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
153 /* Perform the DCT */
154 (*do_dct) (workspace, sample_data, start_col);
155
156 /* Quantize/descale the coefficients, and store into coef_blocks[] */
157 { register FAST_FLOAT temp;
158 register int i;
159 register JCOEFPTR output_ptr = coef_blocks[bi];
160
161 for (i = 0; i < DCTSIZE2; i++) {
162 /* Apply the quantization and scaling factor */
163 temp = workspace[i] * divisors[i];
164 /* Round to nearest integer.
165 * Since C does not specify the direction of rounding for negative
166 * quotients, we have to force the dividend positive for portability.
167 * The maximum coefficient size is +-16K (for 12-bit data), so this
168 * code should work for either 16-bit or 32-bit ints.
169 */
170 output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
171 }
172 }
173 }
174 }
175
176 #endif /* DCT_FLOAT_SUPPORTED */
177
178
179 /*
180 * Initialize for a processing pass.
181 * Verify that all referenced Q-tables are present, and set up
182 * the divisor table for each one.
183 * In the current implementation, DCT of all components is done during
184 * the first pass, even if only some components will be output in the
185 * first scan. Hence all components should be examined here.
186 */
187
188 METHODDEF(void)
start_pass_fdctmgr(j_compress_ptr cinfo)189 start_pass_fdctmgr (j_compress_ptr cinfo)
190 {
191 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
192 int ci, qtblno, i;
193 jpeg_component_info *compptr;
194 int method = 0;
195 JQUANT_TBL * qtbl;
196 DCTELEM * dtbl;
197
198 for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
199 ci++, compptr++) {
200 /* Select the proper DCT routine for this component's scaling */
201 switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) {
202 #ifdef DCT_SCALING_SUPPORTED
203 case ((1 << 8) + 1):
204 fdct->do_dct[ci] = jpeg_fdct_1x1;
205 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
206 break;
207 case ((2 << 8) + 2):
208 fdct->do_dct[ci] = jpeg_fdct_2x2;
209 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
210 break;
211 case ((3 << 8) + 3):
212 fdct->do_dct[ci] = jpeg_fdct_3x3;
213 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
214 break;
215 case ((4 << 8) + 4):
216 fdct->do_dct[ci] = jpeg_fdct_4x4;
217 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
218 break;
219 case ((5 << 8) + 5):
220 fdct->do_dct[ci] = jpeg_fdct_5x5;
221 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
222 break;
223 case ((6 << 8) + 6):
224 fdct->do_dct[ci] = jpeg_fdct_6x6;
225 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
226 break;
227 case ((7 << 8) + 7):
228 fdct->do_dct[ci] = jpeg_fdct_7x7;
229 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
230 break;
231 case ((9 << 8) + 9):
232 fdct->do_dct[ci] = jpeg_fdct_9x9;
233 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
234 break;
235 case ((10 << 8) + 10):
236 fdct->do_dct[ci] = jpeg_fdct_10x10;
237 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
238 break;
239 case ((11 << 8) + 11):
240 fdct->do_dct[ci] = jpeg_fdct_11x11;
241 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
242 break;
243 case ((12 << 8) + 12):
244 fdct->do_dct[ci] = jpeg_fdct_12x12;
245 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
246 break;
247 case ((13 << 8) + 13):
248 fdct->do_dct[ci] = jpeg_fdct_13x13;
249 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
250 break;
251 case ((14 << 8) + 14):
252 fdct->do_dct[ci] = jpeg_fdct_14x14;
253 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
254 break;
255 case ((15 << 8) + 15):
256 fdct->do_dct[ci] = jpeg_fdct_15x15;
257 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
258 break;
259 case ((16 << 8) + 16):
260 fdct->do_dct[ci] = jpeg_fdct_16x16;
261 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
262 break;
263 case ((16 << 8) + 8):
264 fdct->do_dct[ci] = jpeg_fdct_16x8;
265 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
266 break;
267 case ((14 << 8) + 7):
268 fdct->do_dct[ci] = jpeg_fdct_14x7;
269 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
270 break;
271 case ((12 << 8) + 6):
272 fdct->do_dct[ci] = jpeg_fdct_12x6;
273 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
274 break;
275 case ((10 << 8) + 5):
276 fdct->do_dct[ci] = jpeg_fdct_10x5;
277 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
278 break;
279 case ((8 << 8) + 4):
280 fdct->do_dct[ci] = jpeg_fdct_8x4;
281 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
282 break;
283 case ((6 << 8) + 3):
284 fdct->do_dct[ci] = jpeg_fdct_6x3;
285 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
286 break;
287 case ((4 << 8) + 2):
288 fdct->do_dct[ci] = jpeg_fdct_4x2;
289 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
290 break;
291 case ((2 << 8) + 1):
292 fdct->do_dct[ci] = jpeg_fdct_2x1;
293 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
294 break;
295 case ((8 << 8) + 16):
296 fdct->do_dct[ci] = jpeg_fdct_8x16;
297 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
298 break;
299 case ((7 << 8) + 14):
300 fdct->do_dct[ci] = jpeg_fdct_7x14;
301 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
302 break;
303 case ((6 << 8) + 12):
304 fdct->do_dct[ci] = jpeg_fdct_6x12;
305 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
306 break;
307 case ((5 << 8) + 10):
308 fdct->do_dct[ci] = jpeg_fdct_5x10;
309 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
310 break;
311 case ((4 << 8) + 8):
312 fdct->do_dct[ci] = jpeg_fdct_4x8;
313 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
314 break;
315 case ((3 << 8) + 6):
316 fdct->do_dct[ci] = jpeg_fdct_3x6;
317 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
318 break;
319 case ((2 << 8) + 4):
320 fdct->do_dct[ci] = jpeg_fdct_2x4;
321 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
322 break;
323 case ((1 << 8) + 2):
324 fdct->do_dct[ci] = jpeg_fdct_1x2;
325 method = JDCT_ISLOW; /* jfdctint uses islow-style table */
326 break;
327 #endif
328 case ((DCTSIZE << 8) + DCTSIZE):
329 switch (cinfo->dct_method) {
330 #ifdef DCT_ISLOW_SUPPORTED
331 case JDCT_ISLOW:
332 fdct->do_dct[ci] = jpeg_fdct_islow;
333 method = JDCT_ISLOW;
334 break;
335 #endif
336 #ifdef DCT_IFAST_SUPPORTED
337 case JDCT_IFAST:
338 fdct->do_dct[ci] = jpeg_fdct_ifast;
339 method = JDCT_IFAST;
340 break;
341 #endif
342 #ifdef DCT_FLOAT_SUPPORTED
343 case JDCT_FLOAT:
344 fdct->do_float_dct[ci] = jpeg_fdct_float;
345 method = JDCT_FLOAT;
346 break;
347 #endif
348 default:
349 ERREXIT(cinfo, JERR_NOT_COMPILED);
350 break;
351 }
352 break;
353 default:
354 ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
355 compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size);
356 break;
357 }
358 qtblno = compptr->quant_tbl_no;
359 /* Make sure specified quantization table is present */
360 if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
361 cinfo->quant_tbl_ptrs[qtblno] == NULL)
362 ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
363 qtbl = cinfo->quant_tbl_ptrs[qtblno];
364 /* Create divisor table from quant table */
365 switch (method) {
366 #ifdef PROVIDE_ISLOW_TABLES
367 case JDCT_ISLOW:
368 /* For LL&M IDCT method, divisors are equal to raw quantization
369 * coefficients multiplied by 8 (to counteract scaling).
370 */
371 dtbl = (DCTELEM *) compptr->dct_table;
372 for (i = 0; i < DCTSIZE2; i++) {
373 dtbl[i] =
374 ((DCTELEM) qtbl->quantval[i]) << (compptr->component_needed ? 4 : 3);
375 }
376 fdct->pub.forward_DCT[ci] = forward_DCT;
377 break;
378 #endif
379 #ifdef DCT_IFAST_SUPPORTED
380 case JDCT_IFAST:
381 {
382 /* For AA&N IDCT method, divisors are equal to quantization
383 * coefficients scaled by scalefactor[row]*scalefactor[col], where
384 * scalefactor[0] = 1
385 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
386 * We apply a further scale factor of 8.
387 */
388 #define CONST_BITS 14
389 static const INT16 aanscales[DCTSIZE2] = {
390 /* precomputed values scaled up by 14 bits */
391 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
392 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
393 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
394 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
395 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
396 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
397 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
398 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
399 };
400 SHIFT_TEMPS
401
402 dtbl = (DCTELEM *) compptr->dct_table;
403 for (i = 0; i < DCTSIZE2; i++) {
404 dtbl[i] = (DCTELEM)
405 DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
406 (INT32) aanscales[i]),
407 compptr->component_needed ? CONST_BITS-4 : CONST_BITS-3);
408 }
409 }
410 fdct->pub.forward_DCT[ci] = forward_DCT;
411 break;
412 #endif
413 #ifdef DCT_FLOAT_SUPPORTED
414 case JDCT_FLOAT:
415 {
416 /* For float AA&N IDCT method, divisors are equal to quantization
417 * coefficients scaled by scalefactor[row]*scalefactor[col], where
418 * scalefactor[0] = 1
419 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
420 * We apply a further scale factor of 8.
421 * What's actually stored is 1/divisor so that the inner loop can
422 * use a multiplication rather than a division.
423 */
424 FAST_FLOAT * fdtbl = (FAST_FLOAT *) compptr->dct_table;
425 int row, col;
426 static const double aanscalefactor[DCTSIZE] = {
427 1.0, 1.387039845, 1.306562965, 1.175875602,
428 1.0, 0.785694958, 0.541196100, 0.275899379
429 };
430
431 i = 0;
432 for (row = 0; row < DCTSIZE; row++) {
433 for (col = 0; col < DCTSIZE; col++) {
434 fdtbl[i] = (FAST_FLOAT)
435 (1.0 / ((double) qtbl->quantval[i] *
436 aanscalefactor[row] * aanscalefactor[col] *
437 (compptr->component_needed ? 16.0 : 8.0)));
438 i++;
439 }
440 }
441 }
442 fdct->pub.forward_DCT[ci] = forward_DCT_float;
443 break;
444 #endif
445 default:
446 ERREXIT(cinfo, JERR_NOT_COMPILED);
447 break;
448 }
449 }
450 }
451
452
453 /*
454 * Initialize FDCT manager.
455 */
456
457 GLOBAL(void)
jinit_forward_dct(j_compress_ptr cinfo)458 jinit_forward_dct (j_compress_ptr cinfo)
459 {
460 my_fdct_ptr fdct;
461 int ci;
462 jpeg_component_info *compptr;
463
464 fdct = (my_fdct_ptr)
465 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
466 SIZEOF(my_fdct_controller));
467 cinfo->fdct = &fdct->pub;
468 fdct->pub.start_pass = start_pass_fdctmgr;
469
470 for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
471 ci++, compptr++) {
472 /* Allocate a divisor table for each component */
473 compptr->dct_table =
474 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
475 SIZEOF(divisor_table));
476 }
477 }
478