1 /*
2  * jcdctmgr.c
3  *
4  * Copyright (C) 1994-1996, Thomas G. Lane.
5  * Modified 2003-2013 by Guido Vollbeding.
6  * This file is part of the Independent JPEG Group's software.
7  * For conditions of distribution and use, see the accompanying README file.
8  *
9  * This file contains the forward-DCT management logic.
10  * This code selects a particular DCT implementation to be used,
11  * and it performs related housekeeping chores including coefficient
12  * quantization.
13  */
14 
15 #define JPEG_INTERNALS
16 #include "jinclude.h"
17 #include "jpeglib.h"
18 #include "jdct.h"		/* Private declarations for DCT subsystem */
19 
20 
21 /* Private subobject for this module */
22 
23 typedef struct {
24   struct jpeg_forward_dct pub;	/* public fields */
25 
26   /* Pointer to the DCT routine actually in use */
27   forward_DCT_method_ptr do_dct[MAX_COMPONENTS];
28 
29 #ifdef DCT_FLOAT_SUPPORTED
30   /* Same as above for the floating-point case. */
31   float_DCT_method_ptr do_float_dct[MAX_COMPONENTS];
32 #endif
33 } my_fdct_controller;
34 
35 typedef my_fdct_controller * my_fdct_ptr;
36 
37 
38 /* The allocated post-DCT divisor tables -- big enough for any
39  * supported variant and not identical to the quant table entries,
40  * because of scaling (especially for an unnormalized DCT) --
41  * are pointed to by dct_table in the per-component comp_info
42  * structures.  Each table is given in normal array order.
43  */
44 
45 typedef union {
46   DCTELEM int_array[DCTSIZE2];
47 #ifdef DCT_FLOAT_SUPPORTED
48   FAST_FLOAT float_array[DCTSIZE2];
49 #endif
50 } divisor_table;
51 
52 
53 /* The current scaled-DCT routines require ISLOW-style divisor tables,
54  * so be sure to compile that code if either ISLOW or SCALING is requested.
55  */
56 #ifdef DCT_ISLOW_SUPPORTED
57 #define PROVIDE_ISLOW_TABLES
58 #else
59 #ifdef DCT_SCALING_SUPPORTED
60 #define PROVIDE_ISLOW_TABLES
61 #endif
62 #endif
63 
64 
65 /*
66  * Perform forward DCT on one or more blocks of a component.
67  *
68  * The input samples are taken from the sample_data[] array starting at
69  * position start_row/start_col, and moving to the right for any additional
70  * blocks. The quantized coefficients are returned in coef_blocks[].
71  */
72 
73 METHODDEF(void)
forward_DCT(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY sample_data,JBLOCKROW coef_blocks,JDIMENSION start_row,JDIMENSION start_col,JDIMENSION num_blocks)74 forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
75 	     JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
76 	     JDIMENSION start_row, JDIMENSION start_col,
77 	     JDIMENSION num_blocks)
78 /* This version is used for integer DCT implementations. */
79 {
80   /* This routine is heavily used, so it's worth coding it tightly. */
81   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
82   forward_DCT_method_ptr do_dct = fdct->do_dct[compptr->component_index];
83   DCTELEM * divisors = (DCTELEM *) compptr->dct_table;
84   DCTELEM workspace[DCTSIZE2];	/* work area for FDCT subroutine */
85   JDIMENSION bi;
86 
87   sample_data += start_row;	/* fold in the vertical offset once */
88 
89   for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
90     /* Perform the DCT */
91     (*do_dct) (workspace, sample_data, start_col);
92 
93     /* Quantize/descale the coefficients, and store into coef_blocks[] */
94     { register DCTELEM temp, qval;
95       register int i;
96       register JCOEFPTR output_ptr = coef_blocks[bi];
97 
98       for (i = 0; i < DCTSIZE2; i++) {
99 	qval = divisors[i];
100 	temp = workspace[i];
101 	/* Divide the coefficient value by qval, ensuring proper rounding.
102 	 * Since C does not specify the direction of rounding for negative
103 	 * quotients, we have to force the dividend positive for portability.
104 	 *
105 	 * In most files, at least half of the output values will be zero
106 	 * (at default quantization settings, more like three-quarters...)
107 	 * so we should ensure that this case is fast.  On many machines,
108 	 * a comparison is enough cheaper than a divide to make a special test
109 	 * a win.  Since both inputs will be nonnegative, we need only test
110 	 * for a < b to discover whether a/b is 0.
111 	 * If your machine's division is fast enough, define FAST_DIVIDE.
112 	 */
113 #ifdef FAST_DIVIDE
114 #define DIVIDE_BY(a,b)	a /= b
115 #else
116 #define DIVIDE_BY(a,b)	if (a >= b) a /= b; else a = 0
117 #endif
118 	if (temp < 0) {
119 	  temp = -temp;
120 	  temp += qval>>1;	/* for rounding */
121 	  DIVIDE_BY(temp, qval);
122 	  temp = -temp;
123 	} else {
124 	  temp += qval>>1;	/* for rounding */
125 	  DIVIDE_BY(temp, qval);
126 	}
127 	output_ptr[i] = (JCOEF) temp;
128       }
129     }
130   }
131 }
132 
133 
134 #ifdef DCT_FLOAT_SUPPORTED
135 
136 METHODDEF(void)
forward_DCT_float(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY sample_data,JBLOCKROW coef_blocks,JDIMENSION start_row,JDIMENSION start_col,JDIMENSION num_blocks)137 forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
138 		   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
139 		   JDIMENSION start_row, JDIMENSION start_col,
140 		   JDIMENSION num_blocks)
141 /* This version is used for floating-point DCT implementations. */
142 {
143   /* This routine is heavily used, so it's worth coding it tightly. */
144   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
145   float_DCT_method_ptr do_dct = fdct->do_float_dct[compptr->component_index];
146   FAST_FLOAT * divisors = (FAST_FLOAT *) compptr->dct_table;
147   FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
148   JDIMENSION bi;
149 
150   sample_data += start_row;	/* fold in the vertical offset once */
151 
152   for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
153     /* Perform the DCT */
154     (*do_dct) (workspace, sample_data, start_col);
155 
156     /* Quantize/descale the coefficients, and store into coef_blocks[] */
157     { register FAST_FLOAT temp;
158       register int i;
159       register JCOEFPTR output_ptr = coef_blocks[bi];
160 
161       for (i = 0; i < DCTSIZE2; i++) {
162 	/* Apply the quantization and scaling factor */
163 	temp = workspace[i] * divisors[i];
164 	/* Round to nearest integer.
165 	 * Since C does not specify the direction of rounding for negative
166 	 * quotients, we have to force the dividend positive for portability.
167 	 * The maximum coefficient size is +-16K (for 12-bit data), so this
168 	 * code should work for either 16-bit or 32-bit ints.
169 	 */
170 	output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
171       }
172     }
173   }
174 }
175 
176 #endif /* DCT_FLOAT_SUPPORTED */
177 
178 
179 /*
180  * Initialize for a processing pass.
181  * Verify that all referenced Q-tables are present, and set up
182  * the divisor table for each one.
183  * In the current implementation, DCT of all components is done during
184  * the first pass, even if only some components will be output in the
185  * first scan.  Hence all components should be examined here.
186  */
187 
188 METHODDEF(void)
start_pass_fdctmgr(j_compress_ptr cinfo)189 start_pass_fdctmgr (j_compress_ptr cinfo)
190 {
191   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
192   int ci, qtblno, i;
193   jpeg_component_info *compptr;
194   int method = 0;
195   JQUANT_TBL * qtbl;
196   DCTELEM * dtbl;
197 
198   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
199        ci++, compptr++) {
200     /* Select the proper DCT routine for this component's scaling */
201     switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) {
202 #ifdef DCT_SCALING_SUPPORTED
203     case ((1 << 8) + 1):
204       fdct->do_dct[ci] = jpeg_fdct_1x1;
205       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
206       break;
207     case ((2 << 8) + 2):
208       fdct->do_dct[ci] = jpeg_fdct_2x2;
209       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
210       break;
211     case ((3 << 8) + 3):
212       fdct->do_dct[ci] = jpeg_fdct_3x3;
213       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
214       break;
215     case ((4 << 8) + 4):
216       fdct->do_dct[ci] = jpeg_fdct_4x4;
217       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
218       break;
219     case ((5 << 8) + 5):
220       fdct->do_dct[ci] = jpeg_fdct_5x5;
221       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
222       break;
223     case ((6 << 8) + 6):
224       fdct->do_dct[ci] = jpeg_fdct_6x6;
225       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
226       break;
227     case ((7 << 8) + 7):
228       fdct->do_dct[ci] = jpeg_fdct_7x7;
229       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
230       break;
231     case ((9 << 8) + 9):
232       fdct->do_dct[ci] = jpeg_fdct_9x9;
233       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
234       break;
235     case ((10 << 8) + 10):
236       fdct->do_dct[ci] = jpeg_fdct_10x10;
237       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
238       break;
239     case ((11 << 8) + 11):
240       fdct->do_dct[ci] = jpeg_fdct_11x11;
241       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
242       break;
243     case ((12 << 8) + 12):
244       fdct->do_dct[ci] = jpeg_fdct_12x12;
245       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
246       break;
247     case ((13 << 8) + 13):
248       fdct->do_dct[ci] = jpeg_fdct_13x13;
249       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
250       break;
251     case ((14 << 8) + 14):
252       fdct->do_dct[ci] = jpeg_fdct_14x14;
253       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
254       break;
255     case ((15 << 8) + 15):
256       fdct->do_dct[ci] = jpeg_fdct_15x15;
257       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
258       break;
259     case ((16 << 8) + 16):
260       fdct->do_dct[ci] = jpeg_fdct_16x16;
261       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
262       break;
263     case ((16 << 8) + 8):
264       fdct->do_dct[ci] = jpeg_fdct_16x8;
265       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
266       break;
267     case ((14 << 8) + 7):
268       fdct->do_dct[ci] = jpeg_fdct_14x7;
269       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
270       break;
271     case ((12 << 8) + 6):
272       fdct->do_dct[ci] = jpeg_fdct_12x6;
273       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
274       break;
275     case ((10 << 8) + 5):
276       fdct->do_dct[ci] = jpeg_fdct_10x5;
277       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
278       break;
279     case ((8 << 8) + 4):
280       fdct->do_dct[ci] = jpeg_fdct_8x4;
281       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
282       break;
283     case ((6 << 8) + 3):
284       fdct->do_dct[ci] = jpeg_fdct_6x3;
285       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
286       break;
287     case ((4 << 8) + 2):
288       fdct->do_dct[ci] = jpeg_fdct_4x2;
289       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
290       break;
291     case ((2 << 8) + 1):
292       fdct->do_dct[ci] = jpeg_fdct_2x1;
293       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
294       break;
295     case ((8 << 8) + 16):
296       fdct->do_dct[ci] = jpeg_fdct_8x16;
297       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
298       break;
299     case ((7 << 8) + 14):
300       fdct->do_dct[ci] = jpeg_fdct_7x14;
301       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
302       break;
303     case ((6 << 8) + 12):
304       fdct->do_dct[ci] = jpeg_fdct_6x12;
305       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
306       break;
307     case ((5 << 8) + 10):
308       fdct->do_dct[ci] = jpeg_fdct_5x10;
309       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
310       break;
311     case ((4 << 8) + 8):
312       fdct->do_dct[ci] = jpeg_fdct_4x8;
313       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
314       break;
315     case ((3 << 8) + 6):
316       fdct->do_dct[ci] = jpeg_fdct_3x6;
317       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
318       break;
319     case ((2 << 8) + 4):
320       fdct->do_dct[ci] = jpeg_fdct_2x4;
321       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
322       break;
323     case ((1 << 8) + 2):
324       fdct->do_dct[ci] = jpeg_fdct_1x2;
325       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
326       break;
327 #endif
328     case ((DCTSIZE << 8) + DCTSIZE):
329       switch (cinfo->dct_method) {
330 #ifdef DCT_ISLOW_SUPPORTED
331       case JDCT_ISLOW:
332 	fdct->do_dct[ci] = jpeg_fdct_islow;
333 	method = JDCT_ISLOW;
334 	break;
335 #endif
336 #ifdef DCT_IFAST_SUPPORTED
337       case JDCT_IFAST:
338 	fdct->do_dct[ci] = jpeg_fdct_ifast;
339 	method = JDCT_IFAST;
340 	break;
341 #endif
342 #ifdef DCT_FLOAT_SUPPORTED
343       case JDCT_FLOAT:
344 	fdct->do_float_dct[ci] = jpeg_fdct_float;
345 	method = JDCT_FLOAT;
346 	break;
347 #endif
348       default:
349 	ERREXIT(cinfo, JERR_NOT_COMPILED);
350 	break;
351       }
352       break;
353     default:
354       ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
355 	       compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size);
356       break;
357     }
358     qtblno = compptr->quant_tbl_no;
359     /* Make sure specified quantization table is present */
360     if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
361 	cinfo->quant_tbl_ptrs[qtblno] == NULL)
362       ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
363     qtbl = cinfo->quant_tbl_ptrs[qtblno];
364     /* Create divisor table from quant table */
365     switch (method) {
366 #ifdef PROVIDE_ISLOW_TABLES
367     case JDCT_ISLOW:
368       /* For LL&M IDCT method, divisors are equal to raw quantization
369        * coefficients multiplied by 8 (to counteract scaling).
370        */
371       dtbl = (DCTELEM *) compptr->dct_table;
372       for (i = 0; i < DCTSIZE2; i++) {
373 	dtbl[i] =
374 	  ((DCTELEM) qtbl->quantval[i]) << (compptr->component_needed ? 4 : 3);
375       }
376       fdct->pub.forward_DCT[ci] = forward_DCT;
377       break;
378 #endif
379 #ifdef DCT_IFAST_SUPPORTED
380     case JDCT_IFAST:
381       {
382 	/* For AA&N IDCT method, divisors are equal to quantization
383 	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
384 	 *   scalefactor[0] = 1
385 	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
386 	 * We apply a further scale factor of 8.
387 	 */
388 #define CONST_BITS 14
389 	static const INT16 aanscales[DCTSIZE2] = {
390 	  /* precomputed values scaled up by 14 bits */
391 	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
392 	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
393 	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
394 	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
395 	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
396 	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
397 	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
398 	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
399 	};
400 	SHIFT_TEMPS
401 
402 	dtbl = (DCTELEM *) compptr->dct_table;
403 	for (i = 0; i < DCTSIZE2; i++) {
404 	  dtbl[i] = (DCTELEM)
405 	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
406 				  (INT32) aanscales[i]),
407 		    compptr->component_needed ? CONST_BITS-4 : CONST_BITS-3);
408 	}
409       }
410       fdct->pub.forward_DCT[ci] = forward_DCT;
411       break;
412 #endif
413 #ifdef DCT_FLOAT_SUPPORTED
414     case JDCT_FLOAT:
415       {
416 	/* For float AA&N IDCT method, divisors are equal to quantization
417 	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
418 	 *   scalefactor[0] = 1
419 	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
420 	 * We apply a further scale factor of 8.
421 	 * What's actually stored is 1/divisor so that the inner loop can
422 	 * use a multiplication rather than a division.
423 	 */
424 	FAST_FLOAT * fdtbl = (FAST_FLOAT *) compptr->dct_table;
425 	int row, col;
426 	static const double aanscalefactor[DCTSIZE] = {
427 	  1.0, 1.387039845, 1.306562965, 1.175875602,
428 	  1.0, 0.785694958, 0.541196100, 0.275899379
429 	};
430 
431 	i = 0;
432 	for (row = 0; row < DCTSIZE; row++) {
433 	  for (col = 0; col < DCTSIZE; col++) {
434 	    fdtbl[i] = (FAST_FLOAT)
435 	      (1.0 / ((double) qtbl->quantval[i] *
436 		      aanscalefactor[row] * aanscalefactor[col] *
437 		      (compptr->component_needed ? 16.0 : 8.0)));
438 	    i++;
439 	  }
440 	}
441       }
442       fdct->pub.forward_DCT[ci] = forward_DCT_float;
443       break;
444 #endif
445     default:
446       ERREXIT(cinfo, JERR_NOT_COMPILED);
447       break;
448     }
449   }
450 }
451 
452 
453 /*
454  * Initialize FDCT manager.
455  */
456 
457 GLOBAL(void)
jinit_forward_dct(j_compress_ptr cinfo)458 jinit_forward_dct (j_compress_ptr cinfo)
459 {
460   my_fdct_ptr fdct;
461   int ci;
462   jpeg_component_info *compptr;
463 
464   fdct = (my_fdct_ptr)
465     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
466 				SIZEOF(my_fdct_controller));
467   cinfo->fdct = &fdct->pub;
468   fdct->pub.start_pass = start_pass_fdctmgr;
469 
470   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
471        ci++, compptr++) {
472     /* Allocate a divisor table for each component */
473     compptr->dct_table =
474       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
475 				  SIZEOF(divisor_table));
476   }
477 }
478