1 /*
2  * jcdctmgr.c
3  *
4  * Copyright (C) 1994-1996, Thomas G. Lane.
5  * This file is part of the Independent JPEG Group's software.
6  * For conditions of distribution and use, see the accompanying README file.
7  *
8  * This file contains the forward-DCT management logic.
9  * This code selects a particular DCT implementation to be used,
10  * and it performs related housekeeping chores including coefficient
11  * quantization.
12  */
13 
14 #define JPEG_INTERNALS
15 #include "jinclude.h"
16 #include "jpeglib.h"
17 #include "jdct.h"		/* Private declarations for DCT subsystem */
18 
19 
20 /* Private subobject for this module */
21 
22 typedef struct {
23   struct jpeg_forward_dct pub;	/* public fields */
24 
25   /* Pointer to the DCT routine actually in use */
26   forward_DCT_method_ptr do_dct[MAX_COMPONENTS];
27 
28   /* The actual post-DCT divisors --- not identical to the quant table
29    * entries, because of scaling (especially for an unnormalized DCT).
30    * Each table is given in normal array order.
31    */
32   DCTELEM * divisors[NUM_QUANT_TBLS];
33 
34 #ifdef DCT_FLOAT_SUPPORTED
35   /* Same as above for the floating-point case. */
36   float_DCT_method_ptr do_float_dct[MAX_COMPONENTS];
37   FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
38 #endif
39 } my_fdct_controller;
40 
41 typedef my_fdct_controller * my_fdct_ptr;
42 
43 
44 /* The current scaled-DCT routines require ISLOW-style divisor tables,
45  * so be sure to compile that code if either ISLOW or SCALING is requested.
46  */
47 #ifdef DCT_ISLOW_SUPPORTED
48 #define PROVIDE_ISLOW_TABLES
49 #else
50 #ifdef DCT_SCALING_SUPPORTED
51 #define PROVIDE_ISLOW_TABLES
52 #endif
53 #endif
54 
55 
56 /*
57  * Perform forward DCT on one or more blocks of a component.
58  *
59  * The input samples are taken from the sample_data[] array starting at
60  * position start_row/start_col, and moving to the right for any additional
61  * blocks. The quantized coefficients are returned in coef_blocks[].
62  */
63 
64 METHODDEF(void)
forward_DCT(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY sample_data,JBLOCKROW coef_blocks,JDIMENSION start_row,JDIMENSION start_col,JDIMENSION num_blocks)65 forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
66 	     JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
67 	     JDIMENSION start_row, JDIMENSION start_col,
68 	     JDIMENSION num_blocks)
69 /* This version is used for integer DCT implementations. */
70 {
71   /* This routine is heavily used, so it's worth coding it tightly. */
72   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
73   forward_DCT_method_ptr do_dct = fdct->do_dct[compptr->component_index];
74   DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
75   DCTELEM workspace[DCTSIZE2];	/* work area for FDCT subroutine */
76   JDIMENSION bi;
77 
78   sample_data += start_row;	/* fold in the vertical offset once */
79 
80   for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
81     /* Perform the DCT */
82     (*do_dct) (workspace, sample_data, start_col);
83 
84     /* Quantize/descale the coefficients, and store into coef_blocks[] */
85     { register DCTELEM temp, qval;
86       register int i;
87       register JCOEFPTR output_ptr = coef_blocks[bi];
88 
89       for (i = 0; i < DCTSIZE2; i++) {
90 	qval = divisors[i];
91 	temp = workspace[i];
92 	/* Divide the coefficient value by qval, ensuring proper rounding.
93 	 * Since C does not specify the direction of rounding for negative
94 	 * quotients, we have to force the dividend positive for portability.
95 	 *
96 	 * In most files, at least half of the output values will be zero
97 	 * (at default quantization settings, more like three-quarters...)
98 	 * so we should ensure that this case is fast.  On many machines,
99 	 * a comparison is enough cheaper than a divide to make a special test
100 	 * a win.  Since both inputs will be nonnegative, we need only test
101 	 * for a < b to discover whether a/b is 0.
102 	 * If your machine's division is fast enough, define FAST_DIVIDE.
103 	 */
104 #ifdef FAST_DIVIDE
105 #define DIVIDE_BY(a,b)	a /= b
106 #else
107 #define DIVIDE_BY(a,b)	if (a >= b) a /= b; else a = 0
108 #endif
109 	if (temp < 0) {
110 	  temp = -temp;
111 	  temp += qval>>1;	/* for rounding */
112 	  DIVIDE_BY(temp, qval);
113 	  temp = -temp;
114 	} else {
115 	  temp += qval>>1;	/* for rounding */
116 	  DIVIDE_BY(temp, qval);
117 	}
118 	output_ptr[i] = (JCOEF) temp;
119       }
120     }
121   }
122 }
123 
124 
125 #ifdef DCT_FLOAT_SUPPORTED
126 
127 METHODDEF(void)
forward_DCT_float(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY sample_data,JBLOCKROW coef_blocks,JDIMENSION start_row,JDIMENSION start_col,JDIMENSION num_blocks)128 forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
129 		   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
130 		   JDIMENSION start_row, JDIMENSION start_col,
131 		   JDIMENSION num_blocks)
132 /* This version is used for floating-point DCT implementations. */
133 {
134   /* This routine is heavily used, so it's worth coding it tightly. */
135   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
136   float_DCT_method_ptr do_dct = fdct->do_float_dct[compptr->component_index];
137   FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
138   FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
139   JDIMENSION bi;
140 
141   sample_data += start_row;	/* fold in the vertical offset once */
142 
143   for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) {
144     /* Perform the DCT */
145     (*do_dct) (workspace, sample_data, start_col);
146 
147     /* Quantize/descale the coefficients, and store into coef_blocks[] */
148     { register FAST_FLOAT temp;
149       register int i;
150       register JCOEFPTR output_ptr = coef_blocks[bi];
151 
152       for (i = 0; i < DCTSIZE2; i++) {
153 	/* Apply the quantization and scaling factor */
154 	temp = workspace[i] * divisors[i];
155 	/* Round to nearest integer.
156 	 * Since C does not specify the direction of rounding for negative
157 	 * quotients, we have to force the dividend positive for portability.
158 	 * The maximum coefficient size is +-16K (for 12-bit data), so this
159 	 * code should work for either 16-bit or 32-bit ints.
160 	 */
161 	output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
162       }
163     }
164   }
165 }
166 
167 #endif /* DCT_FLOAT_SUPPORTED */
168 
169 
170 /*
171  * Initialize for a processing pass.
172  * Verify that all referenced Q-tables are present, and set up
173  * the divisor table for each one.
174  * In the current implementation, DCT of all components is done during
175  * the first pass, even if only some components will be output in the
176  * first scan.  Hence all components should be examined here.
177  */
178 
179 METHODDEF(void)
start_pass_fdctmgr(j_compress_ptr cinfo)180 start_pass_fdctmgr (j_compress_ptr cinfo)
181 {
182   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
183   int ci, qtblno, i;
184   jpeg_component_info *compptr;
185   int method = 0;
186   JQUANT_TBL * qtbl;
187   DCTELEM * dtbl;
188 
189   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
190        ci++, compptr++) {
191     /* Select the proper DCT routine for this component's scaling */
192     switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) {
193 #ifdef DCT_SCALING_SUPPORTED
194     case ((1 << 8) + 1):
195       fdct->do_dct[ci] = jpeg_fdct_1x1;
196       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
197       break;
198     case ((2 << 8) + 2):
199       fdct->do_dct[ci] = jpeg_fdct_2x2;
200       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
201       break;
202     case ((3 << 8) + 3):
203       fdct->do_dct[ci] = jpeg_fdct_3x3;
204       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
205       break;
206     case ((4 << 8) + 4):
207       fdct->do_dct[ci] = jpeg_fdct_4x4;
208       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
209       break;
210     case ((5 << 8) + 5):
211       fdct->do_dct[ci] = jpeg_fdct_5x5;
212       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
213       break;
214     case ((6 << 8) + 6):
215       fdct->do_dct[ci] = jpeg_fdct_6x6;
216       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
217       break;
218     case ((7 << 8) + 7):
219       fdct->do_dct[ci] = jpeg_fdct_7x7;
220       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
221       break;
222     case ((9 << 8) + 9):
223       fdct->do_dct[ci] = jpeg_fdct_9x9;
224       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
225       break;
226     case ((10 << 8) + 10):
227       fdct->do_dct[ci] = jpeg_fdct_10x10;
228       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
229       break;
230     case ((11 << 8) + 11):
231       fdct->do_dct[ci] = jpeg_fdct_11x11;
232       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
233       break;
234     case ((12 << 8) + 12):
235       fdct->do_dct[ci] = jpeg_fdct_12x12;
236       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
237       break;
238     case ((13 << 8) + 13):
239       fdct->do_dct[ci] = jpeg_fdct_13x13;
240       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
241       break;
242     case ((14 << 8) + 14):
243       fdct->do_dct[ci] = jpeg_fdct_14x14;
244       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
245       break;
246     case ((15 << 8) + 15):
247       fdct->do_dct[ci] = jpeg_fdct_15x15;
248       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
249       break;
250     case ((16 << 8) + 16):
251       fdct->do_dct[ci] = jpeg_fdct_16x16;
252       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
253       break;
254     case ((16 << 8) + 8):
255       fdct->do_dct[ci] = jpeg_fdct_16x8;
256       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
257       break;
258     case ((14 << 8) + 7):
259       fdct->do_dct[ci] = jpeg_fdct_14x7;
260       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
261       break;
262     case ((12 << 8) + 6):
263       fdct->do_dct[ci] = jpeg_fdct_12x6;
264       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
265       break;
266     case ((10 << 8) + 5):
267       fdct->do_dct[ci] = jpeg_fdct_10x5;
268       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
269       break;
270     case ((8 << 8) + 4):
271       fdct->do_dct[ci] = jpeg_fdct_8x4;
272       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
273       break;
274     case ((6 << 8) + 3):
275       fdct->do_dct[ci] = jpeg_fdct_6x3;
276       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
277       break;
278     case ((4 << 8) + 2):
279       fdct->do_dct[ci] = jpeg_fdct_4x2;
280       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
281       break;
282     case ((2 << 8) + 1):
283       fdct->do_dct[ci] = jpeg_fdct_2x1;
284       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
285       break;
286     case ((8 << 8) + 16):
287       fdct->do_dct[ci] = jpeg_fdct_8x16;
288       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
289       break;
290     case ((7 << 8) + 14):
291       fdct->do_dct[ci] = jpeg_fdct_7x14;
292       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
293       break;
294     case ((6 << 8) + 12):
295       fdct->do_dct[ci] = jpeg_fdct_6x12;
296       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
297       break;
298     case ((5 << 8) + 10):
299       fdct->do_dct[ci] = jpeg_fdct_5x10;
300       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
301       break;
302     case ((4 << 8) + 8):
303       fdct->do_dct[ci] = jpeg_fdct_4x8;
304       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
305       break;
306     case ((3 << 8) + 6):
307       fdct->do_dct[ci] = jpeg_fdct_3x6;
308       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
309       break;
310     case ((2 << 8) + 4):
311       fdct->do_dct[ci] = jpeg_fdct_2x4;
312       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
313       break;
314     case ((1 << 8) + 2):
315       fdct->do_dct[ci] = jpeg_fdct_1x2;
316       method = JDCT_ISLOW;	/* jfdctint uses islow-style table */
317       break;
318 #endif
319     case ((DCTSIZE << 8) + DCTSIZE):
320       switch (cinfo->dct_method) {
321 #ifdef DCT_ISLOW_SUPPORTED
322       case JDCT_ISLOW:
323 	fdct->do_dct[ci] = jpeg_fdct_islow;
324 	method = JDCT_ISLOW;
325 	break;
326 #endif
327 #ifdef DCT_IFAST_SUPPORTED
328       case JDCT_IFAST:
329 	fdct->do_dct[ci] = jpeg_fdct_ifast;
330 	method = JDCT_IFAST;
331 	break;
332 #endif
333 #ifdef DCT_FLOAT_SUPPORTED
334       case JDCT_FLOAT:
335 	fdct->do_float_dct[ci] = jpeg_fdct_float;
336 	method = JDCT_FLOAT;
337 	break;
338 #endif
339       default:
340 	ERREXIT(cinfo, JERR_NOT_COMPILED);
341 	break;
342       }
343       break;
344     default:
345       ERREXIT2(cinfo, JERR_BAD_DCTSIZE,
346 	       compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size);
347       break;
348     }
349     qtblno = compptr->quant_tbl_no;
350     /* Make sure specified quantization table is present */
351     if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
352 	cinfo->quant_tbl_ptrs[qtblno] == NULL)
353       ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
354     qtbl = cinfo->quant_tbl_ptrs[qtblno];
355     /* Compute divisors for this quant table */
356     /* We may do this more than once for same table, but it's not a big deal */
357     switch (method) {
358 #ifdef PROVIDE_ISLOW_TABLES
359     case JDCT_ISLOW:
360       /* For LL&M IDCT method, divisors are equal to raw quantization
361        * coefficients multiplied by 8 (to counteract scaling).
362        */
363       if (fdct->divisors[qtblno] == NULL) {
364 	fdct->divisors[qtblno] = (DCTELEM *)
365 	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
366 				      DCTSIZE2 * SIZEOF(DCTELEM));
367       }
368       dtbl = fdct->divisors[qtblno];
369       for (i = 0; i < DCTSIZE2; i++) {
370 	dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
371       }
372       fdct->pub.forward_DCT[ci] = forward_DCT;
373       break;
374 #endif
375 #ifdef DCT_IFAST_SUPPORTED
376     case JDCT_IFAST:
377       {
378 	/* For AA&N IDCT method, divisors are equal to quantization
379 	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
380 	 *   scalefactor[0] = 1
381 	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
382 	 * We apply a further scale factor of 8.
383 	 */
384 #define CONST_BITS 14
385 	static const INT16 aanscales[DCTSIZE2] = {
386 	  /* precomputed values scaled up by 14 bits */
387 	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
388 	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
389 	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
390 	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
391 	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
392 	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
393 	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
394 	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
395 	};
396 	SHIFT_TEMPS
397 
398 	if (fdct->divisors[qtblno] == NULL) {
399 	  fdct->divisors[qtblno] = (DCTELEM *)
400 	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
401 					DCTSIZE2 * SIZEOF(DCTELEM));
402 	}
403 	dtbl = fdct->divisors[qtblno];
404 	for (i = 0; i < DCTSIZE2; i++) {
405 	  dtbl[i] = (DCTELEM)
406 	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
407 				  (INT32) aanscales[i]),
408 		    CONST_BITS-3);
409 	}
410       }
411       fdct->pub.forward_DCT[ci] = forward_DCT;
412       break;
413 #endif
414 #ifdef DCT_FLOAT_SUPPORTED
415     case JDCT_FLOAT:
416       {
417 	/* For float AA&N IDCT method, divisors are equal to quantization
418 	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
419 	 *   scalefactor[0] = 1
420 	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
421 	 * We apply a further scale factor of 8.
422 	 * What's actually stored is 1/divisor so that the inner loop can
423 	 * use a multiplication rather than a division.
424 	 */
425 	FAST_FLOAT * fdtbl;
426 	int row, col;
427 	static const double aanscalefactor[DCTSIZE] = {
428 	  1.0, 1.387039845, 1.306562965, 1.175875602,
429 	  1.0, 0.785694958, 0.541196100, 0.275899379
430 	};
431 
432 	if (fdct->float_divisors[qtblno] == NULL) {
433 	  fdct->float_divisors[qtblno] = (FAST_FLOAT *)
434 	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
435 					DCTSIZE2 * SIZEOF(FAST_FLOAT));
436 	}
437 	fdtbl = fdct->float_divisors[qtblno];
438 	i = 0;
439 	for (row = 0; row < DCTSIZE; row++) {
440 	  for (col = 0; col < DCTSIZE; col++) {
441 	    fdtbl[i] = (FAST_FLOAT)
442 	      (1.0 / (((double) qtbl->quantval[i] *
443 		       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
444 	    i++;
445 	  }
446 	}
447       }
448       fdct->pub.forward_DCT[ci] = forward_DCT_float;
449       break;
450 #endif
451     default:
452       ERREXIT(cinfo, JERR_NOT_COMPILED);
453       break;
454     }
455   }
456 }
457 
458 
459 /*
460  * Initialize FDCT manager.
461  */
462 
463 GLOBAL(void)
jinit_forward_dct(j_compress_ptr cinfo)464 jinit_forward_dct (j_compress_ptr cinfo)
465 {
466   my_fdct_ptr fdct;
467   int i;
468 
469   fdct = (my_fdct_ptr)
470     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
471 				SIZEOF(my_fdct_controller));
472   cinfo->fdct = (struct jpeg_forward_dct *) fdct;
473   fdct->pub.start_pass = start_pass_fdctmgr;
474 
475   /* Mark divisor tables unallocated */
476   for (i = 0; i < NUM_QUANT_TBLS; i++) {
477     fdct->divisors[i] = NULL;
478 #ifdef DCT_FLOAT_SUPPORTED
479     fdct->float_divisors[i] = NULL;
480 #endif
481   }
482 }
483