1 /*
2 * jcdctmgr.c
3 *
4 * This file was part of the Independent JPEG Group's software:
5 * Copyright (C) 1994-1996, Thomas G. Lane.
6 * libjpeg-turbo Modifications:
7 * Copyright (C) 1999-2006, MIYASAKA Masaru.
8 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
9 * Copyright (C) 2011, 2014-2015 D. R. Commander
10 * mozjpeg Modifications:
11 * Copyright (C) 2014, Mozilla Corporation.
12 * For conditions of distribution and use, see the accompanying README file.
13 *
14 * This file contains the forward-DCT management logic.
15 * This code selects a particular DCT implementation to be used,
16 * and it performs related housekeeping chores including coefficient
17 * quantization.
18 */
19
20 #define JPEG_INTERNALS
21 #include "jinclude.h"
22 #include "jpeglib.h"
23 #include "jdct.h" /* Private declarations for DCT subsystem */
24 #include "jsimddct.h"
25 #include "jchuff.h"
26 #include <assert.h>
27 #include <math.h>
28
29
30 /* Private subobject for this module */
31
/* Method-pointer types used to dispatch to the selected (possibly SIMD)
 * implementation of each stage of the forward-DCT pipeline.
 */

typedef void (*forward_DCT_method_ptr) (DCTELEM *data);
typedef void (*float_DCT_method_ptr) (FAST_FLOAT *data);

/* Optional pre-DCT sample preprocessing (e.g. deringing); receives the
 * level-shifted sample workspace and the component's quantization table.
 */
typedef void (*preprocess_method_ptr)(DCTELEM*, const JQUANT_TBL*);
typedef void (*float_preprocess_method_ptr)(FAST_FLOAT*, const JQUANT_TBL*);

/* Load one block of samples into the workspace, applying the
 * unsigned->signed (center) shift.
 */
typedef void (*convsamp_method_ptr) (JSAMPARRAY sample_data,
                                     JDIMENSION start_col,
                                     DCTELEM *workspace);
typedef void (*float_convsamp_method_ptr) (JSAMPARRAY sample_data,
                                           JDIMENSION start_col,
                                           FAST_FLOAT *workspace);

/* Quantize/descale DCT coefficients from the workspace into coef_block. */
typedef void (*quantize_method_ptr) (JCOEFPTR coef_block, DCTELEM *divisors,
                                     DCTELEM *workspace);
typedef void (*float_quantize_method_ptr) (JCOEFPTR coef_block,
                                           FAST_FLOAT *divisors,
                                           FAST_FLOAT *workspace);

/* Forward declaration of the C quantizer, used as a fallback when a divisor
 * table entry cannot be handled by the SIMD reciprocal method
 * (see start_pass_fdctmgr). */
METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *);
52
/* Private state of the forward-DCT manager. */
typedef struct {
  struct jpeg_forward_dct pub;  /* public fields */

  /* Pointers to the routines actually in use for the integer DCT path. */
  forward_DCT_method_ptr dct;       /* forward DCT proper */
  convsamp_method_ptr convsamp;     /* sample load + level shift */
  preprocess_method_ptr preprocess; /* optional deringing; may be NULL */
  quantize_method_ptr quantize;     /* coefficient quantization */

  /* The actual post-DCT divisors --- not identical to the quant table
   * entries, because of scaling (especially for an unnormalized DCT).
   * Each table is given in normal array order.
   * (For 8-bit samples each table holds 4 * DCTSIZE2 entries:
   * reciprocal/correction/scale/shift sub-tables; see compute_reciprocal.)
   */
  DCTELEM *divisors[NUM_QUANT_TBLS];

  /* work area for FDCT subroutine */
  DCTELEM *workspace;

#ifdef DCT_FLOAT_SUPPORTED
  /* Same as above for the floating-point case. */
  float_DCT_method_ptr float_dct;
  float_convsamp_method_ptr float_convsamp;
  float_preprocess_method_ptr float_preprocess;
  float_quantize_method_ptr float_quantize;
  FAST_FLOAT *float_divisors[NUM_QUANT_TBLS];
  FAST_FLOAT *float_workspace;
#endif
} my_fdct_controller;

typedef my_fdct_controller *my_fdct_ptr;
83
84
85 #if BITS_IN_JSAMPLE == 8
86
87 /*
88 * Find the highest bit in an integer through binary search.
89 */
90
91 LOCAL(int)
flss(UINT16 val)92 flss (UINT16 val)
93 {
94 int bit;
95
96 bit = 16;
97
98 if (!val)
99 return 0;
100
101 if (!(val & 0xff00)) {
102 bit -= 8;
103 val <<= 8;
104 }
105 if (!(val & 0xf000)) {
106 bit -= 4;
107 val <<= 4;
108 }
109 if (!(val & 0xc000)) {
110 bit -= 2;
111 val <<= 2;
112 }
113 if (!(val & 0x8000)) {
114 bit -= 1;
115 val <<= 1;
116 }
117
118 return bit;
119 }
120
121
122 /*
123 * Compute values to do a division using reciprocal.
124 *
125 * This implementation is based on an algorithm described in
126 * "How to optimize for the Pentium family of microprocessors"
127 * (http://www.agner.org/assem/).
128 * More information about the basic algorithm can be found in
129 * the paper "Integer Division Using Reciprocals" by Robert Alverson.
130 *
131 * The basic idea is to replace x/d by x * d^-1. In order to store
132 * d^-1 with enough precision we shift it left a few places. It turns
 * out that this algorithm gives just enough precision, and also fits
134 * into DCTELEM:
135 *
136 * b = (the number of significant bits in divisor) - 1
137 * r = (word size) + b
138 * f = 2^r / divisor
139 *
140 * f will not be an integer for most cases, so we need to compensate
141 * for the rounding error introduced:
142 *
143 * no fractional part:
144 *
145 * result = input >> r
146 *
147 * fractional part of f < 0.5:
148 *
149 * round f down to nearest integer
150 * result = ((input + 1) * f) >> r
151 *
152 * fractional part of f > 0.5:
153 *
154 * round f up to nearest integer
155 * result = (input * f) >> r
156 *
157 * This is the original algorithm that gives truncated results. But we
158 * want properly rounded results, so we replace "input" with
159 * "input + divisor/2".
160 *
161 * In order to allow SIMD implementations we also tweak the values to
162 * allow the same calculation to be made at all times:
163 *
164 * dctbl[0] = f rounded to nearest integer
165 * dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
166 * dctbl[2] = 1 << ((word size) * 2 - r)
167 * dctbl[3] = r - (word size)
168 *
169 * dctbl[2] is for stupid instruction sets where the shift operation
170 * isn't member wise (e.g. MMX).
171 *
172 * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
173 * is that most SIMD implementations have a "multiply and store top
174 * half" operation.
175 *
176 * Lastly, we store each of the values in their own table instead
177 * of in a consecutive manner, yet again in order to allow SIMD
178 * routines.
179 */
180
LOCAL(int)
compute_reciprocal (UINT16 divisor, DCTELEM *dtbl)
/* Fill in the 4-part divisor entry at dtbl (sub-tables strided by DCTSIZE2:
 * reciprocal, correction, scale, shift) for reciprocal-multiplication
 * quantization.  Returns 0 when the entry must be handled by the C
 * quantizer (the caller then abandons the SIMD quantizer for this pass),
 * nonzero otherwise.
 */
{
  UDCTELEM2 fq, fr;
  UDCTELEM c;
  int b, r;

  if (divisor == 1) {
    /* divisor == 1 means unquantized, so these reciprocal/correction/shift
     * values will cause the C quantization algorithm to act like the
     * identity function. Since only the C quantization algorithm is used in
     * these cases, the scale value is irrelevant.
     */
    dtbl[DCTSIZE2 * 0] = (DCTELEM) 1;                       /* reciprocal */
    dtbl[DCTSIZE2 * 1] = (DCTELEM) 0;                       /* correction */
    dtbl[DCTSIZE2 * 2] = (DCTELEM) 1;                       /* scale */
    dtbl[DCTSIZE2 * 3] = -(DCTELEM) (sizeof(DCTELEM) * 8);  /* shift */
    return 0;
  }

  b = flss(divisor) - 1;        /* index of divisor's highest set bit */
  r = sizeof(DCTELEM) * 8 + b;  /* total shift = word size + b */

  fq = ((UDCTELEM2)1 << r) / divisor;  /* reciprocal scaled by 2^r */
  fr = ((UDCTELEM2)1 << r) % divisor;  /* remainder (fractional part) */

  c = divisor / 2; /* for rounding */

  if (fr == 0) { /* divisor is power of two */
    /* fq will be one bit too large to fit in DCTELEM, so adjust */
    fq >>= 1;
    r--;
  } else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */
    c++;
  } else { /* fractional part is > 0.5 */
    fq++;
  }

  dtbl[DCTSIZE2 * 0] = (DCTELEM) fq;  /* reciprocal */
  dtbl[DCTSIZE2 * 1] = (DCTELEM) c;   /* correction + roundfactor */
#ifdef WITH_SIMD
  /* Pre-shifted multiplier for SIMD instruction sets without per-element
   * shifts (see the block comment above). */
  dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r)); /* scale */
#else
  dtbl[DCTSIZE2 * 2] = 1;
#endif
  dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */

  if (r <= 16) return 0;
  else return 1;
}
231
232 #endif
233
234
235 /*
236 * Initialize for a processing pass.
237 * Verify that all referenced Q-tables are present, and set up
238 * the divisor table for each one.
239 * In the current implementation, DCT of all components is done during
240 * the first pass, even if only some components will be output in the
241 * first scan. Hence all components should be examined here.
242 */
243
METHODDEF(void)
start_pass_fdctmgr (j_compress_ptr cinfo)
{
  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  int ci, qtblno, i;
  jpeg_component_info *compptr;
  JQUANT_TBL *qtbl;
  DCTELEM *dtbl;

  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
       ci++, compptr++) {
    qtblno = compptr->quant_tbl_no;
    /* Make sure specified quantization table is present */
    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
        cinfo->quant_tbl_ptrs[qtblno] == NULL)
      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
    qtbl = cinfo->quant_tbl_ptrs[qtblno];
    /* Compute divisors for this quant table */
    /* We may do this more than once for same table, but it's not a big deal */
    switch (cinfo->dct_method) {
#ifdef DCT_ISLOW_SUPPORTED
    case JDCT_ISLOW:
      /* For LL&M IDCT method, divisors are equal to raw quantization
       * coefficients multiplied by 8 (to counteract scaling).
       */
      if (fdct->divisors[qtblno] == NULL) {
        /* 4 sub-tables: reciprocal/correction/scale/shift (8-bit build). */
        fdct->divisors[qtblno] = (DCTELEM *)
          (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
                                      (DCTSIZE2 * 4) * sizeof(DCTELEM));
      }
      dtbl = fdct->divisors[qtblno];
      for (i = 0; i < DCTSIZE2; i++) {
#if BITS_IN_JSAMPLE == 8
        /* If any entry can't be handled by the SIMD reciprocal method,
         * drop back to the C quantizer for the whole pass. */
        if (!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) &&
            fdct->quantize == jsimd_quantize)
          fdct->quantize = quantize;
#else
        dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
#endif
      }
      break;
#endif
#ifdef DCT_IFAST_SUPPORTED
    case JDCT_IFAST:
      {
        /* For AA&N IDCT method, divisors are equal to quantization
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
         *   scalefactor[0] = 1
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
         * We apply a further scale factor of 8.
         */
#define CONST_BITS 14
        static const INT16 aanscales[DCTSIZE2] = {
          /* precomputed values scaled up by 14 bits */
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
          22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
          21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
          19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
          12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
           8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
           4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
        };
        SHIFT_TEMPS

        if (fdct->divisors[qtblno] == NULL) {
          fdct->divisors[qtblno] = (DCTELEM *)
            (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
                                        (DCTSIZE2 * 4) * sizeof(DCTELEM));
        }
        dtbl = fdct->divisors[qtblno];
        for (i = 0; i < DCTSIZE2; i++) {
#if BITS_IN_JSAMPLE == 8
          /* Divisor = quantval * aanscale / 2^14 * 8; fall back to the C
           * quantizer if the reciprocal entry isn't SIMD-compatible. */
          if (!compute_reciprocal(
                DESCALE(MULTIPLY16V16((JLONG) qtbl->quantval[i],
                                      (JLONG) aanscales[i]),
                        CONST_BITS-3), &dtbl[i]) &&
              fdct->quantize == jsimd_quantize)
            fdct->quantize = quantize;
#else
          dtbl[i] = (DCTELEM)
            DESCALE(MULTIPLY16V16((JLONG) qtbl->quantval[i],
                                  (JLONG) aanscales[i]),
                    CONST_BITS-3);
#endif
        }
      }
      break;
#endif
#ifdef DCT_FLOAT_SUPPORTED
    case JDCT_FLOAT:
      {
        /* For float AA&N IDCT method, divisors are equal to quantization
         * coefficients scaled by scalefactor[row]*scalefactor[col], where
         *   scalefactor[0] = 1
         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
         * We apply a further scale factor of 8.
         * What's actually stored is 1/divisor so that the inner loop can
         * use a multiplication rather than a division.
         */
        FAST_FLOAT *fdtbl;
        int row, col;
        static const double aanscalefactor[DCTSIZE] = {
          1.0, 1.387039845, 1.306562965, 1.175875602,
          1.0, 0.785694958, 0.541196100, 0.275899379
        };

        if (fdct->float_divisors[qtblno] == NULL) {
          fdct->float_divisors[qtblno] = (FAST_FLOAT *)
            (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
                                        DCTSIZE2 * sizeof(FAST_FLOAT));
        }
        fdtbl = fdct->float_divisors[qtblno];
        i = 0;
        for (row = 0; row < DCTSIZE; row++) {
          for (col = 0; col < DCTSIZE; col++) {
            fdtbl[i] = (FAST_FLOAT)
              (1.0 / (((double) qtbl->quantval[i] *
                       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
            i++;
          }
        }
      }
      break;
#endif
    default:
      ERREXIT(cinfo, JERR_NOT_COMPILED);
      break;
    }
  }
}
375
376 METHODDEF(float)
catmull_rom(const DCTELEM value1,const DCTELEM value2,const DCTELEM value3,const DCTELEM value4,const float t,int size)377 catmull_rom(const DCTELEM value1, const DCTELEM value2, const DCTELEM value3, const DCTELEM value4, const float t, int size)
378 {
379 const int tan1 = (value3 - value1) * size;
380 const int tan2 = (value4 - value2) * size;
381
382 const float t2 = t * t;
383 const float t3 = t2 * t;
384
385 const float f1 = 2.f * t3 - 3.f * t2 + 1.f;
386 const float f2 = -2.f * t3 + 3.f * t2;
387 const float f3 = t3 - 2.f * t2 + t;
388 const float f4 = t3 - t2;
389
390 return value2 * f1 + tan1 * f3 +
391 value3 * f2 + tan2 * f4;
392 }
393
394 /** Prevents visible ringing artifacts near hard edges on white backgrounds.
395
396 1. JPEG can encode samples with higher values than it's possible to display (higher than 255 in RGB),
397 and the decoder will always clamp values to 0-255. To encode 255 you can use any value >= 255,
398 and distortions of the out-of-range values won't be visible as long as they decode to anything >= 255.
399
400 2. From DCT perspective pixels in a block are a waveform. Hard edges form square waves (bad).
401 Edges with white are similar to waveform clipping, and anti-clipping algorithms can turn square waves
402 into softer ones that compress better.
403
404 */
METHODDEF(void)
preprocess_deringing(DCTELEM *data, const JQUANT_TBL *quantization_table)
/* data[] holds one level-shifted 8x8 block (samples minus CENTERJSAMPLE),
 * so the displayable maximum is 255 - CENTERJSAMPLE.  Runs of max-valued
 * pixels (clipped whites) are replaced by a smooth overshooting spline,
 * which softens square waves in the DCT domain and reduces ringing.
 */
{
  const DCTELEM maxsample = 255 - CENTERJSAMPLE;
  const int size = DCTSIZE * DCTSIZE;

  /* Decoders don't handle overflow of DC very well, so calculate
     maximum overflow that is safe to do without increasing DC out of range */
  int sum = 0;
  int maxsample_count = 0;
  int i;
  DCTELEM maxovershoot;
  int n;

  for(i=0; i < size; i++) {
    sum += data[i];
    if (data[i] >= maxsample) {
      maxsample_count++;
    }
  }

  /* If nothing reaches max value there's nothing to overshoot
     and if the block is completely flat, it's already the best case. */
  if (!maxsample_count || maxsample_count == size) {
    return;
  }

  /* Too much overshoot is not good: increased amplitude will cost bits, and the cost is proportional to quantization (here using DC quant as a rough guide). */
  /* The third MIN() term is the average headroom per max-valued pixel that
     keeps the block's DC within range. */
  maxovershoot = maxsample + MIN(MIN(31, 2*quantization_table->quantval[0]), (maxsample * size - sum) / maxsample_count);

  n = 0;
  do {
    int start, end, length;
    DCTELEM f1, f2, l1, l2, fslope, lslope;
    float step, position;

    /* Pixels are traversed in zig-zag order to process them as a line */
    if (data[jpeg_natural_order[n]] < maxsample) {
      n++;
      continue;
    }

    /* Find a run of maxsample pixels. Start is the first pixel inside the range, end the first pixel outside. */
    start = n;
    while(++n < size && data[jpeg_natural_order[n]] >= maxsample) {}
    end = n;

    /* the run will be replaced with a catmull-rom interpolation of values from the edges */

    /* Find suitable upward slope from pixels around edges of the run.
       Just feeding nearby pixels as catmull rom points isn't good enough,
       as slope with one sample before the edge may have been flattened by clipping,
       and slope of two samples before the edge could be downward. */
    f1 = data[jpeg_natural_order[start >= 1 ? start-1 : 0]];
    f2 = data[jpeg_natural_order[start >= 2 ? start-2 : 0]];

    l1 = data[jpeg_natural_order[end < size-1 ? end : size-1]];
    l2 = data[jpeg_natural_order[end < size-2 ? end+1 : size-1]];

    fslope = MAX(f1-f2, maxsample-f1);
    lslope = MAX(l1-l2, maxsample-l1);

    /* if slope at the start/end is unknown, just make the curve symmetric */
    if (start == 0) {
      fslope = lslope;
    }
    if (end == size) {
      lslope = fslope;
    }

    /* The curve fits better if first and last pixel is omitted */
    length = end - start;
    step = 1.f/(float)(length + 1);
    position = step;

    /* Rebuild the run from the spline, clamping overshoot to the DC-safe
       limit computed above. */
    for(i = start; i < end; i++, position += step) {
      DCTELEM tmp = ceilf(catmull_rom(maxsample - fslope, maxsample, maxsample, maxsample - lslope, position, length));
      data[jpeg_natural_order[i]] = MIN(tmp, maxovershoot);
    }
    n++;
  }
  while(n < size);
}
488
489 /*
490 Float version of preprocess_deringing()
491 */
METHODDEF(void)
float_preprocess_deringing(FAST_FLOAT *data, const JQUANT_TBL *quantization_table)
/* Same algorithm as preprocess_deringing(), operating on the float
 * workspace; see the comments there for the rationale of each step.
 */
{
  const FAST_FLOAT maxsample = 255 - CENTERJSAMPLE;
  const int size = DCTSIZE * DCTSIZE;

  FAST_FLOAT sum = 0;
  int maxsample_count = 0;
  int i;
  int n;
  FAST_FLOAT maxovershoot;

  /* Count max-valued (clipped) pixels and accumulate the block sum. */
  for(i=0; i < size; i++) {
    sum += data[i];
    if (data[i] >= maxsample) {
      maxsample_count++;
    }
  }

  /* Nothing clipped, or fully flat block: nothing to do. */
  if (!maxsample_count || maxsample_count == size) {
    return;
  }

  /* Cap overshoot: min of a fixed limit, 2x the DC quant step, and the
     average DC headroom per clipped pixel. */
  maxovershoot = maxsample + MIN(MIN(31, 2*quantization_table->quantval[0]), (maxsample * size - sum) / maxsample_count);

  n = 0;
  do {
    int start, end, length;
    FAST_FLOAT f1, f2, l1, l2, fslope, lslope;
    float step, position;

    /* Traverse pixels in zig-zag order so runs behave like a 1-D line. */
    if (data[jpeg_natural_order[n]] < maxsample) {
      n++;
      continue;
    }

    /* Locate the run of clipped pixels: [start, end). */
    start = n;
    while(++n < size && data[jpeg_natural_order[n]] >= maxsample) {}
    end = n;

    /* Sample neighbors around the run to estimate entry/exit slopes. */
    f1 = data[jpeg_natural_order[start >= 1 ? start-1 : 0]];
    f2 = data[jpeg_natural_order[start >= 2 ? start-2 : 0]];

    l1 = data[jpeg_natural_order[end < size-1 ? end : size-1]];
    l2 = data[jpeg_natural_order[end < size-2 ? end+1 : size-1]];

    fslope = MAX(f1-f2, maxsample-f1);
    lslope = MAX(l1-l2, maxsample-l1);

    /* Unknown slope at block boundary: make the curve symmetric. */
    if (start == 0) {
      fslope = lslope;
    }
    if (end == size) {
      lslope = fslope;
    }

    length = end - start;
    step = 1.f/(float)(length + 1);
    position = step;

    /* Replace the run with a clamped Catmull-Rom overshoot curve. */
    for(i = start; i < end; i++, position += step) {
      FAST_FLOAT tmp = catmull_rom(maxsample - fslope, maxsample, maxsample, maxsample - lslope, position, length);
      data[jpeg_natural_order[i]] = MIN(tmp, maxovershoot);
    }
    n++;
  }
  while(n < size);
}
560
561 /*
562 * Load data into workspace, applying unsigned->signed conversion.
563 */
564
565 METHODDEF(void)
convsamp(JSAMPARRAY sample_data,JDIMENSION start_col,DCTELEM * workspace)566 convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace)
567 {
568 register DCTELEM *workspaceptr;
569 register JSAMPROW elemptr;
570 register int elemr;
571
572 workspaceptr = workspace;
573 for (elemr = 0; elemr < DCTSIZE; elemr++) {
574 elemptr = sample_data[elemr] + start_col;
575
576 #if DCTSIZE == 8 /* unroll the inner loop */
577 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
578 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
579 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
580 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
581 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
582 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
583 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
584 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
585 #else
586 {
587 register int elemc;
588 for (elemc = DCTSIZE; elemc > 0; elemc--)
589 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
590 }
591 #endif
592 }
593 }
594
595
596 /*
597 * Quantize/descale the coefficients, and store into coef_blocks[].
598 */
599
METHODDEF(void)
quantize (JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
/* C quantizer: divide each DCT coefficient in workspace by its divisor,
 * rounding half away from zero, and store the result in coef_block.
 * For 8-bit samples the division is done by reciprocal multiplication
 * using the 4-part table built by compute_reciprocal().
 */
{
  int i;
  DCTELEM temp;
  JCOEFPTR output_ptr = coef_block;

#if BITS_IN_JSAMPLE == 8

  UDCTELEM recip, corr;
  int shift;
  UDCTELEM2 product;

  for (i = 0; i < DCTSIZE2; i++) {
    temp = workspace[i];
    recip = divisors[i + DCTSIZE2 * 0];  /* reciprocal */
    corr = divisors[i + DCTSIZE2 * 1];   /* correction (includes rounding) */
    shift = divisors[i + DCTSIZE2 * 3];  /* shift = r - word size */

    if (temp < 0) {
      /* The reciprocal method needs an unsigned dividend, so work on the
       * magnitude and restore the sign afterwards. */
      temp = -temp;
      product = (UDCTELEM2)(temp + corr) * recip;
      product >>= shift + sizeof(DCTELEM)*8;  /* total shift back to r */
      temp = (DCTELEM)product;
      temp = -temp;
    } else {
      product = (UDCTELEM2)(temp + corr) * recip;
      product >>= shift + sizeof(DCTELEM)*8;
      temp = (DCTELEM)product;
    }
    output_ptr[i] = (JCOEF) temp;
  }

#else

  register DCTELEM qval;

  for (i = 0; i < DCTSIZE2; i++) {
    qval = divisors[i];
    temp = workspace[i];
    /* Divide the coefficient value by qval, ensuring proper rounding.
     * Since C does not specify the direction of rounding for negative
     * quotients, we have to force the dividend positive for portability.
     *
     * In most files, at least half of the output values will be zero
     * (at default quantization settings, more like three-quarters...)
     * so we should ensure that this case is fast.  On many machines,
     * a comparison is enough cheaper than a divide to make a special test
     * a win.  Since both inputs will be nonnegative, we need only test
     * for a < b to discover whether a/b is 0.
     * If your machine's division is fast enough, define FAST_DIVIDE.
     */
#ifdef FAST_DIVIDE
#define DIVIDE_BY(a,b)  a /= b
#else
#define DIVIDE_BY(a,b)  if (a >= b) a /= b; else a = 0
#endif
    if (temp < 0) {
      temp = -temp;
      temp += qval>>1;  /* for rounding */
      DIVIDE_BY(temp, qval);
      temp = -temp;
    } else {
      temp += qval>>1;  /* for rounding */
      DIVIDE_BY(temp, qval);
    }
    output_ptr[i] = (JCOEF) temp;
  }

#endif

}
672
673
674 /*
675 * Perform forward DCT on one or more blocks of a component.
676 *
677 * The input samples are taken from the sample_data[] array starting at
678 * position start_row/start_col, and moving to the right for any additional
679 * blocks. The quantized coefficients are returned in coef_blocks[].
680 */
681
METHODDEF(void)
forward_DCT (j_compress_ptr cinfo, jpeg_component_info *compptr,
             JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
             JDIMENSION start_row, JDIMENSION start_col,
             JDIMENSION num_blocks, JBLOCKROW dst)
/* This version is used for integer DCT implementations.
 * If dst is non-NULL, the unquantized (post-DCT, pre-quantization)
 * coefficients are also saved there for later trellis quantization.
 */
{
  /* This routine is heavily used, so it's worth coding it tightly. */
  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no];
  JQUANT_TBL *qtbl = cinfo->quant_tbl_ptrs[compptr->quant_tbl_no];
  DCTELEM *workspace;
  JDIMENSION bi;

  /* Make sure the compiler doesn't look up these every pass */
  forward_DCT_method_ptr do_dct = fdct->dct;
  convsamp_method_ptr do_convsamp = fdct->convsamp;
  preprocess_method_ptr do_preprocess = fdct->preprocess;  /* may be NULL */
  quantize_method_ptr do_quantize = fdct->quantize;
  workspace = fdct->workspace;

  sample_data += start_row;     /* fold in the vertical offset once */

  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
    /* Load data into workspace, applying unsigned->signed conversion */
    (*do_convsamp) (sample_data, start_col, workspace);

    /* Optional deringing pass on the level-shifted samples. */
    if (do_preprocess) {
      (*do_preprocess) (workspace, qtbl);
    }

    /* Perform the DCT */
    (*do_dct) (workspace);

    /* Save unquantized transform coefficients for later trellis quantization */
    if (dst) {
      int i;
      if (cinfo->dct_method == JDCT_IFAST) {
        static const INT16 aanscales[DCTSIZE2] = {
          /* precomputed values scaled up by 14 bits */
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
          22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
          21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
          19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
          12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
           8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
           4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
        };

        /* IFAST output carries the AA&N scale factors (aanscales / 2^14);
         * divide them back out with rounding (x * 2^14 / s, computed as
         * (x * 2^15 +/- s) / (2*s)) so dst holds nominal-scale values. */
        for (i = 0; i < DCTSIZE2; i++) {
          int x = workspace[i];
          int s = aanscales[i];
          x = (x >= 0) ? (x * 32768 + s) / (2*s) : (x * 32768 - s) / (2*s);
          dst[bi][i] = x;
        }

      } else {
        for (i = 0; i < DCTSIZE2; i++) {
          dst[bi][i] = workspace[i];
        }
      }
    }

    /* Quantize/descale the coefficients, and store into coef_blocks[] */
    (*do_quantize) (coef_blocks[bi], divisors, workspace);

    if (do_preprocess) {
      /* Deringing overshoot can enlarge coefficients; clamp them so they
       * remain representable in MAX_COEF_BITS (presumably required by the
       * Huffman encoder -- see jchuff.h). */
      int i;
      int maxval = (1 << MAX_COEF_BITS) - 1;
      for (i = 0; i < 64; i++) {
        if (coef_blocks[bi][i] < -maxval)
          coef_blocks[bi][i] = -maxval;
        if (coef_blocks[bi][i] > maxval)
          coef_blocks[bi][i] = maxval;
      }
    }
  }
}
761
762
763 #ifdef DCT_FLOAT_SUPPORTED
764
765 METHODDEF(void)
convsamp_float(JSAMPARRAY sample_data,JDIMENSION start_col,FAST_FLOAT * workspace)766 convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
767 FAST_FLOAT *workspace)
768 {
769 register FAST_FLOAT *workspaceptr;
770 register JSAMPROW elemptr;
771 register int elemr;
772
773 workspaceptr = workspace;
774 for (elemr = 0; elemr < DCTSIZE; elemr++) {
775 elemptr = sample_data[elemr] + start_col;
776 #if DCTSIZE == 8 /* unroll the inner loop */
777 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
778 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
779 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
780 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
781 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
782 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
783 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
784 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
785 #else
786 {
787 register int elemc;
788 for (elemc = DCTSIZE; elemc > 0; elemc--)
789 *workspaceptr++ = (FAST_FLOAT)
790 (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
791 }
792 #endif
793 }
794 }
795
796
797 METHODDEF(void)
quantize_float(JCOEFPTR coef_block,FAST_FLOAT * divisors,FAST_FLOAT * workspace)798 quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
799 FAST_FLOAT *workspace)
800 {
801 register FAST_FLOAT temp;
802 register int i;
803 register JCOEFPTR output_ptr = coef_block;
804
805 for (i = 0; i < DCTSIZE2; i++) {
806 /* Apply the quantization and scaling factor */
807 temp = workspace[i] * divisors[i];
808
809 /* Round to nearest integer.
810 * Since C does not specify the direction of rounding for negative
811 * quotients, we have to force the dividend positive for portability.
812 * The maximum coefficient size is +-16K (for 12-bit data), so this
813 * code should work for either 16-bit or 32-bit ints.
814 */
815 output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
816 }
817 }
818
819
METHODDEF(void)
forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info *compptr,
                   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
                   JDIMENSION start_row, JDIMENSION start_col,
                   JDIMENSION num_blocks, JBLOCKROW dst)
/* This version is used for floating-point DCT implementations.
 * If dst is non-NULL, unquantized coefficients are saved there (as
 * integers) for later trellis quantization.
 */
{
  /* This routine is heavily used, so it's worth coding it tightly. */
  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  FAST_FLOAT *divisors = fdct->float_divisors[compptr->quant_tbl_no];
  JQUANT_TBL *qtbl = cinfo->quant_tbl_ptrs[compptr->quant_tbl_no];
  FAST_FLOAT *workspace;
  JDIMENSION bi;
  float v;
  int x;


  /* Make sure the compiler doesn't look up these every pass */
  float_DCT_method_ptr do_dct = fdct->float_dct;
  float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
  float_preprocess_method_ptr do_preprocess = fdct->float_preprocess;  /* may be NULL */
  float_quantize_method_ptr do_quantize = fdct->float_quantize;
  workspace = fdct->float_workspace;

  sample_data += start_row;     /* fold in the vertical offset once */

  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
    /* Load data into workspace, applying unsigned->signed conversion */
    (*do_convsamp) (sample_data, start_col, workspace);

    /* Optional deringing pass on the level-shifted samples. */
    if (do_preprocess) {
      (*do_preprocess) (workspace, qtbl);
    }

    /* Perform the DCT */
    (*do_dct) (workspace);

    /* Save unquantized transform coefficients for later trellis quantization */
    /* Currently save as integer values. Could save float values but would require */
    /* modifications to memory allocation and trellis quantization */

    if (dst) {
      int i;
      static const double aanscalefactor[DCTSIZE] = {
        1.0, 1.387039845, 1.306562965, 1.175875602,
        1.0, 0.785694958, 0.541196100, 0.275899379
      };

      /* Divide out the AA&N row/column scale factors and round to the
       * nearest integer (away from zero on ties). */
      for (i = 0; i < DCTSIZE2; i++) {
        v = workspace[i];
        v /= aanscalefactor[i%8];   /* column factor */
        v /= aanscalefactor[i/8];   /* row factor */
        x = (v >= 0.0) ? (int)(v + 0.5) : (int)(v - 0.5);
        dst[bi][i] = x;
      }
    }

    /* Quantize/descale the coefficients, and store into coef_blocks[] */
    (*do_quantize) (coef_blocks[bi], divisors, workspace);

    if (do_preprocess) {
      /* Deringing overshoot can enlarge coefficients; clamp them so they
       * remain representable in MAX_COEF_BITS (presumably required by the
       * Huffman encoder -- see jchuff.h). */
      int i;
      int maxval = (1 << MAX_COEF_BITS) - 1;
      for (i = 0; i < 64; i++) {
        if (coef_blocks[bi][i] < -maxval)
          coef_blocks[bi][i] = -maxval;
        if (coef_blocks[bi][i] > maxval)
          coef_blocks[bi][i] = maxval;
      }
    }
  }
}
892
893 #endif /* DCT_FLOAT_SUPPORTED */
894
895 #include "jpeg_nbits_table.h"
896
/* Uniform rate-distortion lambda weights: every coefficient position is
 * weighted equally in the trellis cost function. */
static const float jpeg_lambda_weights_flat[64] = {
  1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
  1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
  1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
  1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
  1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
  1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
  1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
  1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f
};
907
/* Lambda weights presumably derived from a luma contrast-sensitivity
 * function (per the name): low-frequency positions (top-left) are weighted
 * more heavily than high-frequency ones in the trellis cost function. */
static const float jpeg_lambda_weights_csf_luma[64] = {
  3.35630f, 3.59892f, 3.20921f, 2.28102f, 1.42378f, 0.88079f, 0.58190f, 0.43454f,
  3.59893f, 3.21284f, 2.71282f, 1.98092f, 1.30506f, 0.83852f, 0.56346f, 0.42146f,
  3.20921f, 2.71282f, 2.12574f, 1.48616f, 0.99660f, 0.66132f, 0.45610f, 0.34609f,
  2.28102f, 1.98092f, 1.48616f, 0.97492f, 0.64622f, 0.43812f, 0.31074f, 0.24072f,
  1.42378f, 1.30506f, 0.99660f, 0.64623f, 0.42051f, 0.28446f, 0.20380f, 0.15975f,
  0.88079f, 0.83852f, 0.66132f, 0.43812f, 0.28446f, 0.19092f, 0.13635f, 0.10701f,
  0.58190f, 0.56346f, 0.45610f, 0.31074f, 0.20380f, 0.13635f, 0.09674f, 0.07558f,
  0.43454f, 0.42146f, 0.34609f, 0.24072f, 0.15975f, 0.10701f, 0.07558f, 0.05875f,
};
918
919 #define DC_TRELLIS_MAX_CANDIDATES 9
920
get_num_dc_trellis_candidates(int dc_quantval)921 LOCAL(int) get_num_dc_trellis_candidates(int dc_quantval) {
922 /* Higher qualities can tolerate higher DC distortion */
923 return MIN(DC_TRELLIS_MAX_CANDIDATES, (2 + 60 / dc_quantval)|1);
924 }
925
/*
 * Trellis (rate-distortion optimized) quantization for Huffman entropy
 * coding.  For each block the AC coefficients are quantized by a Viterbi
 * search over "position of previous nonzero coefficient" states, minimizing
 * rate + lambda*distortion, where rate is taken from the derived Huffman
 * tables (dctbl/actbl) and distortion is weighted per coefficient by
 * lambda_tbl.  Optionally also runs a trellis over DC candidates across the
 * whole row of blocks (trellis_quant_dc) and over end-of-block placement
 * across blocks (trellis_eob_opt, for progressive EOBn run codes).
 *
 * coef_blocks receives the chosen quantized coefficients; src holds the raw
 * (unquantized, x8-scaled) DCT coefficients.  norm_src/norm_coef accumulate
 * statistics for adaptive quant-table optimization (trellis_q_opt).
 * last_dc_val is the DC predictor, updated on exit when the DC trellis runs.
 * coef_blocks_above/src_above, when non-NULL, let the DC distortion term
 * account for vertical gradients into the row above.
 */
GLOBAL(void)
quantize_trellis(j_compress_ptr cinfo, c_derived_tbl *dctbl,
                 c_derived_tbl *actbl, JBLOCKROW coef_blocks, JBLOCKROW src,
                 JDIMENSION num_blocks, JQUANT_TBL * qtbl, double *norm_src,
                 double *norm_coef, JCOEF *last_dc_val,
                 JBLOCKROW coef_blocks_above, JBLOCKROW src_above)
{
  int i, j, k, l;
  /* Viterbi state per scan position i (in zig-zag order):
   * accumulated_zero_dist[i] = distortion of zeroing coefficients Ss..i,
   * accumulated_cost[i] = best cost with a nonzero coefficient at i,
   * run_start[i] = backtrack pointer (position of previous nonzero coef). */
  float accumulated_zero_dist[DCTSIZE2];
  float accumulated_cost[DCTSIZE2];
  int run_start[DCTSIZE2];
  int bi;
  float best_cost;
  int last_coeff_idx; /* position of last nonzero coefficient */
  float norm = 0.0;
  float lambda_base;
  float lambda;
  float lambda_dc;
  const float *lambda_tbl = (cinfo->master->use_lambda_weight_tbl) ?
                            jpeg_lambda_weights_csf_luma :
                            jpeg_lambda_weights_flat;
  int Ss, Se;
  /* Block-level EOB-optimization state (heap-allocated; num_blocks+1 slots) */
  float *accumulated_zero_block_cost = NULL;
  float *accumulated_block_cost = NULL;
  int *block_run_start = NULL;
  int *requires_eob = NULL;
  int has_eob;
  float cost_all_zeros;
  float best_cost_skip;
  float cost;
  int zero_run;
  int run_bits;
  int rate;
  /* DC trellis state, one array per candidate index */
  float *accumulated_dc_cost[DC_TRELLIS_MAX_CANDIDATES];
  int *dc_cost_backtrack[DC_TRELLIS_MAX_CANDIDATES];
  JCOEF *dc_candidate[DC_TRELLIS_MAX_CANDIDATES];
  int mode = 1; /* 1 = per-coefficient lambda from quant table (see below) */
  float lambda_table[DCTSIZE2];
  const int dc_trellis_candidates = get_num_dc_trellis_candidates(qtbl->quantval[0]);

  Ss = cinfo->Ss;
  Se = cinfo->Se;
  /* DC (index 0) is handled by its own trellis; AC search starts at 1 */
  if (Ss == 0)
    Ss = 1;
  if (Se < Ss)
    return;
  if (cinfo->master->trellis_eob_opt) {
    accumulated_zero_block_cost = (float *)malloc((num_blocks + 1) * sizeof(float));
    accumulated_block_cost = (float *)malloc((num_blocks + 1) * sizeof(float));
    block_run_start = (int *)malloc(num_blocks * sizeof(int));
    requires_eob = (int *)malloc((num_blocks + 1) * sizeof(int));
    if (!accumulated_zero_block_cost ||
        !accumulated_block_cost ||
        !block_run_start ||
        !requires_eob) {
      ERREXIT(cinfo, JERR_OUT_OF_MEMORY);
    }

    accumulated_zero_block_cost[0] = 0;
    accumulated_block_cost[0] = 0;
    requires_eob[0] = 0;
  }

  if (cinfo->master->trellis_quant_dc) {
    for (i = 0; i < dc_trellis_candidates; i++) {
      accumulated_dc_cost[i] = (float *)malloc(num_blocks * sizeof(float));
      dc_cost_backtrack[i] = (int *)malloc(num_blocks * sizeof(int));
      dc_candidate[i] = (JCOEF *)malloc(num_blocks * sizeof(JCOEF));
      if (!accumulated_dc_cost[i] ||
          !dc_cost_backtrack[i] ||
          !dc_candidate[i]) {
        ERREXIT(cinfo, JERR_OUT_OF_MEMORY);
      }
    }
  }

  /* Mean squared quantizer step over the 63 AC coefficients; used to scale
   * lambda in mode 0 */
  norm = 0.0;
  for (i = 1; i < DCTSIZE2; i++) {
    norm += qtbl->quantval[i] * qtbl->quantval[i];
  }
  norm /= 63.0;

  if (mode == 1) {
    /* Weight each coefficient's distortion by 1/q^2, i.e. measure error in
     * quantizer-step units rather than absolute units */
    lambda_base = 1.0;
    lambda_tbl = lambda_table;
    for (i = 0; i < DCTSIZE2; i++)
      lambda_table[i] = 1.0 / (qtbl->quantval[i] * qtbl->quantval[i]);
  } else
    lambda_base = 1.0 / norm;

  for (bi = 0; bi < num_blocks; bi++) {

    /* Block activity (mean squared AC energy); an active block gets a
     * smaller lambda (more rate) under lambda_log_scale2 */
    norm = 0.0;
    for (i = 1; i < DCTSIZE2; i++) {
      norm += src[bi][i] * src[bi][i];
    }
    norm /= 63.0;

    if (cinfo->master->lambda_log_scale2 > 0.0)
      lambda = pow(2.0, cinfo->master->lambda_log_scale1) * lambda_base /
               (pow(2.0, cinfo->master->lambda_log_scale2) + norm);
    else
      lambda = pow(2.0, cinfo->master->lambda_log_scale1 - 12.0) * lambda_base;

    lambda_dc = lambda * lambda_tbl[0];

    /* Virtual start state at position Ss-1 */
    accumulated_zero_dist[Ss-1] = 0.0;
    accumulated_cost[Ss-1] = 0.0;

    /* Do DC coefficient */
    if (cinfo->master->trellis_quant_dc) {
      /* sign is -1 for negative src, 0 otherwise (relies on arithmetic
       * right shift of a negative int — implementation-defined but
       * universal on supported compilers) */
      int sign = src[bi][0] >> 31;
      int x = abs(src[bi][0]);
      int q = 8 * qtbl->quantval[0]; /* src is scaled by 8 relative to qtbl */
      int qval;
      float dc_candidate_dist;

      qval = (x + q/2) / q; /* quantized value (round nearest) */
      for (k = 0; k < dc_trellis_candidates; k++) {
        int delta;
        int dc_delta;
        int bits;

        /* Candidates form a window centered on the round-to-nearest value;
         * clamp to the representable coefficient magnitude */
        dc_candidate[k][bi] = qval - dc_trellis_candidates/2 + k;
        if (dc_candidate[k][bi] >= (1<<MAX_COEF_BITS))
          dc_candidate[k][bi] = (1<<MAX_COEF_BITS)-1;
        if (dc_candidate[k][bi] <= -(1<<MAX_COEF_BITS))
          dc_candidate[k][bi] = -(1<<MAX_COEF_BITS)+1;

        delta = dc_candidate[k][bi] * q - x;
        dc_candidate_dist = delta * delta * lambda_dc;
        dc_candidate[k][bi] *= 1 + 2*sign; /* reapply sign (+1 or -1) */

        /* Take into account DC differences */
        if (coef_blocks_above && src_above && cinfo->master->trellis_delta_dc_weight > 0.0) {
          int dc_above_orig;
          int dc_above_recon;
          int dc_orig;
          int dc_recon;
          float vertical_dist;

          dc_above_orig = src_above[bi][0];
          dc_above_recon = coef_blocks_above[bi][0] * q;
          dc_orig = src[bi][0];
          dc_recon = dc_candidate[k][bi] * q;
          /* delta is difference of vertical gradients */
          delta = (dc_above_orig - dc_orig) - (dc_above_recon - dc_recon);
          vertical_dist = delta * delta * lambda_dc;
          /* Blend plain distortion with the vertical-gradient distortion */
          dc_candidate_dist += cinfo->master->trellis_delta_dc_weight * (vertical_dist - dc_candidate_dist);
        }

        if (bi == 0) {
          /* First block: predictor is the caller-supplied last_dc_val */
          dc_delta = dc_candidate[k][bi] - *last_dc_val;

          /* Derive number of suffix bits */
          bits = 0;
          dc_delta = abs(dc_delta);
          while (dc_delta) {
            dc_delta >>= 1;
            bits++;
          }
          /* rate = suffix bits + Huffman code length for the bit category */
          cost = bits + dctbl->ehufsi[bits] + dc_candidate_dist;
          accumulated_dc_cost[k][0] = cost;
          dc_cost_backtrack[k][0] = -1;
        } else {
          /* Try every candidate of the previous block as the predictor and
           * keep the cheapest transition */
          for (l = 0; l < dc_trellis_candidates; l++) {
            dc_delta = dc_candidate[k][bi] - dc_candidate[l][bi-1];

            /* Derive number of suffix bits */
            bits = 0;
            dc_delta = abs(dc_delta);
            while (dc_delta) {
              dc_delta >>= 1;
              bits++;
            }
            cost = bits + dctbl->ehufsi[bits] + dc_candidate_dist + accumulated_dc_cost[l][bi-1];
            if (l == 0 || cost < accumulated_dc_cost[k][bi]) {
              accumulated_dc_cost[k][bi] = cost;
              dc_cost_backtrack[k][bi] = l;
            }
          }
        }
      }
    }

    /* Do AC coefficients */
    for (i = Ss; i <= Se; i++) {
      int z = jpeg_natural_order[i]; /* natural-order index for scan pos i */

      int sign = src[bi][z] >> 31; /* -1 if negative, 0 otherwise */
      int x = abs(src[bi][z]);
      int q = 8 * qtbl->quantval[z];
      int candidate[16];
      int candidate_bits[16];
      float candidate_dist[16];
      int num_candidates;
      int qval;

      /* Distortion of coding everything from Ss up to i as zero */
      accumulated_zero_dist[i] = x * x * lambda * lambda_tbl[z] + accumulated_zero_dist[i-1];

      qval = (x + q/2) / q; /* quantized value (round nearest) */

      if (qval == 0) {
        coef_blocks[bi][z] = 0;
        accumulated_cost[i] = 1e38; /* Shouldn't be needed */
        continue;
      }

      if (qval >= (1<<MAX_COEF_BITS))
        qval = (1<<MAX_COEF_BITS)-1;

      /* One candidate per bit category <= that of qval: the largest value
       * of each category ((2<<k)-1), plus qval itself for the top category */
      num_candidates = jpeg_nbits_table[qval];
      for (k = 0; k < num_candidates; k++) {
        int delta;
        candidate[k] = (k < num_candidates - 1) ? (2 << k) - 1 : qval;
        delta = candidate[k] * q - x;
        candidate_bits[k] = k+1;
        candidate_dist[k] = delta * delta * lambda * lambda_tbl[z];
      }

      accumulated_cost[i] = 1e38;

      /* Transition from each possible previous nonzero position j */
      for (j = Ss-1; j < i; j++) {
        int zz = jpeg_natural_order[j];
        if (j != Ss-1 && coef_blocks[bi][zz] == 0)
          continue;

        zero_run = i - 1 - j;
        /* Runs >= 16 need the ZRL (0xF0) symbol; skip if it has no code */
        if ((zero_run >> 4) && actbl->ehufsi[0xf0] == 0)
          continue;

        run_bits = (zero_run >> 4) * actbl->ehufsi[0xf0];
        zero_run &= 15;

        for (k = 0; k < num_candidates; k++) {
          /* Huffman symbol is (run << 4) | size; length 0 means no code */
          int coef_bits = actbl->ehufsi[16 * zero_run + candidate_bits[k]];
          if (coef_bits == 0)
            continue;

          rate = coef_bits + candidate_bits[k] + run_bits;
          cost = rate + candidate_dist[k];
          /* + distortion of the zero run (j..i-1) + best cost up to j */
          cost += accumulated_zero_dist[i-1] - accumulated_zero_dist[j] + accumulated_cost[j];

          if (cost < accumulated_cost[i]) {
            /* (v ^ sign) - sign == v if sign==0, -v if sign==-1 */
            coef_blocks[bi][z] = (candidate[k] ^ sign) - sign;
            accumulated_cost[i] = cost;
            run_start[i] = j;
          }
        }
      }
    }

    /* Pick the best position for the last nonzero coefficient, comparing
     * each "end at i" option (plus an EOB when i < Se) with coding the
     * whole block as zeros (an EOB symbol, ehufsi[0]) */
    last_coeff_idx = Ss-1;
    best_cost = accumulated_zero_dist[Se] + actbl->ehufsi[0];
    cost_all_zeros = accumulated_zero_dist[Se];
    best_cost_skip = cost_all_zeros; /* best cost without the trailing EOB */

    for (i = Ss; i <= Se; i++) {
      int z = jpeg_natural_order[i];
      if (coef_blocks[bi][z] != 0) {
        float cost = accumulated_cost[i] + accumulated_zero_dist[Se] - accumulated_zero_dist[i];
        float cost_wo_eob = cost;

        if (i < Se)
          cost += actbl->ehufsi[0];

        if (cost < best_cost) {
          best_cost = cost;
          last_coeff_idx = i;
          best_cost_skip = cost_wo_eob;
        }
      }
    }

    /* 0: ends exactly at Se (no EOB); 1: needs an EOB; 2: block is all zero */
    has_eob = (last_coeff_idx < Se) + (last_coeff_idx == Ss-1);

    /* Zero out coefficients that are part of runs */
    i = Se;
    while (i >= Ss)
    {
      /* Zero everything after the surviving nonzero coefficient, then hop
       * backwards along the run_start backtrack chain */
      while (i > last_coeff_idx) {
        int z = jpeg_natural_order[i];
        coef_blocks[bi][z] = 0;
        i--;
      }
      last_coeff_idx = run_start[i];
      i--;
    }

    if (cinfo->master->trellis_eob_opt) {
      /* Accumulate the cost of coding this block as all-zero, and run a
       * second trellis over blocks: state = index of first block of the
       * current zero-block run, transition cost = EOBn run code length */
      accumulated_zero_block_cost[bi+1] = accumulated_zero_block_cost[bi];
      accumulated_zero_block_cost[bi+1] += cost_all_zeros;
      requires_eob[bi+1] = has_eob;

      best_cost = 1e38;

      if (has_eob != 2) { /* only if this block has nonzero coefficients */
        for (i = 0; i <= bi; i++) {
          int zero_block_run;
          int nbits;
          float cost;

          if (requires_eob[i] == 2)
            continue;

          cost = best_cost_skip; /* cost of coding a nonzero block */
          cost += accumulated_zero_block_cost[bi];
          cost -= accumulated_zero_block_cost[i];
          cost += accumulated_block_cost[i];
          zero_block_run = bi - i + requires_eob[i];
          nbits = jpeg_nbits_table[zero_block_run];
          /* EOBn symbol is (nbits << 4); plus nbits extension bits */
          cost += actbl->ehufsi[16*nbits] + nbits;

          if (cost < best_cost) {
            block_run_start[bi] = i;
            best_cost = cost;
            accumulated_block_cost[bi+1] = cost;
          }
        }
      }
    }
  }

  if (cinfo->master->trellis_eob_opt) {
    /* Terminate the block-level trellis: choose where the final zero-block
     * run begins, then backtrack and blank out the skipped blocks */
    int last_block = num_blocks;
    best_cost = 1e38;

    for (i = 0; i <= num_blocks; i++) {
      int zero_block_run;
      int nbits;
      float cost = 0.0;

      if (requires_eob[i] == 2)
        continue;

      cost += accumulated_zero_block_cost[num_blocks];
      cost -= accumulated_zero_block_cost[i];
      zero_block_run = num_blocks - i + requires_eob[i];
      nbits = jpeg_nbits_table[zero_block_run];
      cost += actbl->ehufsi[16*nbits] + nbits;
      if (cost < best_cost) {
        best_cost = cost;
        last_block = i;
      }
    }
    last_block--;
    bi = num_blocks - 1;
    while (bi >= 0) {
      while (bi > last_block) {
        for (j = Ss; j <= Se; j++) {
          int z = jpeg_natural_order[j];
          coef_blocks[bi][z] = 0;
        }
        bi--;
      }
      last_block = block_run_start[bi]-1;
      bi--;
    }
    free(accumulated_zero_block_cost);
    free(accumulated_block_cost);
    free(block_run_start);
    free(requires_eob);
  }

  if (cinfo->master->trellis_q_opt) {
    /* Accumulate <src,coef> and 8*<coef,coef> per frequency, used later to
     * derive optimized quantization table entries */
    for (bi = 0; bi < num_blocks; bi++) {
      for (i = 1; i < DCTSIZE2; i++) {
        norm_src[i] += src[bi][i] * coef_blocks[bi][i];
        norm_coef[i] += 8 * coef_blocks[bi][i] * coef_blocks[bi][i];
      }
    }
  }

  if (cinfo->master->trellis_quant_dc) {
    /* Pick the cheapest final DC state, then backtrack through the row */
    j = 0;
    for (i = 1; i < dc_trellis_candidates; i++) {
      if (accumulated_dc_cost[i][num_blocks-1] < accumulated_dc_cost[j][num_blocks-1])
        j = i;
    }
    for (bi = num_blocks-1; bi >= 0; bi--) {
      coef_blocks[bi][0] = dc_candidate[j][bi];
      j = dc_cost_backtrack[j][bi];
    }

    /* Save DC predictor */
    *last_dc_val = coef_blocks[num_blocks-1][0];

    for (i = 0; i < dc_trellis_candidates; i++) {
      free(accumulated_dc_cost[i]);
      free(dc_cost_backtrack[i]);
      free(dc_candidate[i]);
    }
  }

}
1319
1320 #ifdef C_ARITH_CODING_SUPPORTED
/*
 * Trellis quantization for the arithmetic entropy coder.  Same structure as
 * quantize_trellis() above, but rates are estimated from the measured
 * arithmetic-coder symbol statistics in *r (rate_dc/rate_ac give an
 * approximate cost in bits of coding a 0/1 decision in each context) rather
 * than from Huffman code lengths.  Context/state indexing follows the
 * JPEG arithmetic conditioning (ITU-T T.81 Annexes D and F): per-block DC
 * context classes (zero/small/large, positive/negative) and per-position
 * AC contexts.  There is no block-level EOB-run optimization here because
 * arithmetic coding has no EOBn run symbols.
 */
GLOBAL(void)
quantize_trellis_arith(j_compress_ptr cinfo, arith_rates *r,
                       JBLOCKROW coef_blocks, JBLOCKROW src,
                       JDIMENSION num_blocks, JQUANT_TBL * qtbl,
                       double *norm_src, double *norm_coef,
                       JCOEF *last_dc_val,
                       JBLOCKROW coef_blocks_above, JBLOCKROW src_above)
{
  int i, j, k, l;
  /* Viterbi state per scan position (see quantize_trellis) */
  float accumulated_zero_dist[DCTSIZE2];
  float accumulated_cost[DCTSIZE2];
  int run_start[DCTSIZE2];
  int bi;
  float best_cost;
  int last_coeff_idx; /* position of last nonzero coefficient */
  float norm = 0.0;
  float lambda_base;
  float lambda;
  float lambda_dc;
  const float *lambda_tbl = (cinfo->master->use_lambda_weight_tbl) ?
                            jpeg_lambda_weights_csf_luma :
                            jpeg_lambda_weights_flat;
  int Ss, Se;
  float cost;
  float run_bits; /* fractional bits: arithmetic-coder rates are not integer */
  int rate;
  /* DC trellis state; dc_context tracks the conditioning class each
   * candidate would leave for the next block's DC decision */
  float *accumulated_dc_cost[DC_TRELLIS_MAX_CANDIDATES];
  int *dc_cost_backtrack[DC_TRELLIS_MAX_CANDIDATES];
  JCOEF *dc_candidate[DC_TRELLIS_MAX_CANDIDATES];
  int *dc_context[DC_TRELLIS_MAX_CANDIDATES];

  int mode = 1; /* 1 = per-coefficient lambda from quant table */
  float lambda_table[DCTSIZE2];
  const int dc_trellis_candidates = get_num_dc_trellis_candidates(qtbl->quantval[0]);

  Ss = cinfo->Ss;
  Se = cinfo->Se;
  /* DC handled by its own trellis; AC search starts at 1 */
  if (Ss == 0)
    Ss = 1;
  if (Se < Ss)
    return;

  if (cinfo->master->trellis_quant_dc) {
    for (i = 0; i < dc_trellis_candidates; i++) {
      accumulated_dc_cost[i] = (float *)malloc(num_blocks * sizeof(float));
      dc_cost_backtrack[i] = (int *)malloc(num_blocks * sizeof(int));
      dc_candidate[i] = (JCOEF *)malloc(num_blocks * sizeof(JCOEF));
      dc_context[i] = (int *)malloc(num_blocks * sizeof(int));
      if (!accumulated_dc_cost[i] ||
          !dc_cost_backtrack[i] ||
          !dc_candidate[i] ||
          !dc_context[i]) {
        ERREXIT(cinfo, JERR_OUT_OF_MEMORY);
      }
    }
  }

  /* Mean squared AC quantizer step (used for lambda scaling in mode 0) */
  norm = 0.0;
  for (i = 1; i < DCTSIZE2; i++) {
    norm += qtbl->quantval[i] * qtbl->quantval[i];
  }
  norm /= 63.0;

  if (mode == 1) {
    /* Weight distortion by 1/q^2 per coefficient */
    lambda_base = 1.0;
    lambda_tbl = lambda_table;
    for (i = 0; i < DCTSIZE2; i++)
      lambda_table[i] = 1.0 / (qtbl->quantval[i] * qtbl->quantval[i]);
  } else
    lambda_base = 1.0 / norm;

  for (bi = 0; bi < num_blocks; bi++) {

    /* Block activity controls lambda when lambda_log_scale2 is enabled */
    norm = 0.0;
    for (i = 1; i < DCTSIZE2; i++) {
      norm += src[bi][i] * src[bi][i];
    }
    norm /= 63.0;

    if (cinfo->master->lambda_log_scale2 > 0.0)
      lambda = pow(2.0, cinfo->master->lambda_log_scale1) * lambda_base /
               (pow(2.0, cinfo->master->lambda_log_scale2) + norm);
    else
      lambda = pow(2.0, cinfo->master->lambda_log_scale1 - 12.0) * lambda_base;

    lambda_dc = lambda * lambda_tbl[0];

    /* Virtual start state at position Ss-1 */
    accumulated_zero_dist[Ss-1] = 0.0;
    accumulated_cost[Ss-1] = 0.0;

    /* Do DC coefficient */
    if (cinfo->master->trellis_quant_dc) {
      /* sign = -1 if negative (arithmetic right shift of 32-bit int) */
      int sign = src[bi][0] >> 31;
      int x = abs(src[bi][0]);
      int q = 8 * qtbl->quantval[0]; /* src is scaled by 8 vs. qtbl */
      int qval;
      float dc_candidate_dist;

      qval = (x + q/2) / q; /* quantized value (round nearest) */

      /* loop over candidates in current block */
      for (k = 0; k < dc_trellis_candidates; k++) {
        int delta;
        int dc_delta;
        float bits;
        int m;
        int v2;

        /* Candidate window centered on the round-to-nearest value.
         * NOTE(review): unlike the Huffman variant above, the candidate is
         * not clamped to +/-(1<<MAX_COEF_BITS) here — confirm intended. */
        dc_candidate[k][bi] = qval - dc_trellis_candidates/2 + k;
        delta = dc_candidate[k][bi] * q - x;
        dc_candidate_dist = delta * delta * lambda_dc;
        dc_candidate[k][bi] *= 1 + 2*sign; /* reapply sign (+1 or -1) */

        /* Take into account DC differences */
        if (coef_blocks_above && src_above && cinfo->master->trellis_delta_dc_weight > 0.0) {
          int dc_above_orig;
          int dc_above_recon;
          int dc_orig;
          int dc_recon;
          float vertical_dist;

          dc_above_orig = src_above[bi][0];
          dc_above_recon = coef_blocks_above[bi][0] * q;
          dc_orig = src[bi][0];
          dc_recon = dc_candidate[k][bi] * q;
          /* delta is difference of vertical gradients */
          delta = (dc_above_orig - dc_orig) - (dc_above_recon - dc_recon);
          vertical_dist = delta * delta * lambda_dc;
          dc_candidate_dist += cinfo->master->trellis_delta_dc_weight * (vertical_dist - dc_candidate_dist);
        }

        /* loop of candidates from previous block */
        for (l = 0; l < (bi == 0 ? 1 : dc_trellis_candidates); l++) {
          int dc_pred = (bi == 0 ? *last_dc_val : dc_candidate[l][bi-1]);
          int updated_dc_context = 0;
          int st = (bi == 0) ? 0 : dc_context[l][bi-1];
          dc_delta = dc_candidate[k][bi] - dc_pred;

          /* Rate of the zero/nonzero decision in the current DC context */
          bits = r->rate_dc[st][dc_delta != 0];

          if (dc_delta != 0) {
            /* Sign decision, then magnitude category (unary-style chain of
             * decisions, as in T.81 Annex F DC coding) */
            bits += r->rate_dc[st+1][dc_delta < 0];
            st += 2 + (dc_delta < 0);
            updated_dc_context = (dc_delta < 0) ? 8 : 4;

            dc_delta = abs(dc_delta);

            m = 0; /* m ends as the MSB of (dc_delta - 1) */
            if (dc_delta -= 1) {
              bits += r->rate_dc[st][1];
              st = 20; /* magnitude-category states start at 20 */
              m = 1;
              v2 = dc_delta;
              while (v2 >>= 1) {
                bits += r->rate_dc[st][1];
                m <<= 1;
                st++;
              }
            }
            bits += r->rate_dc[st][0]; /* terminate the category chain */

            /* Classify diff as small/medium/large for the next block's
             * conditioning, per the DC conditioning bounds L and U */
            if (m < (int) ((1L << r->arith_dc_L) >> 1))
              updated_dc_context = 0; /* zero diff category */
            else if (m > (int) ((1L << r->arith_dc_U) >> 1))
              updated_dc_context += 8; /* large diff category */

            /* Magnitude (mantissa) bits */
            st += 14;
            while (m >>= 1)
              bits += r->rate_dc[st][(m & dc_delta) ? 1 : 0];
          }

          cost = bits + dc_candidate_dist;
          if (bi != 0)
            cost += accumulated_dc_cost[l][bi-1];

          if (l == 0 || cost < accumulated_dc_cost[k][bi]) {
            accumulated_dc_cost[k][bi] = cost;
            dc_cost_backtrack[k][bi] = (bi == 0 ? -1 : l);
            dc_context[k][bi] = updated_dc_context;
          }
        }
      }
    }

    /* Do AC coefficients */
    for (i = Ss; i <= Se; i++) {
      int z = jpeg_natural_order[i];

      int sign = src[bi][z] >> 31; /* -1 if negative, 0 otherwise */
      int x = abs(src[bi][z]);
      int q = 8 * qtbl->quantval[z];
      int candidate[16];
      float candidate_dist[16];
      int num_candidates;
      int qval;
      int delta;

      /* Distortion of zeroing everything from Ss through i */
      accumulated_zero_dist[i] = x * x * lambda * lambda_tbl[z] + accumulated_zero_dist[i-1];

      qval = (x + q/2) / q; /* quantized value (round nearest) */

      if (qval == 0) {
        coef_blocks[bi][z] = 0;
        accumulated_cost[i] = 1e38; /* Shouldn't be needed */
        continue;
      }

      /* Only two candidates here (qval and qval-1): arithmetic coding has
       * no bit-category structure to exploit with smaller values */
      k = 0;
      candidate[k] = qval;
      delta = candidate[k] * q - x;
      candidate_dist[k] = delta * delta * lambda * lambda_tbl[z];
      k++;
      if (qval > 1) {
        candidate[k] = qval - 1;
        delta = candidate[k] * q - x;
        candidate_dist[k] = delta * delta * lambda * lambda_tbl[z];
        k++;
      }
      num_candidates = k;

      accumulated_cost[i] = 1e38;

      /* Transition from each possible previous nonzero position j */
      for (j = Ss-1; j < i; j++) {
        int zz = jpeg_natural_order[j];
        if (j != Ss-1 && coef_blocks[bi][zz] == 0)
          continue;

        /* Rate of the run: not-EOB at j, then "zero" decisions through
         * i-1, then "nonzero" at i (3 states per position: EOB flag,
         * zero/nonzero, magnitude) */
        run_bits = r->rate_ac[3*j][0]; /* EOB */
        for (k = j+1; k < i; k++)
          run_bits += r->rate_ac[3*(k-1)+1][0];
        run_bits += r->rate_ac[3*(i-1)+1][1];

        for (k = 0; k < num_candidates; k++) {
          float coef_bits = 1.0; /* sign bit */
          int v = candidate[k];
          int v2;
          int m;
          int st;

          /* Magnitude category chain for v (T.81 Annex F AC coding);
           * states switch to the high/low frequency magnitude tables at
           * 189/217 depending on position vs. conditioning Kx */
          st = 3*(i-1)+2;
          m = 0;
          if (v -= 1) {
            coef_bits += r->rate_ac[st][1];
            m = 1;
            v2 = v;
            if (v2 >>= 1) {
              coef_bits += r->rate_ac[st][1];
              m <<= 1;
              st = (i <= r->arith_ac_K) ? 189 : 217;
              while (v2 >>= 1) {
                coef_bits += r->rate_ac[st][1];
                m <<= 1;
                st++;
              }
            }
          }
          coef_bits += r->rate_ac[st][0]; /* terminate the chain */
          st += 14;
          while (m >>= 1) /* mantissa bits */
            coef_bits += r->rate_ac[st][(m & v) ? 1 : 0];

          rate = coef_bits + run_bits;
          cost = rate + candidate_dist[k];
          cost += accumulated_zero_dist[i-1] - accumulated_zero_dist[j] + accumulated_cost[j];

          if (cost < accumulated_cost[i]) {
            /* (v ^ sign) - sign == v if sign==0, -v if sign==-1 */
            coef_blocks[bi][z] = (candidate[k] ^ sign) - sign;
            accumulated_cost[i] = cost;
            run_start[i] = j;
          }
        }
      }
    }

    /* Choose the last nonzero position; baseline is the all-zero block
     * (a single EOB decision at the start) */
    last_coeff_idx = Ss-1;
    best_cost = accumulated_zero_dist[Se] + r->rate_ac[0][1];

    for (i = Ss; i <= Se; i++) {
      int z = jpeg_natural_order[i];
      if (coef_blocks[bi][z] != 0) {
        float cost = accumulated_cost[i] + accumulated_zero_dist[Se] - accumulated_zero_dist[i];

        if (i < Se)
          cost += r->rate_ac[3*(i-1)][1]; /* EOB decision after position i */

        if (cost < best_cost) {
          best_cost = cost;
          last_coeff_idx = i;
        }
      }
    }

    /* Zero out coefficients that are part of runs */
    i = Se;
    while (i >= Ss)
    {
      while (i > last_coeff_idx) {
        int z = jpeg_natural_order[i];
        coef_blocks[bi][z] = 0;
        i--;
      }
      last_coeff_idx = run_start[i];
      i--;
    }

  }

  if (cinfo->master->trellis_q_opt) {
    /* Accumulate statistics for adaptive quant-table optimization */
    for (bi = 0; bi < num_blocks; bi++) {
      for (i = 1; i < DCTSIZE2; i++) {
        norm_src[i] += src[bi][i] * coef_blocks[bi][i];
        norm_coef[i] += 8 * coef_blocks[bi][i] * coef_blocks[bi][i];
      }
    }
  }

  if (cinfo->master->trellis_quant_dc) {
    /* Pick the cheapest final DC state and backtrack through the row */
    j = 0;
    for (i = 1; i < dc_trellis_candidates; i++) {
      if (accumulated_dc_cost[i][num_blocks-1] < accumulated_dc_cost[j][num_blocks-1])
        j = i;
    }
    for (bi = num_blocks-1; bi >= 0; bi--) {
      coef_blocks[bi][0] = dc_candidate[j][bi];
      j = dc_cost_backtrack[j][bi];
    }

    /* Save DC predictor */
    *last_dc_val = coef_blocks[num_blocks-1][0];

    for (i = 0; i < dc_trellis_candidates; i++) {
      free(accumulated_dc_cost[i]);
      free(dc_cost_backtrack[i]);
      free(dc_candidate[i]);
      free(dc_context[i]);
    }
  }
}
1656 #endif
1657
1658 /*
1659 * Initialize FDCT manager.
1660 */
1661
/*
 * Initialize the forward-DCT manager: allocate the controller object,
 * select DCT, sample-conversion, preprocessing, and quantization routines
 * according to cinfo->dct_method (preferring SIMD implementations when the
 * jsimd layer reports support), allocate the per-method workspace, and mark
 * all divisor tables unallocated (they are built lazily in
 * start_pass_fdctmgr).  All memory comes from the JPOOL_IMAGE pool, so it
 * lives for the duration of this compression.
 */
GLOBAL(void)
jinit_forward_dct (j_compress_ptr cinfo)
{
  my_fdct_ptr fdct;
  int i;

  fdct = (my_fdct_ptr)
    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
                                sizeof(my_fdct_controller));
  cinfo->fdct = (struct jpeg_forward_dct *) fdct;
  fdct->pub.start_pass = start_pass_fdctmgr;

  /* First determine the DCT... */
  switch (cinfo->dct_method) {
#ifdef DCT_ISLOW_SUPPORTED
  case JDCT_ISLOW:
    /* Accurate integer DCT */
    fdct->pub.forward_DCT = forward_DCT;
    if (jsimd_can_fdct_islow())
      fdct->dct = jsimd_fdct_islow;
    else
      fdct->dct = jpeg_fdct_islow;
    break;
#endif
#ifdef DCT_IFAST_SUPPORTED
  case JDCT_IFAST:
    /* Fast, less accurate integer DCT */
    fdct->pub.forward_DCT = forward_DCT;
    if (jsimd_can_fdct_ifast())
      fdct->dct = jsimd_fdct_ifast;
    else
      fdct->dct = jpeg_fdct_ifast;
    break;
#endif
#ifdef DCT_FLOAT_SUPPORTED
  case JDCT_FLOAT:
    /* Floating-point DCT uses a separate pipeline (float workspace,
     * divisors, and quantizer) */
    fdct->pub.forward_DCT = forward_DCT_float;
    if (jsimd_can_fdct_float())
      fdct->float_dct = jsimd_fdct_float;
    else
      fdct->float_dct = jpeg_fdct_float;
    break;
#endif
  default:
    ERREXIT(cinfo, JERR_NOT_COMPILED);
    break;
  }

  /* ...then the supporting stages. */
  switch (cinfo->dct_method) {
#ifdef DCT_ISLOW_SUPPORTED
  case JDCT_ISLOW:
#endif
#ifdef DCT_IFAST_SUPPORTED
  case JDCT_IFAST:
#endif
#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
    /* Integer pipeline: sample conversion, optional overshoot-deringing
     * preprocessing, and coefficient quantization */
    if (jsimd_can_convsamp())
      fdct->convsamp = jsimd_convsamp;
    else
      fdct->convsamp = convsamp;

    if (cinfo->master->overshoot_deringing) {
      fdct->preprocess = preprocess_deringing;
    } else {
      fdct->preprocess = NULL;
    }

    if (jsimd_can_quantize())
      fdct->quantize = jsimd_quantize;
    else
      fdct->quantize = quantize;
    break;
#endif
#ifdef DCT_FLOAT_SUPPORTED
  case JDCT_FLOAT:
    /* Float pipeline equivalents of the three stages above */
    if (jsimd_can_convsamp_float())
      fdct->float_convsamp = jsimd_convsamp_float;
    else
      fdct->float_convsamp = convsamp_float;

    if (cinfo->master->overshoot_deringing) {
      fdct->float_preprocess = float_preprocess_deringing;
    } else {
      fdct->float_preprocess = NULL;
    }

    if (jsimd_can_quantize_float())
      fdct->float_quantize = jsimd_quantize_float;
    else
      fdct->float_quantize = quantize_float;
    break;
#endif
  default:
    ERREXIT(cinfo, JERR_NOT_COMPILED);
    break;
  }

  /* Allocate workspace memory (one DCT block; type depends on pipeline) */
#ifdef DCT_FLOAT_SUPPORTED
  if (cinfo->dct_method == JDCT_FLOAT)
    fdct->float_workspace = (FAST_FLOAT *)
      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
                                  sizeof(FAST_FLOAT) * DCTSIZE2);
  else
#endif
    fdct->workspace = (DCTELEM *)
      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
                                  sizeof(DCTELEM) * DCTSIZE2);

  /* Mark divisor tables unallocated */
  for (i = 0; i < NUM_QUANT_TBLS; i++) {
    fdct->divisors[i] = NULL;
#ifdef DCT_FLOAT_SUPPORTED
    fdct->float_divisors[i] = NULL;
#endif
  }
}
1778