1 /*
2  * jcphuff.c
3  *
4  * This file was part of the Independent JPEG Group's software:
5  * Copyright (C) 1995-1997, Thomas G. Lane.
6  * libjpeg-turbo Modifications:
7  * Copyright (C) 2011, 2015, 2018, D. R. Commander.
8  * Copyright (C) 2016, 2018, Matthieu Darbois.
9  * For conditions of distribution and use, see the accompanying README.ijg
10  * file.
11  *
12  * This file contains Huffman entropy encoding routines for progressive JPEG.
13  *
14  * We do not support output suspension in this module, since the library
15  * currently does not allow multiple-scan files to be written with output
16  * suspension.
17  */
18 
19 #define JPEG_INTERNALS
20 #include "jinclude.h"
21 #include "jpeglib.h"
22 #include "jsimd.h"
23 #include "jconfigint.h"
24 #include <limits.h>
25 
26 #ifdef HAVE_INTRIN_H
27 #include <intrin.h>
28 #ifdef _MSC_VER
29 #ifdef HAVE_BITSCANFORWARD64
30 #pragma intrinsic(_BitScanForward64)
31 #endif
32 #ifdef HAVE_BITSCANFORWARD
33 #pragma intrinsic(_BitScanForward)
34 #endif
35 #endif
36 #endif
37 
38 #ifdef C_PROGRESSIVE_SUPPORTED
39 
40 /*
41  * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be
42  * used for bit counting rather than the lookup table.  This will reduce the
43  * memory footprint by 64k, which is important for some mobile applications
44  * that create many isolated instances of libjpeg-turbo (web browsers, for
45  * instance.)  This may improve performance on some mobile platforms as well.
46  * This feature is enabled by default only on Arm processors, because some x86
47  * chips have a slow implementation of bsr, and the use of clz/bsr cannot be
48  * shown to have a significant performance impact even on the x86 chips that
49  * have a fast implementation of it.  When building for Armv6, you can
50  * explicitly disable the use of clz/bsr by adding -mthumb to the compiler
51  * flags (this defines __thumb__).
52  */
53 
54 /* NOTE: Both GCC and Clang define __GNUC__ */
55 #if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))
56 #if !defined(__thumb__) || defined(__thumb2__)
57 #define USE_CLZ_INTRINSIC
58 #endif
59 #endif
60 
/* JPEG_NBITS(x) is used below to find the number of bits needed for a
 * coefficient magnitude; JPEG_NBITS_NONZERO(x) is the variant for callers
 * that already know x != 0.
 */
#ifdef USE_CLZ_INTRINSIC
/* 32 minus the leading-zero count of x.  x == 0 is not handled here; that
 * case is filtered out by the explicit test in JPEG_NBITS().
 */
#define JPEG_NBITS_NONZERO(x)  (32 - __builtin_clz(x))
/* NOTE: x is expanded twice and is not parenthesized, so pass only simple,
 * side-effect-free expressions.
 */
#define JPEG_NBITS(x)          (x ? JPEG_NBITS_NONZERO(x) : 0)
#else
/* Lookup-table variant: x is used directly as the index into
 * jpeg_nbits_table[], so both macros are the same lookup.
 */
#include "jpeg_nbits_table.h"
#define JPEG_NBITS(x)          (jpeg_nbits_table[x])
#define JPEG_NBITS_NONZERO(x)  JPEG_NBITS(x)
#endif
69 
70 
71 /* Expanded entropy encoder object for progressive Huffman encoding. */
72 
typedef struct {
  struct jpeg_entropy_encoder pub; /* public fields */

  /* Pointer to routine to prepare data for encode_mcu_AC_first() */
  /* (start_pass_phuff() points this at either the SIMD routine or the C
   * fallback in this file.)
   */
  void (*AC_first_prepare) (const JCOEF *block,
                            const int *jpeg_natural_order_start, int Sl,
                            int Al, JCOEF *values, size_t *zerobits);
  /* Pointer to routine to prepare data for encode_mcu_AC_refine() */
  /* (Selected the same way; its return value is used as the EOB index.) */
  int (*AC_refine_prepare) (const JCOEF *block,
                            const int *jpeg_natural_order_start, int Sl,
                            int Al, JCOEF *absvalues, size_t *bits);

  /* Mode flag: TRUE for optimization, FALSE for actual data output */
  boolean gather_statistics;

  /* Bit-level coding status.
   * next_output_byte/free_in_buffer are local copies of cinfo->dest fields.
   * Each encode_mcu_* routine loads them from cinfo->dest on entry and
   * stores them back before returning.
   */
  JOCTET *next_output_byte;     /* => next byte to write in buffer */
  size_t free_in_buffer;        /* # of byte spaces remaining in buffer */
  size_t put_buffer;            /* current bit-accumulation buffer */
  int put_bits;                 /* # of bits now in it */
  j_compress_ptr cinfo;         /* link to cinfo (needed for dump_buffer) */

  /* Coding status for DC components */
  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */

  /* Coding status for AC components */
  int ac_tbl_no;                /* the table number of the single component */
  unsigned int EOBRUN;          /* run length of EOBs */
  unsigned int BE;              /* # of buffered correction bits before MCU */
  /* bit_buffer is allocated by start_pass_phuff() (MAX_CORR_BITS chars) the
   * first time an AC refinement scan is started, and reused afterwards.
   */
  char *bit_buffer;             /* buffer for correction bits (1 per char) */
  /* packing correction bits tightly would save some space but cost time... */

  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
  int next_restart_num;         /* next restart number to write (0-7) */

  /* Pointers to derived tables (these workspaces have image lifespan).
   * Since any one scan codes only DC or only AC, we only need one set
   * of tables, not one for DC and one for AC.
   */
  c_derived_tbl *derived_tbls[NUM_HUFF_TBLS];

  /* Statistics tables for optimization; again, one set is enough */
  /* (Each table holds 257 longs; see start_pass_phuff().) */
  long *count_ptrs[NUM_HUFF_TBLS];
} phuff_entropy_encoder;
119 
120 typedef phuff_entropy_encoder *phuff_entropy_ptr;
121 
122 /* MAX_CORR_BITS is the number of bits the AC refinement correction-bit
123  * buffer can hold.  Larger sizes may slightly improve compression, but
124  * 1000 is already well into the realm of overkill.
125  * The minimum safe size is 64 bits.
126  */
127 
128 #define MAX_CORR_BITS  1000     /* Max # of correction bits I can buffer */
129 
/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than JLONG.
 * We assume that int right shift is unsigned if JLONG right shift is,
 * which should be safe.
 *
 * ISHIFT_TEMPS must appear among the local declarations of any function that
 * uses IRIGHT_SHIFT; it declares the scratch variable (including the
 * trailing semicolon) when the emulation is needed and expands to nothing
 * otherwise.
 *
 * In the emulated case, a negative value is shifted and then has one bits
 * ORed in from bit position (16 - shft) upward, i.e. the sign fill is
 * computed as if the value occupied 16 bits.
 */

#ifdef RIGHT_SHIFT_IS_UNSIGNED
#define ISHIFT_TEMPS    int ishift_temp;
#define IRIGHT_SHIFT(x, shft) \
  ((ishift_temp = (x)) < 0 ? \
   (ishift_temp >> (shft)) | ((~0) << (16 - (shft))) : \
   (ishift_temp >> (shft)))
#else
#define ISHIFT_TEMPS
#define IRIGHT_SHIFT(x, shft)   ((x) >> (shft))
#endif
145 
/* Round v up to the next multiple of p.  p must be a power of two, since the
 * rounding is done by masking off the low bits.  Both arguments are fully
 * parenthesized so that expressions containing low-precedence operators
 * (e.g. PAD(a | b, 16)) are evaluated as a unit.
 */
#define PAD(v, p)  (((v) + (p) - 1) & (~((p) - 1)))
147 
148 /* Forward declarations */
149 METHODDEF(boolean) encode_mcu_DC_first(j_compress_ptr cinfo,
150                                        JBLOCKROW *MCU_data);
151 METHODDEF(void) encode_mcu_AC_first_prepare
152   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
153    JCOEF *values, size_t *zerobits);
154 METHODDEF(boolean) encode_mcu_AC_first(j_compress_ptr cinfo,
155                                        JBLOCKROW *MCU_data);
156 METHODDEF(boolean) encode_mcu_DC_refine(j_compress_ptr cinfo,
157                                         JBLOCKROW *MCU_data);
158 METHODDEF(int) encode_mcu_AC_refine_prepare
159   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
160    JCOEF *absvalues, size_t *bits);
161 METHODDEF(boolean) encode_mcu_AC_refine(j_compress_ptr cinfo,
162                                         JBLOCKROW *MCU_data);
163 METHODDEF(void) finish_pass_phuff(j_compress_ptr cinfo);
164 METHODDEF(void) finish_pass_gather_phuff(j_compress_ptr cinfo);
165 
166 
167 /* Count bit loop zeroes */
168 INLINE
METHODDEF(int)169 METHODDEF(int)
170 count_zeroes(size_t *x)
171 {
172   int result;
173 #if defined(HAVE_BUILTIN_CTZL)
174   result = __builtin_ctzl(*x);
175   *x >>= result;
176 #elif defined(HAVE_BITSCANFORWARD64)
177   _BitScanForward64(&result, *x);
178   *x >>= result;
179 #elif defined(HAVE_BITSCANFORWARD)
180   _BitScanForward(&result, *x);
181   *x >>= result;
182 #else
183   result = 0;
184   while ((*x & 1) == 0) {
185     ++result;
186     *x >>= 1;
187   }
188 #endif
189   return result;
190 }
191 
192 
193 /*
194  * Initialize for a Huffman-compressed scan using progressive JPEG.
195  */
196 
197 METHODDEF(void)
start_pass_phuff(j_compress_ptr cinfo,boolean gather_statistics)198 start_pass_phuff(j_compress_ptr cinfo, boolean gather_statistics)
199 {
200   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
201   boolean is_DC_band;
202   int ci, tbl;
203   jpeg_component_info *compptr;
204 
205   entropy->cinfo = cinfo;
206   entropy->gather_statistics = gather_statistics;
207 
208   is_DC_band = (cinfo->Ss == 0);
209 
210   /* We assume jcmaster.c already validated the scan parameters. */
211 
212   /* Select execution routines */
213   if (cinfo->Ah == 0) {
214     if (is_DC_band)
215       entropy->pub.encode_mcu = encode_mcu_DC_first;
216     else
217       entropy->pub.encode_mcu = encode_mcu_AC_first;
218     if (jsimd_can_encode_mcu_AC_first_prepare())
219       entropy->AC_first_prepare = jsimd_encode_mcu_AC_first_prepare;
220     else
221       entropy->AC_first_prepare = encode_mcu_AC_first_prepare;
222   } else {
223     if (is_DC_band)
224       entropy->pub.encode_mcu = encode_mcu_DC_refine;
225     else {
226       entropy->pub.encode_mcu = encode_mcu_AC_refine;
227       if (jsimd_can_encode_mcu_AC_refine_prepare())
228         entropy->AC_refine_prepare = jsimd_encode_mcu_AC_refine_prepare;
229       else
230         entropy->AC_refine_prepare = encode_mcu_AC_refine_prepare;
231       /* AC refinement needs a correction bit buffer */
232       if (entropy->bit_buffer == NULL)
233         entropy->bit_buffer = (char *)
234           (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
235                                       MAX_CORR_BITS * sizeof(char));
236     }
237   }
238   if (gather_statistics)
239     entropy->pub.finish_pass = finish_pass_gather_phuff;
240   else
241     entropy->pub.finish_pass = finish_pass_phuff;
242 
243   /* Only DC coefficients may be interleaved, so cinfo->comps_in_scan = 1
244    * for AC coefficients.
245    */
246   for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
247     compptr = cinfo->cur_comp_info[ci];
248     /* Initialize DC predictions to 0 */
249     entropy->last_dc_val[ci] = 0;
250     /* Get table index */
251     if (is_DC_band) {
252       if (cinfo->Ah != 0)       /* DC refinement needs no table */
253         continue;
254       tbl = compptr->dc_tbl_no;
255     } else {
256       entropy->ac_tbl_no = tbl = compptr->ac_tbl_no;
257     }
258     if (gather_statistics) {
259       /* Check for invalid table index */
260       /* (make_c_derived_tbl does this in the other path) */
261       if (tbl < 0 || tbl >= NUM_HUFF_TBLS)
262         ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl);
263       /* Allocate and zero the statistics tables */
264       /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
265       if (entropy->count_ptrs[tbl] == NULL)
266         entropy->count_ptrs[tbl] = (long *)
267           (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
268                                       257 * sizeof(long));
269       MEMZERO(entropy->count_ptrs[tbl], 257 * sizeof(long));
270     } else {
271       /* Compute derived values for Huffman table */
272       /* We may do this more than once for a table, but it's not expensive */
273       jpeg_make_c_derived_tbl(cinfo, is_DC_band, tbl,
274                               &entropy->derived_tbls[tbl]);
275     }
276   }
277 
278   /* Initialize AC stuff */
279   entropy->EOBRUN = 0;
280   entropy->BE = 0;
281 
282   /* Initialize bit buffer to empty */
283   entropy->put_buffer = 0;
284   entropy->put_bits = 0;
285 
286   /* Initialize restart stuff */
287   entropy->restarts_to_go = cinfo->restart_interval;
288   entropy->next_restart_num = 0;
289 }
290 
291 
292 /* Outputting bytes to the file.
293  * NB: these must be called only when actually outputting,
294  * that is, entropy->gather_statistics == FALSE.
295  */
296 
/* Emit a byte: store val (cast to JOCTET) at next_output_byte, advance the
 * pointer, and call dump_buffer() once free_in_buffer has dropped to zero.
 * NOTE: this expands to a bare { } block rather than do { } while (0), so
 * the semicolon written after a use is a separate empty statement; keep uses
 * inside braces when they form the body of an if that has an else.
 */
#define emit_byte(entropy, val) { \
  *(entropy)->next_output_byte++ = (JOCTET)(val); \
  if (--(entropy)->free_in_buffer == 0) \
    dump_buffer(entropy); \
}
303 
304 
305 LOCAL(void)
dump_buffer(phuff_entropy_ptr entropy)306 dump_buffer(phuff_entropy_ptr entropy)
307 /* Empty the output buffer; we do not support suspension in this module. */
308 {
309   struct jpeg_destination_mgr *dest = entropy->cinfo->dest;
310 
311   if (!(*dest->empty_output_buffer) (entropy->cinfo))
312     ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND);
313   /* After a successful buffer dump, must reset buffer pointers */
314   entropy->next_output_byte = dest->next_output_byte;
315   entropy->free_in_buffer = dest->free_in_buffer;
316 }
317 
318 
319 /* Outputting bits to the file */
320 
321 /* Only the right 24 bits of put_buffer are used; the valid bits are
322  * left-justified in this part.  At most 16 bits can be passed to emit_bits
323  * in one call, and we never retain more than 7 bits in put_buffer
324  * between calls, so 24 bits are sufficient.
325  */
326 
327 LOCAL(void)
emit_bits(phuff_entropy_ptr entropy,unsigned int code,int size)328 emit_bits(phuff_entropy_ptr entropy, unsigned int code, int size)
329 /* Emit some bits, unless we are in gather mode */
330 {
331   /* This routine is heavily used, so it's worth coding tightly. */
332   register size_t put_buffer = (size_t)code;
333   register int put_bits = entropy->put_bits;
334 
335   /* if size is 0, caller used an invalid Huffman table entry */
336   if (size == 0)
337     ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
338 
339   if (entropy->gather_statistics)
340     return;                     /* do nothing if we're only getting stats */
341 
342   put_buffer &= (((size_t)1) << size) - 1; /* mask off any extra bits in code */
343 
344   put_bits += size;             /* new number of bits in buffer */
345 
346   put_buffer <<= 24 - put_bits; /* align incoming bits */
347 
348   put_buffer |= entropy->put_buffer; /* and merge with old buffer contents */
349 
350   while (put_bits >= 8) {
351     int c = (int)((put_buffer >> 16) & 0xFF);
352 
353     emit_byte(entropy, c);
354     if (c == 0xFF) {            /* need to stuff a zero byte? */
355       emit_byte(entropy, 0);
356     }
357     put_buffer <<= 8;
358     put_bits -= 8;
359   }
360 
361   entropy->put_buffer = put_buffer; /* update variables */
362   entropy->put_bits = put_bits;
363 }
364 
365 
366 LOCAL(void)
flush_bits(phuff_entropy_ptr entropy)367 flush_bits(phuff_entropy_ptr entropy)
368 {
369   emit_bits(entropy, 0x7F, 7); /* fill any partial byte with ones */
370   entropy->put_buffer = 0;     /* and reset bit-buffer to empty */
371   entropy->put_bits = 0;
372 }
373 
374 
375 /*
376  * Emit (or just count) a Huffman symbol.
377  */
378 
379 LOCAL(void)
emit_symbol(phuff_entropy_ptr entropy,int tbl_no,int symbol)380 emit_symbol(phuff_entropy_ptr entropy, int tbl_no, int symbol)
381 {
382   if (entropy->gather_statistics)
383     entropy->count_ptrs[tbl_no][symbol]++;
384   else {
385     c_derived_tbl *tbl = entropy->derived_tbls[tbl_no];
386     emit_bits(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]);
387   }
388 }
389 
390 
391 /*
392  * Emit bits from a correction bit buffer.
393  */
394 
395 LOCAL(void)
emit_buffered_bits(phuff_entropy_ptr entropy,char * bufstart,unsigned int nbits)396 emit_buffered_bits(phuff_entropy_ptr entropy, char *bufstart,
397                    unsigned int nbits)
398 {
399   if (entropy->gather_statistics)
400     return;                     /* no real work */
401 
402   while (nbits > 0) {
403     emit_bits(entropy, (unsigned int)(*bufstart), 1);
404     bufstart++;
405     nbits--;
406   }
407 }
408 
409 
410 /*
411  * Emit any pending EOBRUN symbol.
412  */
413 
414 LOCAL(void)
emit_eobrun(phuff_entropy_ptr entropy)415 emit_eobrun(phuff_entropy_ptr entropy)
416 {
417   register int temp, nbits;
418 
419   if (entropy->EOBRUN > 0) {    /* if there is any pending EOBRUN */
420     temp = entropy->EOBRUN;
421     nbits = JPEG_NBITS_NONZERO(temp) - 1;
422     /* safety check: shouldn't happen given limited correction-bit buffer */
423     if (nbits > 14)
424       ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
425 
426     emit_symbol(entropy, entropy->ac_tbl_no, nbits << 4);
427     if (nbits)
428       emit_bits(entropy, entropy->EOBRUN, nbits);
429 
430     entropy->EOBRUN = 0;
431 
432     /* Emit any buffered correction bits */
433     emit_buffered_bits(entropy, entropy->bit_buffer, entropy->BE);
434     entropy->BE = 0;
435   }
436 }
437 
438 
439 /*
440  * Emit a restart marker & resynchronize predictions.
441  */
442 
443 LOCAL(void)
emit_restart(phuff_entropy_ptr entropy,int restart_num)444 emit_restart(phuff_entropy_ptr entropy, int restart_num)
445 {
446   int ci;
447 
448   emit_eobrun(entropy);
449 
450   if (!entropy->gather_statistics) {
451     flush_bits(entropy);
452     emit_byte(entropy, 0xFF);
453     emit_byte(entropy, JPEG_RST0 + restart_num);
454   }
455 
456   if (entropy->cinfo->Ss == 0) {
457     /* Re-initialize DC predictions to 0 */
458     for (ci = 0; ci < entropy->cinfo->comps_in_scan; ci++)
459       entropy->last_dc_val[ci] = 0;
460   } else {
461     /* Re-initialize all AC-related fields to 0 */
462     entropy->EOBRUN = 0;
463     entropy->BE = 0;
464   }
465 }
466 
467 
468 /*
469  * MCU encoding for DC initial scan (either spectral selection,
470  * or first pass of successive approximation).
471  */
472 
473 METHODDEF(boolean)
encode_mcu_DC_first(j_compress_ptr cinfo,JBLOCKROW * MCU_data)474 encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
475 {
476   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
477   register int temp, temp2, temp3;
478   register int nbits;
479   int blkn, ci;
480   int Al = cinfo->Al;
481   JBLOCKROW block;
482   jpeg_component_info *compptr;
483   ISHIFT_TEMPS
484 
485   entropy->next_output_byte = cinfo->dest->next_output_byte;
486   entropy->free_in_buffer = cinfo->dest->free_in_buffer;
487 
488   /* Emit restart marker if needed */
489   if (cinfo->restart_interval)
490     if (entropy->restarts_to_go == 0)
491       emit_restart(entropy, entropy->next_restart_num);
492 
493   /* Encode the MCU data blocks */
494   for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
495     block = MCU_data[blkn];
496     ci = cinfo->MCU_membership[blkn];
497     compptr = cinfo->cur_comp_info[ci];
498 
499     /* Compute the DC value after the required point transform by Al.
500      * This is simply an arithmetic right shift.
501      */
502     temp2 = IRIGHT_SHIFT((int)((*block)[0]), Al);
503 
504     /* DC differences are figured on the point-transformed values. */
505     temp = temp2 - entropy->last_dc_val[ci];
506     entropy->last_dc_val[ci] = temp2;
507 
508     /* Encode the DC coefficient difference per section G.1.2.1 */
509 
510     /* This is a well-known technique for obtaining the absolute value without
511      * a branch.  It is derived from an assembly language technique presented
512      * in "How to Optimize for the Pentium Processors", Copyright (c) 1996,
513      * 1997 by Agner Fog.
514      */
515     temp3 = temp >> (CHAR_BIT * sizeof(int) - 1);
516     temp ^= temp3;
517     temp -= temp3;              /* temp is abs value of input */
518     /* For a negative input, want temp2 = bitwise complement of abs(input) */
519     temp2 = temp ^ temp3;
520 
521     /* Find the number of bits needed for the magnitude of the coefficient */
522     nbits = JPEG_NBITS(temp);
523     /* Check for out-of-range coefficient values.
524      * Since we're encoding a difference, the range limit is twice as much.
525      */
526     if (nbits > MAX_COEF_BITS + 1)
527       ERREXIT(cinfo, JERR_BAD_DCT_COEF);
528 
529     /* Count/emit the Huffman-coded symbol for the number of bits */
530     emit_symbol(entropy, compptr->dc_tbl_no, nbits);
531 
532     /* Emit that number of bits of the value, if positive, */
533     /* or the complement of its magnitude, if negative. */
534     if (nbits)                  /* emit_bits rejects calls with size 0 */
535       emit_bits(entropy, (unsigned int)temp2, nbits);
536   }
537 
538   cinfo->dest->next_output_byte = entropy->next_output_byte;
539   cinfo->dest->free_in_buffer = entropy->free_in_buffer;
540 
541   /* Update restart-interval state too */
542   if (cinfo->restart_interval) {
543     if (entropy->restarts_to_go == 0) {
544       entropy->restarts_to_go = cinfo->restart_interval;
545       entropy->next_restart_num++;
546       entropy->next_restart_num &= 7;
547     }
548     entropy->restarts_to_go--;
549   }
550 
551   return TRUE;
552 }
553 
554 
555 /*
556  * Data preparation for encode_mcu_AC_first().
557  */
558 
/* Scan the first Sl coefficients (in the order given by
 * jpeg_natural_order_start) and record the ones that are still nonzero
 * after the point transform.
 *
 * The macro relies on these names in the expanding scope: block,
 * jpeg_natural_order_start, Al, values, zerobits, and the scratch ints k,
 * temp, temp2.  For each surviving coefficient k it stores the transformed
 * magnitude in values[k], the bits to emit in values[k + DCTSIZE2], and sets
 * bit k of zerobits.  Slots of values[] for skipped coefficients are not
 * written at all.
 */
#define COMPUTE_ABSVALUES_AC_FIRST(Sl) { \
  for (k = 0; k < Sl; k++) { \
    temp = block[jpeg_natural_order_start[k]]; \
    if (temp == 0) \
      continue; \
    /* We must apply the point transform by Al.  For AC coefficients this \
     * is an integer division with rounding towards 0.  To do this portably \
     * in C, we shift after obtaining the absolute value; so the code is \
     * interwoven with finding the abs value (temp) and output bits (temp2). \
     */ \
    temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \
    temp ^= temp2; \
    temp -= temp2;              /* temp is abs value of input */ \
    temp >>= Al;                /* apply the point transform */ \
    /* Watch out for case that nonzero coef is zero after point transform */ \
    if (temp == 0) \
      continue; \
    /* For a negative coef, want temp2 = bitwise complement of abs(coef) */ \
    temp2 ^= temp; \
    values[k] = temp; \
    values[k + DCTSIZE2] = temp2; \
    zerobits |= ((size_t)1U) << k; \
  } \
}
583 
/* C implementation of the AC_first_prepare hook.
 *
 * block                     coefficient block to read
 * jpeg_natural_order_start  index table giving the order in which the Sl
 *                           coefficients are visited
 * Sl                        number of coefficients in the spectral band
 * Al                        point-transform shift
 * values                    output: magnitudes at [k], bits to emit at
 *                           [k + DCTSIZE2] (only for nonzero results)
 * bits                      output: bitmap with bit k set for each nonzero
 *                           result; one word when size_t is 64 bits, two
 *                           words (coefficients 0-31, then 32 and up) when
 *                           SIZEOF_SIZE_T == 4
 *
 * The local names k, temp, temp2 and zerobits are required by
 * COMPUTE_ABSVALUES_AC_FIRST and must not be renamed.
 */
METHODDEF(void)
encode_mcu_AC_first_prepare(const JCOEF *block,
                            const int *jpeg_natural_order_start, int Sl,
                            int Al, JCOEF *values, size_t *bits)
{
  register int k, temp, temp2;
  size_t zerobits = 0U;
  int Sl0 = Sl;

#if SIZEOF_SIZE_T == 4
  /* A 32-bit word can only flag the first 32 coefficients */
  if (Sl0 > 32)
    Sl0 = 32;
#endif

  COMPUTE_ABSVALUES_AC_FIRST(Sl0);

  bits[0] = zerobits;
#if SIZEOF_SIZE_T == 4
  zerobits = 0U;

  if (Sl > 32) {
    /* Second pass over the remaining coefficients; shifting the pointers
     * lets the macro keep using bit/array index k starting from 0.
     */
    Sl -= 32;
    jpeg_natural_order_start += 32;
    values += 32;

    COMPUTE_ABSVALUES_AC_FIRST(Sl);
  }
  bits[1] = zerobits;
#endif
}
614 
615 /*
616  * MCU encoding for AC initial scan (either spectral selection,
617  * or first pass of successive approximation).
618  */
619 
/* Emit every coefficient flagged in zerobits.
 *
 * The macro relies on these names in the expanding scope: zerobits, cvalue,
 * r, temp, temp2, nbits, entropy and cinfo.  Each iteration strips the
 * trailing zero bits from zerobits (their count is the zero run r), advances
 * cvalue by the same amount, emits the coefficient it now points at, and
 * then steps past it.
 *
 * `label` is pasted verbatim right after the run has been computed.  Pass a
 * goto label (with its colon) to allow jumping into the loop with r and
 * cvalue already set up, or a harmless statement such as `(void)0;`.
 */
#define ENCODE_COEFS_AC_FIRST(label) { \
  while (zerobits) { \
    r = count_zeroes(&zerobits); \
    cvalue += r; \
label \
    temp  = cvalue[0]; \
    temp2 = cvalue[DCTSIZE2]; \
    \
    /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \
    while (r > 15) { \
      emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \
      r -= 16; \
    } \
    \
    /* Find the number of bits needed for the magnitude of the coefficient */ \
    nbits = JPEG_NBITS_NONZERO(temp);  /* there must be at least one 1 bit */ \
    /* Check for out-of-range coefficient values */ \
    if (nbits > MAX_COEF_BITS) \
      ERREXIT(cinfo, JERR_BAD_DCT_COEF); \
    \
    /* Count/emit Huffman symbol for run length / number of bits */ \
    emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits); \
    \
    /* Emit that number of bits of the value, if positive, */ \
    /* or the complement of its magnitude, if negative. */ \
    emit_bits(entropy, (unsigned int)temp2, nbits); \
    \
    cvalue++; \
    zerobits >>= 1; \
  } \
}
651 
/* Encode the single block of one MCU for an AC initial scan.  The band is
 * first reduced to magnitudes/emit-bits plus a bitmap of nonzero positions
 * by the AC_first_prepare hook, then the flagged coefficients are emitted.
 * Always returns TRUE.
 */
METHODDEF(boolean)
encode_mcu_AC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
{
  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
  register int temp, temp2;
  register int nbits, r;
  int Sl = cinfo->Se - cinfo->Ss + 1;   /* number of coefs in the band */
  int Al = cinfo->Al;
  /* Two DCTSIZE2 halves (magnitudes, then bits to emit) plus slack so the
   * start can be rounded up to a 16-byte boundary for SIMD.
   */
  JCOEF values_unaligned[2 * DCTSIZE2 + 15];
  JCOEF *values;
  const JCOEF *cvalue;
  size_t zerobits;
  size_t bits[8 / SIZEOF_SIZE_T];       /* 64 flag bits in 1 or 2 words */

  entropy->next_output_byte = cinfo->dest->next_output_byte;
  entropy->free_in_buffer = cinfo->dest->free_in_buffer;

  /* Emit restart marker if needed */
  if (cinfo->restart_interval)
    if (entropy->restarts_to_go == 0)
      emit_restart(entropy, entropy->next_restart_num);

#ifdef WITH_SIMD
  cvalue = values = (JCOEF *)PAD((size_t)values_unaligned, 16);
#else
  /* Not using SIMD, so alignment is not needed */
  cvalue = values = values_unaligned;
#endif

  /* Prepare data */
  entropy->AC_first_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss,
                            Sl, Al, values, bits);

  /* Combine both words (on 32-bit) just to learn whether the block has any
   * nonzero coefficient at all.
   */
  zerobits = bits[0];
#if SIZEOF_SIZE_T == 4
  zerobits |= bits[1];
#endif

  /* Emit any pending EOBRUN */
  if (zerobits && (entropy->EOBRUN > 0))
    emit_eobrun(entropy);

#if SIZEOF_SIZE_T == 4
  /* Back to the first word only for the first encoding pass */
  zerobits = bits[0];
#endif

  /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */

  ENCODE_COEFS_AC_FIRST((void)0;);

#if SIZEOF_SIZE_T == 4
  zerobits = bits[1];
  if (zerobits) {
    /* The zero run continues across the word boundary: add the distance
     * from the current position to coefficient 32 to the run found in the
     * second word, then enter the loop past its own run computation.
     */
    int diff = ((values + DCTSIZE2 / 2) - cvalue);
    r = count_zeroes(&zerobits);
    r += diff;
    cvalue += r;
    goto first_iter_ac_first;
  }

  /* When the goto above is not taken, zerobits is 0 here and this loop body
   * never runs; the expansion is still needed to define the label.
   */
  ENCODE_COEFS_AC_FIRST(first_iter_ac_first:);
#endif

  if (cvalue < (values + Sl)) { /* If there are trailing zeroes, */
    entropy->EOBRUN++;          /* count an EOB */
    if (entropy->EOBRUN == 0x7FFF)
      emit_eobrun(entropy);     /* force it out to avoid overflow */
  }

  cinfo->dest->next_output_byte = entropy->next_output_byte;
  cinfo->dest->free_in_buffer = entropy->free_in_buffer;

  /* Update restart-interval state too */
  if (cinfo->restart_interval) {
    if (entropy->restarts_to_go == 0) {
      entropy->restarts_to_go = cinfo->restart_interval;
      entropy->next_restart_num++;
      entropy->next_restart_num &= 7;
    }
    entropy->restarts_to_go--;
  }

  return TRUE;
}
736 
737 
738 /*
739  * MCU encoding for DC successive approximation refinement scan.
740  * Note: we assume such scans can be multi-component, although the spec
741  * is not very clear on the point.
742  */
743 
744 METHODDEF(boolean)
encode_mcu_DC_refine(j_compress_ptr cinfo,JBLOCKROW * MCU_data)745 encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
746 {
747   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
748   register int temp;
749   int blkn;
750   int Al = cinfo->Al;
751   JBLOCKROW block;
752 
753   entropy->next_output_byte = cinfo->dest->next_output_byte;
754   entropy->free_in_buffer = cinfo->dest->free_in_buffer;
755 
756   /* Emit restart marker if needed */
757   if (cinfo->restart_interval)
758     if (entropy->restarts_to_go == 0)
759       emit_restart(entropy, entropy->next_restart_num);
760 
761   /* Encode the MCU data blocks */
762   for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
763     block = MCU_data[blkn];
764 
765     /* We simply emit the Al'th bit of the DC coefficient value. */
766     temp = (*block)[0];
767     emit_bits(entropy, (unsigned int)(temp >> Al), 1);
768   }
769 
770   cinfo->dest->next_output_byte = entropy->next_output_byte;
771   cinfo->dest->free_in_buffer = entropy->free_in_buffer;
772 
773   /* Update restart-interval state too */
774   if (cinfo->restart_interval) {
775     if (entropy->restarts_to_go == 0) {
776       entropy->restarts_to_go = cinfo->restart_interval;
777       entropy->next_restart_num++;
778       entropy->next_restart_num &= 7;
779     }
780     entropy->restarts_to_go--;
781   }
782 
783   return TRUE;
784 }
785 
786 
787 /*
788  * Data preparation for encode_mcu_AC_refine().
789  */
790 
/* Pre-pass over the first Sl coefficients of the band for a refinement scan.
 *
 * The macro relies on these names in the expanding scope: block,
 * jpeg_natural_order_start, Al, absvalues, zerobits, signbits, EOB, and the
 * scratch ints k, temp, temp2.  Unlike the first-scan variant, absvalues[k]
 * is written for every k.  Bit k of zerobits is set when the transformed
 * magnitude is nonzero; bit k of signbits is set when, in addition, the
 * original coefficient was not negative (temp2 + 1 is 1 for a non-negative
 * input and 0 for a negative one).  koffset is added to k when recording
 * EOB, so a second expansion over a shifted window still yields an index
 * relative to the start of the band.
 */
#define COMPUTE_ABSVALUES_AC_REFINE(Sl, koffset) { \
  /* It is convenient to make a pre-pass to determine the transformed \
   * coefficients' absolute values and the EOB position. \
   */ \
  for (k = 0; k < Sl; k++) { \
    temp = block[jpeg_natural_order_start[k]]; \
    /* We must apply the point transform by Al.  For AC coefficients this \
     * is an integer division with rounding towards 0.  To do this portably \
     * in C, we shift after obtaining the absolute value. \
     */ \
    temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \
    temp ^= temp2; \
    temp -= temp2;              /* temp is abs value of input */ \
    temp >>= Al;                /* apply the point transform */ \
    if (temp != 0) { \
      zerobits |= ((size_t)1U) << k; \
      signbits |= ((size_t)(temp2 + 1)) << k; \
    } \
    absvalues[k] = (JCOEF)temp; /* save abs value for main pass */ \
    if (temp == 1) \
      EOB = k + koffset;        /* EOB = index of last newly-nonzero coef */ \
  } \
}
814 
/* C implementation of the AC_refine_prepare hook.
 *
 * block                     coefficient block to read
 * jpeg_natural_order_start  index table giving the order in which the Sl
 *                           coefficients are visited
 * Sl                        number of coefficients in the spectral band
 * Al                        point-transform shift
 * absvalues                 output: transformed magnitude of every
 *                           coefficient in the band
 * bits                      output bitmaps.  With SIZEOF_SIZE_T == 8:
 *                           bits[0] = nonzero flags, bits[1] = sign flags.
 *                           Otherwise: bits[0]/bits[1] = nonzero flags for
 *                           coefficients 0-31 / 32 and up, bits[2]/bits[3] =
 *                           the matching sign flags.
 *
 * Returns the band-relative index of the last coefficient whose transformed
 * magnitude is exactly 1, or 0 if there is none.
 *
 * The local names k, temp, temp2, EOB, zerobits and signbits are required by
 * COMPUTE_ABSVALUES_AC_REFINE and must not be renamed.
 */
METHODDEF(int)
encode_mcu_AC_refine_prepare(const JCOEF *block,
                             const int *jpeg_natural_order_start, int Sl,
                             int Al, JCOEF *absvalues, size_t *bits)
{
  register int k, temp, temp2;
  int EOB = 0;
  size_t zerobits = 0U, signbits = 0U;
  int Sl0 = Sl;

#if SIZEOF_SIZE_T == 4
  /* A 32-bit word can only flag the first 32 coefficients */
  if (Sl0 > 32)
    Sl0 = 32;
#endif

  COMPUTE_ABSVALUES_AC_REFINE(Sl0, 0);

  bits[0] = zerobits;
#if SIZEOF_SIZE_T == 8
  bits[1] = signbits;
#else
  bits[2] = signbits;

  zerobits = 0U;
  signbits = 0U;

  if (Sl > 32) {
    /* Second pass over the remaining coefficients; the pointers are shifted
     * so the macro's index k restarts at 0, and koffset = 32 keeps EOB
     * relative to the start of the band.
     */
    Sl -= 32;
    jpeg_natural_order_start += 32;
    absvalues += 32;

    COMPUTE_ABSVALUES_AC_REFINE(Sl, 32);
  }

  bits[1] = zerobits;
  bits[3] = signbits;
#endif

  return EOB;
}
855 
856 
857 /*
858  * MCU encoding for AC successive approximation refinement scan.
859  */
860 
/* Main refinement loop over the coefficients flagged in zerobits.
 *
 * The macro relies on these names in the expanding scope: zerobits,
 * signbits, cabsvalue, EOBPTR, r, temp, BR, BR_buffer and entropy.  Each
 * iteration skips the run of unflagged coefficients (adding it to the zero
 * run r and keeping signbits aligned with zerobits), then handles the
 * flagged coefficient: a magnitude above 1 only appends a correction bit to
 * BR_buffer, while a magnitude of 1 is emitted as a run/size symbol plus
 * sign bit, followed by the correction bits collected so far.
 *
 * `label` is pasted verbatim right after the run has been accounted for.
 * Pass a goto label (with its colon) to allow jumping into the loop, or a
 * harmless statement.
 */
#define ENCODE_COEFS_AC_REFINE(label) { \
  while (zerobits) { \
    int idx = count_zeroes(&zerobits); \
    r += idx; \
    cabsvalue += idx; \
    signbits >>= idx; \
label \
    /* Emit any required ZRLs, but not if they can be folded into EOB */ \
    while (r > 15 && (cabsvalue <= EOBPTR)) { \
      /* emit any pending EOBRUN and the BE correction bits */ \
      emit_eobrun(entropy); \
      /* Emit ZRL */ \
      emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \
      r -= 16; \
      /* Emit buffered correction bits that must be associated with ZRL */ \
      emit_buffered_bits(entropy, BR_buffer, BR); \
      BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \
      BR = 0; \
    } \
    \
    temp = *cabsvalue++; \
    \
    /* If the coef was previously nonzero, it only needs a correction bit. \
     * NOTE: a straight translation of the spec's figure G.7 would suggest \
     * that we also need to test r > 15.  But if r > 15, we can only get here \
     * if k > EOB, which implies that this coefficient is not 1. \
     */ \
    if (temp > 1) { \
      /* The correction bit is the next bit of the absolute value. */ \
      BR_buffer[BR++] = (char)(temp & 1); \
      signbits >>= 1; \
      zerobits >>= 1; \
      continue; \
    } \
    \
    /* Emit any pending EOBRUN and the BE correction bits */ \
    emit_eobrun(entropy); \
    \
    /* Count/emit Huffman symbol for run length / number of bits */ \
    emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1); \
    \
    /* Emit output bit for newly-nonzero coef */ \
    temp = signbits & 1; /* ((*block)[jpeg_natural_order_start[k]] < 0) ? 0 : 1 */ \
    emit_bits(entropy, (unsigned int)temp, 1); \
    \
    /* Emit buffered correction bits that must be associated with this code */ \
    emit_buffered_bits(entropy, BR_buffer, BR); \
    BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \
    BR = 0; \
    r = 0;                      /* reset zero run length */ \
    signbits >>= 1; \
    zerobits >>= 1; \
  } \
}
915 
/*
 * Encode one MCU of an AC successive-approximation refinement scan.
 *
 * Only MCU_data[0][0] is read.  The block is first run through
 * entropy->AC_refine_prepare(), which fills a local array of absolute values
 * and a set of bit masks; the ENCODE_COEFS_AC_REFINE macro then walks those
 * masks and emits ZRL / run-length symbols, sign bits, and correction bits.
 * Whatever remains after the last emitted symbol is folded into the pending
 * EOB run (entropy->EOBRUN) together with its correction bits (entropy->BE).
 *
 * The output position is copied from cinfo->dest on entry and written back
 * on exit, and the restart-interval counters are updated.  Always returns
 * TRUE.
 */
METHODDEF(boolean)
encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
{
  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
  /* temp, r, BR_buffer, BR, cabsvalue, EOBPTR, zerobits, and signbits are
   * used by name inside ENCODE_COEFS_AC_REFINE; do not rename them without
   * updating the macro.
   */
  register int temp, r;
  char *BR_buffer;
  unsigned int BR;
  int Sl = cinfo->Se - cinfo->Ss + 1;   /* number of coefficients Ss..Se */
  int Al = cinfo->Al;
  /* The 15 extra elements leave room for rounding the start address up in
   * the WITH_SIMD branch below.
   */
  JCOEF absvalues_unaligned[DCTSIZE2 + 15];
  JCOEF *absvalues;
  const JCOEF *cabsvalue, *EOBPTR;
  size_t zerobits, signbits;
  /* 16 bytes of mask storage: 2 words when size_t is 8 bytes, 4 words when
   * it is 4 bytes.  See the reads of bits[] below for the layout.
   */
  size_t bits[16 / SIZEOF_SIZE_T];

  /* Work on a local copy of the destination's write position */
  entropy->next_output_byte = cinfo->dest->next_output_byte;
  entropy->free_in_buffer = cinfo->dest->free_in_buffer;

  /* Emit restart marker if needed */
  if (cinfo->restart_interval)
    if (entropy->restarts_to_go == 0)
      emit_restart(entropy, entropy->next_restart_num);

#ifdef WITH_SIMD
  /* PAD() is defined elsewhere; presumably it rounds the address up to a
   * multiple of 16 -- confirm against its definition.
   */
  cabsvalue = absvalues = (JCOEF *)PAD((size_t)absvalues_unaligned, 16);
#else
  /* Not using SIMD, so alignment is not needed */
  cabsvalue = absvalues = absvalues_unaligned;
#endif

  /* Prepare data */
  /* The return value is used as an offset into absvalues to form EOBPTR,
   * which bounds ZRL emission inside the macro.  AC_refine_prepare is a
   * function pointer; its implementation is not part of this function.
   */
  EOBPTR = absvalues +
    entropy->AC_refine_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss,
                               Sl, Al, absvalues, bits);

  /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */

  r = 0;                        /* r = run length of zeros */
  BR = 0;                       /* BR = count of buffered bits added now */
  BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */

  /* First mask word.  With 8-byte size_t the two masks are bits[0] and
   * bits[1]; with 4-byte size_t each mask spans two words (bits[0]/bits[1]
   * and bits[2]/bits[3]) and this pass covers only the first word of each.
   */
  zerobits = bits[0];
#if SIZEOF_SIZE_T == 8
  signbits = bits[1];
#else
  signbits = bits[2];
#endif
  /* No goto target is needed here, so pass a no-op statement as the label */
  ENCODE_COEFS_AC_REFINE((void)0;);

#if SIZEOF_SIZE_T == 4
  /* Second mask word of each pair */
  zerobits = bits[1];
  signbits = bits[3];

  if (zerobits) {
    /* The first pass stops as soon as its mask word is exhausted, so
     * cabsvalue may not have reached the position that bit 0 of the second
     * word refers to (absvalues + DCTSIZE2 / 2).  diff is that shortfall; it
     * is added to the skip count so that r and cabsvalue account for it.
     */
    int diff = ((absvalues + DCTSIZE2 / 2) - cabsvalue);
    int idx = count_zeroes(&zerobits);
    signbits >>= idx;
    idx += diff;
    r += idx;
    cabsvalue += idx;
    /* The skip step has been done by hand above, so enter the macro's loop
     * body just after its own skip step.
     */
    goto first_iter_ac_refine;
  }

  ENCODE_COEFS_AC_REFINE(first_iter_ac_refine:);
#endif

  /* Fold the number of positions not yet consumed into r.  r is only tested
   * against zero below, so OR is sufficient as long as both operands are
   * non-negative (presumably cabsvalue never passes absvalues + Sl --
   * confirm against AC_refine_prepare).
   */
  r |= (int)((absvalues + Sl) - cabsvalue);

  if (r > 0 || BR > 0) {        /* If there are trailing zeroes, */
    entropy->EOBRUN++;          /* count an EOB */
    entropy->BE += BR;          /* concat my correction bits to older ones */
    /* We force out the EOB if we risk either:
     * 1. overflow of the EOB counter;
     * 2. overflow of the correction bit buffer during the next MCU.
     */
    if (entropy->EOBRUN == 0x7FFF ||
        entropy->BE > (MAX_CORR_BITS - DCTSIZE2 + 1))
      emit_eobrun(entropy);
  }

  /* Publish the advanced write position back to the destination manager */
  cinfo->dest->next_output_byte = entropy->next_output_byte;
  cinfo->dest->free_in_buffer = entropy->free_in_buffer;

  /* Update restart-interval state too */
  if (cinfo->restart_interval) {
    if (entropy->restarts_to_go == 0) {
      /* A restart marker was emitted at the top of this call; start a new
       * interval and advance the marker number modulo 8.
       */
      entropy->restarts_to_go = cinfo->restart_interval;
      entropy->next_restart_num++;
      entropy->next_restart_num &= 7;
    }
    entropy->restarts_to_go--;
  }

  return TRUE;
}
1011 
1012 
1013 /*
1014  * Finish up at the end of a Huffman-compressed progressive scan.
1015  */
1016 
1017 METHODDEF(void)
finish_pass_phuff(j_compress_ptr cinfo)1018 finish_pass_phuff(j_compress_ptr cinfo)
1019 {
1020   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
1021 
1022   entropy->next_output_byte = cinfo->dest->next_output_byte;
1023   entropy->free_in_buffer = cinfo->dest->free_in_buffer;
1024 
1025   /* Flush out any buffered data */
1026   emit_eobrun(entropy);
1027   flush_bits(entropy);
1028 
1029   cinfo->dest->next_output_byte = entropy->next_output_byte;
1030   cinfo->dest->free_in_buffer = entropy->free_in_buffer;
1031 }
1032 
1033 
1034 /*
1035  * Finish up a statistics-gathering pass and create the new Huffman tables.
1036  */
1037 
1038 METHODDEF(void)
finish_pass_gather_phuff(j_compress_ptr cinfo)1039 finish_pass_gather_phuff(j_compress_ptr cinfo)
1040 {
1041   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
1042   boolean is_DC_band;
1043   int ci, tbl;
1044   jpeg_component_info *compptr;
1045   JHUFF_TBL **htblptr;
1046   boolean did[NUM_HUFF_TBLS];
1047 
1048   /* Flush out buffered data (all we care about is counting the EOB symbol) */
1049   emit_eobrun(entropy);
1050 
1051   is_DC_band = (cinfo->Ss == 0);
1052 
1053   /* It's important not to apply jpeg_gen_optimal_table more than once
1054    * per table, because it clobbers the input frequency counts!
1055    */
1056   MEMZERO(did, sizeof(did));
1057 
1058   for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
1059     compptr = cinfo->cur_comp_info[ci];
1060     if (is_DC_band) {
1061       if (cinfo->Ah != 0)       /* DC refinement needs no table */
1062         continue;
1063       tbl = compptr->dc_tbl_no;
1064     } else {
1065       tbl = compptr->ac_tbl_no;
1066     }
1067     if (!did[tbl]) {
1068       if (is_DC_band)
1069         htblptr = &cinfo->dc_huff_tbl_ptrs[tbl];
1070       else
1071         htblptr = &cinfo->ac_huff_tbl_ptrs[tbl];
1072       if (*htblptr == NULL)
1073         *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo);
1074       jpeg_gen_optimal_table(cinfo, *htblptr, entropy->count_ptrs[tbl]);
1075       did[tbl] = TRUE;
1076     }
1077   }
1078 }
1079 
1080 
1081 /*
1082  * Module initialization routine for progressive Huffman entropy encoding.
1083  */
1084 
1085 GLOBAL(void)
jinit_phuff_encoder(j_compress_ptr cinfo)1086 jinit_phuff_encoder(j_compress_ptr cinfo)
1087 {
1088   phuff_entropy_ptr entropy;
1089   int i;
1090 
1091   entropy = (phuff_entropy_ptr)
1092     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
1093                                 sizeof(phuff_entropy_encoder));
1094   cinfo->entropy = (struct jpeg_entropy_encoder *)entropy;
1095   entropy->pub.start_pass = start_pass_phuff;
1096 
1097   /* Mark tables unallocated */
1098   for (i = 0; i < NUM_HUFF_TBLS; i++) {
1099     entropy->derived_tbls[i] = NULL;
1100     entropy->count_ptrs[i] = NULL;
1101   }
1102   entropy->bit_buffer = NULL;   /* needed only in AC refinement scan */
1103 }
1104 
1105 #endif /* C_PROGRESSIVE_SUPPORTED */
1106