1 // Copyright (c) 2015 Sergio Gonzalez. All rights reserved.
2 // License: https://github.com/serge-rgb/milton#license
3 
4 
5 /**
6  * tiny_jpeg.h
7  *
8  * Tiny JPEG Encoder
9  *  - Sergio Gonzalez
10  *
11  * This is a readable and simple single-header JPEG encoder.
12  *
13  * Features
14  *  - Implements Baseline DCT JPEG compression.
15  *  - No dynamic allocations.
16  *
17  * This library is coded in the spirit of the stb libraries and mostly follows
18  * the stb guidelines.
19  *
20  * It is written in C99. And depends on the C standard library.
21  *
22  * Other requirements
23  *  - Assumes little endian machine.
24  *
25  * Tested on:
26  *  Linux x64 (clang)
27  *  Windows
28  *  OSX
29  *
30  * This software is in the public domain. Where that dedication is not
31  * recognized, you are granted a perpetual, irrevocable license to copy
32  * and modify this file as you see fit.*
33  */
34 
35 // ============================================================
36 // Usage
37 // ============================================================
// Include "tiny_jpeg.h" and use the public interface defined below.
39 //
40 // You *must* do:
41 //
42 //      #define TJE_IMPLEMENTATION
43 //      #include "tiny_jpeg.h"
44 //
45 // in exactly one of your C files to actually compile the implementation.
46 
47 
48 // Here is an example program that loads a bmp with stb_image and writes it
49 // with Tiny JPEG
50 
51 /*
52 
53 #define STB_IMAGE_IMPLEMENTATION
54 #include "stb_image.h"
55 
56 
57 #define TJE_IMPLEMENTATION
58 #include "tiny_jpeg.h"
59 
60 
61 int main()
62 {
63     int width, height, num_components;
64     unsigned char* data = stbi_load("in.bmp", &width, &height, &num_components, 0);
65     if ( !data ) {
66         puts("Could not find file");
67         return EXIT_FAILURE;
68     }
69 
70     if ( !tje_encode_to_file("out.jpg", width, height, num_components, data) ) {
71         fprintf(stderr, "Could not write JPEG\n");
72         return EXIT_FAILURE;
73     }
74 
75     return EXIT_SUCCESS;
76 }
77 
78 */
79 
80 
81 
82 #ifdef __cplusplus
83 extern "C"
84 {
85 #endif
86 
87 #if defined(__GNUC__) || defined(__clang__)
88 #pragma GCC diagnostic push
89 #pragma GCC diagnostic ignored "-Wmissing-field-initializers"  // We use {0}, which will zero-out the struct.
90 #pragma GCC diagnostic ignored "-Wmissing-braces"
91 #pragma GCC diagnostic ignored "-Wpadded"
92 #endif
93 
// ============================================================
95 // Public interface:
96 // ============================================================
97 
98 #ifndef TJE_HEADER_GUARD
99 #define TJE_HEADER_GUARD
100 
/*
 * tje_encode_to_file
 *
 * Takes bitmap data and writes a JPEG-encoded image to disk.
 *
 * Parameters:
 *   dest_path        Filename to which we will write, e.g. "out.jpg".
 *   width, height    Image size in pixels.
 *   num_components   3 is RGB, 4 is RGBA. Those are the only supported values.
 *   src_data         Pointer to the pixel data.
 *
 * Returns:
 *   0 on error, 1 on success.
 */
int tje_encode_to_file(const char* dest_path,
                       const int width, const int height,
                       const int num_components,
                       const unsigned char* src_data);
120 
/*
 * tje_encode_to_file_at_quality
 *
 * Takes bitmap data and writes a JPEG-encoded image to disk, at a chosen
 * quality level.
 *
 * Parameters:
 *   dest_path        Filename to which we will write, e.g. "out.jpg".
 *   quality          3: Highest. Compression varies wildly (between 1/3 and 1/20).
 *                    2: Very good quality. About 1/2 the size of 3.
 *                    1: Noticeable. About 1/6 the size of 3, or 1/3 the size of 2.
 *   width, height    Image size in pixels.
 *   num_components   3 is RGB, 4 is RGBA. Those are the only supported values.
 *   src_data         Pointer to the pixel data.
 *
 * Returns:
 *   0 on error, 1 on success.
 */
int tje_encode_to_file_at_quality(const char* dest_path,
                                  const int quality,
                                  const int width, const int height,
                                  const int num_components,
                                  const unsigned char* src_data);
144 
/*
 * tje_encode_with_func
 *
 * Same as tje_encode_to_file_at_quality, but instead of a destination path it
 * takes a callback that knows how to handle (or ignore) `context`.
 *
 * The callback receives an array `data` of `size` bytes, which can be written
 * directly to a file. There is no need to free the data.
 */
typedef void tje_write_func(void* context, void* data, int size);

int tje_encode_with_func(tje_write_func* func, void* context,
                         const int quality,
                         const int width, const int height,
                         const int num_components,
                         const unsigned char* src_data);
162 
163 #endif // TJE_HEADER_GUARD
164 
165 
166 
167 // Implementation: In exactly one of the source files of your application,
168 // define TJE_IMPLEMENTATION and include tiny_jpeg.h
169 
170 // ============================================================
171 // Internal
172 // ============================================================
173 #ifdef TJE_IMPLEMENTATION
174 
175 
// Type-generic minimum / maximum of two values.
//
// Every argument and the whole expansion are parenthesized so the macros can
// be used safely inside larger expressions, and no trailing semicolon is
// baked into the expansion.
//
// NOTE: each argument may be evaluated twice; do not pass expressions with
// side effects (e.g. `i++`).
#define tjei_min(a, b) (((a) < (b)) ? (a) : (b))
#define tjei_max(a, b) (((a) < (b)) ? (b) : (a))
178 
179 
180 #if defined(_MSC_VER)
181 #define TJEI_FORCE_INLINE __forceinline
182 // #define TJEI_FORCE_INLINE __declspec(noinline)  // For profiling
183 #else
184 #define TJEI_FORCE_INLINE static // TODO: equivalent for gcc & clang
185 #endif
186 
187 // Only use zero for debugging and/or inspection.
188 #define TJE_USE_FAST_DCT 1
189 
190 // C std lib
191 #include <assert.h>
192 #include <inttypes.h>
193 #include <math.h>   // floorf, ceilf
194 #include <stdio.h>  // FILE, puts
#include <string.h> // memcpy
196 
197 
198 #define TJEI_BUFFER_SIZE 1024
199 
// Buffer up to TJEI_BUFFER_SIZE - 1 bytes in memory and flush when full.
201 static size_t tjei_g_output_buffer_count;
202 static uint8_t tjei_g_output_buffer[TJEI_BUFFER_SIZE];
203 
204 
205 #ifdef _WIN32
206 
207 #include <windows.h>
208 #ifndef snprintf
209 #define snprintf sprintf_s
210 #endif
211 // Not quite the same but it works for us. If I am not mistaken, it differs
212 // only in the return value.
213 
214 #endif
215 
// tje_log(msg): debug-only logging of a C string.
//
//  - Debug builds (NDEBUG not defined): Windows sends the message to the
//    debugger via OutputDebugStringA; every other platform prints it with
//    puts(). The generic fallback guarantees the macro is always defined, so
//    debug builds do not break on platforms that are not listed explicitly.
//  - Release builds (NDEBUG defined): expands to a no-op expression.
#ifndef NDEBUG

#ifdef _WIN32
#define tje_log(msg) OutputDebugStringA(msg)
#else
#define tje_log(msg) puts(msg)
#endif

#else  // NDEBUG
#define tje_log(msg) ((void)0)
#endif  // NDEBUG
227 
228 
229 typedef struct {
230     void* context;
231     tje_write_func* func;
232 } TJEWriteContext;
233 
234 typedef struct TJEState_s {
235     uint8_t     ehuffsize[4][257];
236     uint16_t    ehuffcode[4][256];
237 
238     uint8_t*    ht_bits[4];
239     uint8_t*    ht_vals[4];
240 
241     uint8_t     qt_luma[64];
242     uint8_t     qt_chroma[64];
243 
244     TJEWriteContext write_context;
245 } TJEState;
246 
247 // ============================================================
248 // Table definitions.
249 //
// The spec defines reasonably good default quantization matrices and huffman
// specification tables.
252 //
253 //
254 // Instead of hard-coding the final huffman table, we only hard-code the table
255 // spec suggested by the specification, and then derive the full table from
256 // there.  This is only for didactic purposes but it might be useful if there
257 // ever is the case that we need to swap huffman tables from various sources.
258 // ============================================================
259 
260 
261 // K.1 - suggested luminance QT
262 static uint8_t tjei_default_qt_luma_from_spec[] = {
263     16,11,10,16, 24, 40, 51, 61,
264     12,12,14,19, 26, 58, 60, 55,
265     14,13,16,24, 40, 57, 69, 56,
266     14,17,22,29, 51, 87, 80, 62,
267     18,22,37,56, 68,109,103, 77,
268     24,35,55,64, 81,104,113, 92,
269     49,64,78,87,103,121,120,101,
270     72,92,95,98,112,100,103, 99,
271 };
272 
273 // Unused
274 #if 0
275 static uint8_t tjei_default_qt_chroma_from_spec[] = {
276     // K.1 - suggested chrominance QT
277     17,18,24,47,99,99,99,99,
278     18,21,26,66,99,99,99,99,
279     24,26,56,99,99,99,99,99,
280     47,66,99,99,99,99,99,99,
281     99,99,99,99,99,99,99,99,
282     99,99,99,99,99,99,99,99,
283     99,99,99,99,99,99,99,99,
284     99,99,99,99,99,99,99,99,
285 };
286 #endif
287 
// Chrominance quantization table. Example QT from the JPEG paper.
// 64 entries laid out as 8 rows of 8.
static uint8_t tjei_default_qt_chroma_from_paper[] = {
     16,  12,  14,  14,  18,  24,  49,  72,
     11,  10,  16,  24,  40,  51,  61,  12,
     13,  17,  22,  35,  64,  92,  14,  16,
     22,  37,  55,  78,  95,  19,  24,  29,
     56,  64,  87,  98,  26,  40,  51,  68,
     81, 103, 112,  58,  57,  87, 109, 104,
    121, 100,  60,  69,  80, 103, 113, 120,
    103,  55,  56,  62,  77,  92, 101,  99,
};
299 
300 // == Procedure to 'deflate' the huffman tree: JPEG spec, C.2
301 
// Luminance DC table (K.3.3.1).
// `_len` entry i is the number of codes that are (i + 1) bits long.
static uint8_t tjei_default_ht_luma_dc_len[16] = {
    0, 1, 5, 1, 1, 1, 1, 1,
    1, 0, 0, 0, 0, 0, 0, 0
};
// Symbol values, ordered by increasing code length.
static uint8_t tjei_default_ht_luma_dc[12] = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
};

// Chrominance DC table (K.3.3.1), same layout as the luminance one.
static uint8_t tjei_default_ht_chroma_dc_len[16] = {
    0, 3, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 0, 0, 0, 0, 0
};
// Symbol values, ordered by increasing code length.
static uint8_t tjei_default_ht_chroma_dc[12] = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
};
319 
// Luminance AC table. `_len` entry i is the number of codes that are (i + 1)
// bits long; the values array lists the symbols ordered by code length.
static uint8_t tjei_default_ht_luma_ac_len[16] = {
    0, 2, 1, 3, 3, 2, 4, 3,
    5, 5, 4, 4, 0, 0, 1, 0x7d
};
static uint8_t tjei_default_ht_luma_ac[] = {
    /*  2 bits */ 0x01, 0x02,
    /*  3 bits */ 0x03,
    /*  4 bits */ 0x00, 0x04, 0x11,
    /*  5 bits */ 0x05, 0x12, 0x21,
    /*  6 bits */ 0x31, 0x41,
    /*  7 bits */ 0x06, 0x13, 0x51, 0x61,
    /*  8 bits */ 0x07, 0x22, 0x71,
    /*  9 bits */ 0x14, 0x32, 0x81, 0x91, 0xA1,
    /* 10 bits */ 0x08, 0x23, 0x42, 0xB1, 0xC1,
    /* 11 bits */ 0x15, 0x52, 0xD1, 0xF0,
    /* 12 bits */ 0x24, 0x33, 0x62, 0x72,
    /* 15 bits */ 0x82,
    /* 16 bits */
    0x09, 0x0A, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2A, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
    0x4A, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
    0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
    0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
    0xA8, 0xA9, 0xAA, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3, 0xC4, 0xC5,
    0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xE1, 0xE2,
    0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8,
    0xF9, 0xFA
};
337 
// Chrominance AC table. `_len` entry i is the number of codes that are (i + 1)
// bits long; the values array lists the symbols ordered by code length.
static uint8_t tjei_default_ht_chroma_ac_len[16] = {
    0, 2, 1, 2, 4, 4, 3, 4,
    7, 5, 4, 4, 0, 1, 2, 0x77
};
static uint8_t tjei_default_ht_chroma_ac[] = {
    /*  2 bits */ 0x00, 0x01,
    /*  3 bits */ 0x02,
    /*  4 bits */ 0x03, 0x11,
    /*  5 bits */ 0x04, 0x05, 0x21, 0x31,
    /*  6 bits */ 0x06, 0x12, 0x41, 0x51,
    /*  7 bits */ 0x07, 0x61, 0x71,
    /*  8 bits */ 0x13, 0x22, 0x32, 0x81,
    /*  9 bits */ 0x08, 0x14, 0x42, 0x91, 0xA1, 0xB1, 0xC1,
    /* 10 bits */ 0x09, 0x23, 0x33, 0x52, 0xF0,
    /* 11 bits */ 0x15, 0x62, 0x72, 0xD1,
    /* 12 bits */ 0x0A, 0x16, 0x24, 0x34,
    /* 14 bits */ 0xE1,
    /* 15 bits */ 0x25, 0xF1,
    /* 16 bits */
    0x17, 0x18, 0x19, 0x1A, 0x26,
    0x27, 0x28, 0x29, 0x2A, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
    0x49, 0x4A, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
    0x69, 0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5,
    0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3,
    0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA,
    0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8,
    0xF9, 0xFA
};
354 
355 
356 // ============================================================
357 // Code
358 // ============================================================
359 
// Zig-zag order: entry i is the position, within the zig-zag sequence, of the
// coefficient stored at row-major index i of an 8x8 block.
static uint8_t tjei_zig_zag[64] = {
     0,  1,  5,  6, 14, 15, 27, 28,
     2,  4,  7, 13, 16, 26, 29, 42,
     3,  8, 12, 17, 25, 30, 41, 43,
     9, 11, 18, 24, 31, 40, 44, 53,
    10, 19, 23, 32, 39, 45, 52, 54,
    20, 22, 33, 38, 46, 51, 55, 60,
    21, 34, 37, 47, 50, 56, 59, 61,
    35, 36, 48, 49, 57, 58, 62, 63,
};
371 
// Swaps the two bytes of a 16-bit word: 0xhilo -> 0xlohi. On a little endian
// machine the result is laid out in memory as the bytes `hi`, `lo`, i.e. in
// big endian order.
static uint16_t tjei_be_word(const uint16_t le_word)
{
    const uint16_t low_byte  = (uint16_t)(le_word & 0xffu);
    const uint16_t high_byte = (uint16_t)(le_word >> 8);
    return (uint16_t)((low_byte << 8) | high_byte);
}
379 
380 // ============================================================
// The following structs exist only for code clarity, debuggability, and
// readability. They are used when writing to disk, but it is useful to have
// 1-packed-structs to document how the format works, and to inspect memory
// while developing.
385 // ============================================================
386 
static const uint8_t tjeik_jfif_id[] = "JFIF";
static const uint8_t tjeik_com_str[] = "Created by Tiny JPEG Encoder";

// TODO: Get rid of packed structs!
// All structs below are packed to 1 byte so that their in-memory layout has no
// padding between fields.
#pragma pack(push)
#pragma pack(1)

// Start-of-image marker followed by the JFIF APP0 segment.
typedef struct TJEJPEGHeader_s {
    uint16_t SOI;
    // JFIF header.
    uint16_t APP0;
    uint16_t jfif_len;
    uint8_t  jfif_id[5];    // Same size as tjeik_jfif_id ("JFIF" plus its terminator).
    uint16_t version;
    uint8_t  units;
    uint16_t x_density;
    uint16_t y_density;
    uint8_t  x_thumb;
    uint8_t  y_thumb;
} TJEJPEGHeader;

// Comment segment carrying tjeik_com_str without its terminating NUL.
typedef struct TJEJPEGComment_s {
    uint16_t com;
    uint16_t com_len;
    char     com_str[sizeof(tjeik_com_str) - 1];
} TJEJPEGComment;

// Per-component entry of TJEFrameHeader (below).
typedef struct TJEComponentSpec_s {
    uint8_t  component_id;
    uint8_t  sampling_factors;    // Most significant 4 bits: horizontal. 4 LSB: vertical (A.1.1)
    uint8_t  qt;                  // Quantization table selector.
} TJEComponentSpec;

// Frame header segment.
typedef struct TJEFrameHeader_s {
    uint16_t         SOF;
    uint16_t         len;                   // 8 + 3 * frame.num_components
    uint8_t          precision;             // Sample precision (bits per sample).
    uint16_t         height;
    uint16_t         width;
    uint8_t          num_components;        // For this implementation, will be equal to 3.
    TJEComponentSpec component_spec[3];
} TJEFrameHeader;

// Per-component entry of TJEScanHeader (below).
typedef struct TJEFrameComponentSpec_s {
    uint8_t component_id;                 // Just as with TJEComponentSpec
    uint8_t dc_ac;                        // (dc|ac)
} TJEFrameComponentSpec;

// Scan header segment.
typedef struct TJEScanHeader_s {
    uint16_t              SOS;
    uint16_t              len;
    uint8_t               num_components;  // 3.
    TJEFrameComponentSpec component_spec[3];
    uint8_t               first;  // 0
    uint8_t               last;   // 63
    uint8_t               ah_al;  // 0
} TJEScanHeader;

#pragma pack(pop)
445 
446 
// Appends `num_bytes * num_elements` bytes from `data` to the global output
// buffer. Whenever the buffer holds TJEI_BUFFER_SIZE - 1 bytes it is handed to
// the write callback stored in `state` and emptied, and copying continues with
// whatever input is left.
static void tjei_write(TJEState* state, void* data, size_t num_bytes, size_t num_elements)
{
    const size_t   capacity  = TJEI_BUFFER_SIZE - 1;
    const uint8_t* src       = (const uint8_t*)data;
    size_t         remaining = num_bytes * num_elements;

    for (;;) {
        // Copy as much as still fits into the buffer.
        const size_t room  = capacity - tjei_g_output_buffer_count;
        const size_t chunk = (remaining < room) ? remaining : room;

        memcpy(tjei_g_output_buffer + tjei_g_output_buffer_count, src, chunk);
        tjei_g_output_buffer_count += chunk;

        assert (tjei_g_output_buffer_count <= TJEI_BUFFER_SIZE - 1);

        // Buffer full: flush it through the callback.
        if ( tjei_g_output_buffer_count == capacity ) {
            state->write_context.func(state->write_context.context,
                                      tjei_g_output_buffer,
                                      (int)tjei_g_output_buffer_count);
            tjei_g_output_buffer_count = 0;
        }

        if ( chunk >= remaining ) {
            break;
        }

        // Carry on with the part of the input that did not fit.
        src       += chunk;
        remaining -= chunk;
    }
}
470 
// Writes a DQT (define quantization table) segment: marker 0xffdb, segment
// length, one byte carrying the table id, and the 64 table entries from
// `matrix`. `id` must be less than 4.
static void tjei_write_DQT(TJEState* state, uint8_t* matrix, uint8_t id)
{
    uint16_t marker = tjei_be_word(0xffdb);
    tjei_write(state, &marker, sizeof marker, 1);

    // 2 (length field) + 1 (id byte) + 64 (matrix) = 67 = 0x43
    uint16_t segment_len = tjei_be_word(0x0043);
    tjei_write(state, &segment_len, sizeof segment_len, 1);

    assert(id < 4);
    // The precision bits stay zero (8 bit entries); only the id is stored.
    uint8_t precision_and_id = id;
    tjei_write(state, &precision_and_id, sizeof precision_and_id, 1);

    tjei_write(state, matrix, sizeof(uint8_t), 64);
}
483 
// Huffman table class, stored in the upper four bits of the class/id byte
// that tjei_write_DHT emits.
typedef enum {
    TJEI_DC = 0,  // Table for DC coefficients.
    TJEI_AC = 1   // Table for AC coefficients.
} TJEHuffmanTableClass;
488 
// Writes a DHT (define huffman table) segment: marker 0xffc4, segment length,
// the class/id byte, the 16 per-length code counts from `matrix_len`, and
// finally as many symbol values from `matrix_val` as those counts add up to.
// `id` must be less than 4.
static void tjei_write_DHT(TJEState* state,
                           uint8_t* matrix_len,
                           uint8_t* matrix_val,
                           TJEHuffmanTableClass ht_class,
                           uint8_t id)
{
    // The number of symbol values is the sum of the sixteen counts.
    int num_values = 0;
    for ( const uint8_t* count = matrix_len; count != matrix_len + 16; ++count ) {
        num_values += *count;
    }
    assert(num_values <= 0xffff);

    uint16_t marker = tjei_be_word(0xffc4);
    // 2 (length field) + 1 (class|id) + 16 (counts) + num_values (symbols)
    uint16_t segment_len = tjei_be_word(2 + 1 + 16 + (uint16_t)num_values);
    assert(id < 4);
    // Class in the upper nibble, table id in the lower nibble.
    uint8_t class_and_id = (uint8_t)((((uint8_t)ht_class) << 4) | id);

    tjei_write(state, &marker, sizeof marker, 1);
    tjei_write(state, &segment_len, sizeof segment_len, 1);
    tjei_write(state, &class_and_id, sizeof class_and_id, 1);
    tjei_write(state, matrix_len, sizeof(uint8_t), 16);
    tjei_write(state, matrix_val, sizeof(uint8_t), (size_t)num_values);
}
513 // ============================================================
514 //  Huffman deflation code.
515 // ============================================================
516 
// Expands the BITS specification (JPEG C.3) into one code length per symbol.
//
//  huffsize: output. Receives bits[0] entries equal to 1, then bits[1] entries
//            equal to 2, and so on up to length 16, followed by a terminating
//            0. It must have room for the sum of all counts plus one entry.
//  bits:     16 counts; bits[i] is the number of codes of length i + 1.
//
// Returns `huffsize`.
static uint8_t* tjei_huff_get_code_lengths(uint8_t huffsize[/*256*/], uint8_t* bits)
{
    uint8_t* out = huffsize;
    for ( uint8_t length = 1; length <= 16; ++length ) {
        int remaining = bits[length - 1];
        while ( remaining-- > 0 ) {
            *out++ = length;
        }
    }
    // Terminator expected by tjei_huff_get_codes.
    *out = 0;
    return huffsize;
}
529 
// Fills out the code (bit pattern) of every symbol from its code length.
//
//  codes:    output, one code per entry of `huffsize`.
//  huffsize: code lengths in non-decreasing order, terminated by a 0 entry
//            (as produced by tjei_huff_get_code_lengths).
//  count:    number of entries; only used to bounds-check in debug builds.
//
// Symbols of equal length receive consecutive codes. When the length grows,
// the running code is shifted left once per extra bit.
//
// Returns `codes`.
static uint16_t* tjei_huff_get_codes(uint16_t codes[], uint8_t* huffsize, int64_t count)
{
    uint16_t next_code = 0;
    uint8_t  cur_len   = huffsize[0];
    int      idx       = 0;

    while ( 1 ) {
        // Consecutive codes for the run of symbols sharing the current length.
        do {
            assert(idx < count);
            codes[idx] = next_code;
            ++idx;
            ++next_code;
        } while ( huffsize[idx] == cur_len );

        // A zero length marks the end of the table.
        if ( huffsize[idx] == 0 ) {
            break;
        }

        // Grow the code until it has the length of the next symbol.
        do {
            next_code = (uint16_t)(next_code << 1);
            ++cur_len;
        } while ( huffsize[idx] != cur_len );
    }
    return codes;
}
550 
// Builds the symbol-indexed lookup tables used while encoding.
//
//  out_ehuffsize: output; out_ehuffsize[symbol] receives the symbol's code length.
//  out_ehuffcode: output; out_ehuffcode[symbol] receives the symbol's code.
//  huffval:       symbol values, in the same order as `huffsize` / `huffcode`.
//  huffsize:      code length of the k-th symbol.
//  huffcode:      code of the k-th symbol.
//  count:         number of symbols to process.
//
// Entries for symbols that do not appear in `huffval` are left untouched.
// A `count` of zero (or less) is a no-op, so an empty table never causes a
// read of `huffval[0]` or a write to the output tables.
static void tjei_huff_get_extended(uint8_t* out_ehuffsize,
                                   uint16_t* out_ehuffcode,
                                   uint8_t* huffval,
                                   uint8_t* huffsize,
                                   uint16_t* huffcode, int64_t count)
{
    for ( int64_t k = 0; k < count; ++k ) {
        const uint8_t val = huffval[k];
        out_ehuffcode[val] = huffcode[k];
        out_ehuffsize[val] = huffsize[k];
    }
}
565 // ============================================================
566 
567 // Returns:
568 //  out[1] : number of bits
569 //  out[0] : bits
tjei_calculate_variable_length_int(int value,uint16_t out[2])570 TJEI_FORCE_INLINE void tjei_calculate_variable_length_int(int value, uint16_t out[2])
571 {
572     int abs_val = value;
573     if ( value < 0 ) {
574         abs_val = -abs_val;
575         --value;
576     }
577     out[1] = 1;
578     while( abs_val >>= 1 ) {
579         ++out[1];
580     }
581     out[0] = (uint16_t)(value & ((1 << out[1]) - 1));
582 }
583 
584 // Write bits to file.
tjei_write_bits(TJEState * state,uint32_t * bitbuffer,uint32_t * location,uint16_t num_bits,uint16_t bits)585 TJEI_FORCE_INLINE void tjei_write_bits(TJEState* state,
586                                        uint32_t* bitbuffer, uint32_t* location,
587                                        uint16_t num_bits, uint16_t bits)
588 {
589     //   v-- location
590     //  [                     ]   <-- bit buffer
591     // 32                     0
592     //
593     // This call pushes to the bitbuffer and saves the location. Data is pushed
594     // from most significant to less significant.
595     // When we can write a full byte, we write a byte and shift.
596 
597     // Push the stack.
598     uint32_t nloc = *location + num_bits;
599     *bitbuffer |= (uint32_t)(bits << (32 - nloc));
600     *location = nloc;
601     while ( *location >= 8 ) {
602         // Grab the most significant byte.
603         uint8_t c = (uint8_t)((*bitbuffer) >> 24);
604         // Write it to file.
605         tjei_write(state, &c, 1, 1);
606         if ( c == 0xff )  {
607             // Special case: tell JPEG this is not a marker.
608             char z = 0;
609             tjei_write(state, &z, 1, 1);
610         }
611         // Pop the stack.
612         *bitbuffer <<= 8;
613         *location -= 8;
614     }
615 }
616 
617 // DCT implementation by Thomas G. Lane.
618 // Obtained through NVIDIA
619 //  http://developer.download.nvidia.com/SDK/9.5/Samples/vidimaging_samples.html#gpgpu_dct
620 //
621 // QUOTE:
622 //  This implementation is based on Arai, Agui, and Nakajima's algorithm for
623 //  scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
624 //  Japanese, but the algorithm is described in the Pennebaker & Mitchell
625 //  JPEG textbook (see REFERENCES section in file README).  The following code
626 //  is based directly on figure 4-8 in P&M.
627 //
// One 8-point pass of the scaled forward DCT over the elements
// v[0], v[step], ..., v[7 * step], transformed in place.
static void tjei_fdct_1d(float* v, const int step)
{
    const float c4          = (float) 0.707106781;
    const float c6          = (float) 0.382683433;
    const float c2_minus_c6 = (float) 0.541196100;
    const float c2_plus_c6  = (float) 1.306562965;

    // Butterflies of mirrored inputs. All eight inputs are read before any
    // output is stored.
    const float sum07  = v[step*0] + v[step*7];
    const float diff07 = v[step*0] - v[step*7];
    const float sum16  = v[step*1] + v[step*6];
    const float diff16 = v[step*1] - v[step*6];
    const float sum25  = v[step*2] + v[step*5];
    const float diff25 = v[step*2] - v[step*5];
    const float sum34  = v[step*3] + v[step*4];
    const float diff34 = v[step*3] - v[step*4];

    /* Even part */

    const float even0 = sum07 + sum34;    /* phase 2 */
    const float even3 = sum07 - sum34;
    const float even1 = sum16 + sum25;
    const float even2 = sum16 - sum25;

    v[step*0] = even0 + even1;            /* phase 3 */
    v[step*4] = even0 - even1;

    const float z1 = (even2 + even3) * c4;
    v[step*2] = even3 + z1;               /* phase 5 */
    v[step*6] = even3 - z1;

    /* Odd part */

    const float odd0 = diff34 + diff25;   /* phase 2 */
    const float odd1 = diff25 + diff16;
    const float odd2 = diff16 + diff07;

    /* The rotator is modified from fig 4-8 to avoid extra negations. */
    const float z5 = (odd0 - odd2) * c6;
    const float z2 = c2_minus_c6 * odd0 + z5;
    const float z4 = c2_plus_c6 * odd2 + z5;
    const float z3 = odd1 * c4;

    const float z11 = diff07 + z3;        /* phase 5 */
    const float z13 = diff07 - z3;

    v[step*5] = z13 + z2;                 /* phase 6 */
    v[step*3] = z13 - z2;
    v[step*1] = z11 + z4;
    v[step*7] = z11 - z4;
}

// In-place forward DCT of an 8x8 block stored row-major in `data`
// (64 floats): every row is transformed first, then every column.
static void tjei_fdct (float * data)
{
    /* Pass 1: process rows. */
    for ( int row = 0; row < 8; ++row ) {
        tjei_fdct_1d(data + 8 * row, 1);
    }

    /* Pass 2: process columns. */
    for ( int col = 0; col < 8; ++col ) {
        tjei_fdct_1d(data + col, 8);
    }
}
736 #if !TJE_USE_FAST_DCT
slow_fdct(int u,int v,float * data)737 static float slow_fdct(int u, int v, float* data)
738 {
739 #define kPI 3.14159265f
740     float res = 0.0f;
741     float cu = (u == 0) ? 0.70710678118654f : 1;
742     float cv = (v == 0) ? 0.70710678118654f : 1;
743     for ( int y = 0; y < 8; ++y ) {
744         for ( int x = 0; x < 8; ++x ) {
745             res += (data[y * 8 + x]) *
746                     cosf(((2.0f * x + 1.0f) * u * kPI) / 16.0f) *
747                     cosf(((2.0f * y + 1.0f) * v * kPI) / 16.0f);
748         }
749     }
750     res *= 0.25f * cu * cv;
751     return res;
752 #undef kPI
753 }
754 #endif
755 
// Absolute value; only defined here if the including code has not already
// provided an ABS macro. The argument may be evaluated twice.
#ifndef ABS
    #define ABS(x) ((x) < 0 ? -(x) : (x))
#endif
759 
// Encodes one 8x8 data unit and pushes the resulting bits to the output.
//
// Steps: forward DCT of `mcu`, quantization with `qt`, reordering into
// zig-zag order, then Huffman coding of the DC difference followed by the
// run-length coded AC coefficients.
//
//  state:        encoder state (output goes through tjei_write_bits).
//  mcu:          64 input samples; left unmodified (a copy is transformed).
//  qt:           fast DCT: per-coefficient multipliers (pre-processed
//                quantization matrix); slow DCT: per-coefficient divisors.
//  huff_dc_*:    code lengths / codes for the DC symbols.
//  huff_ac_*:    code lengths / codes for the AC symbols.
//  pred:         in: previous DC coefficient; out: this unit's DC coefficient.
//  bitbuffer,
//  location:     bit accumulator state shared across calls.
static void tjei_encode_and_write_MCU(TJEState* state,
                                      float* mcu,
#if TJE_USE_FAST_DCT
                                      float* qt,  // Pre-processed quantization matrix.
#else
                                      uint8_t* qt,
#endif
                                      uint8_t* huff_dc_len, uint16_t* huff_dc_code, // Huffman tables
                                      uint8_t* huff_ac_len, uint16_t* huff_ac_code,
                                      int* pred,  // Previous DC coefficient
                                      uint32_t* bitbuffer,  // Bitstack.
                                      uint32_t* location)
{
    int   zz[64];      // Quantized coefficients in zig-zag order.
    float coeffs[64];  // Working copy holding the DCT coefficients.

    memcpy(coeffs, mcu, 64 * sizeof(float));

#if TJE_USE_FAST_DCT
    tjei_fdct(coeffs);
    for ( int n = 0; n < 64; ++n ) {
        float scaled = coeffs[n] * qt[n];
        // Round to nearest: the +1024 bias lets a single floorf handle the
        // negative values as well.
        scaled = floorf(scaled + 1024 + 0.5f);
        scaled -= 1024;
        zz[tjei_zig_zag[n]] = (int)scaled;
    }
#else
    for ( int n = 0; n < 64; ++n ) {
        // Row-major index n corresponds to u = n % 8, v = n / 8.
        coeffs[n] = slow_fdct(n % 8, n / 8, mcu);
    }
    for ( int n = 0; n < 64; ++n ) {
        const float quot = coeffs[n] / (qt[n]);
        zz[tjei_zig_zag[n]] = (int)((quot > 0) ? floorf(quot + 0.5f) : ceilf(quot - 0.5f));
    }
#endif

    uint16_t vli[2];

    // ==== DC coefficient: coded as the difference to the previous one ====

    const int dc      = zz[0];
    const int dc_diff = dc - *pred;
    *pred = dc;
    if ( dc_diff == 0 ) {
        tjei_write_bits(state, bitbuffer, location, huff_dc_len[0], huff_dc_code[0]);
    } else {
        tjei_calculate_variable_length_int(dc_diff, vli);
        // Huffman code of the bit count, then the bits themselves.
        tjei_write_bits(state, bitbuffer, location, huff_dc_len[vli[1]], huff_dc_code[vli[1]]);
        tjei_write_bits(state, bitbuffer, location, vli[1], vli[0]);
    }

    // ==== AC coefficients ====

    // Index of the last non-zero AC coefficient, or 0 when all are zero.
    int last_nonzero = 63;
    while ( last_nonzero > 0 && zz[last_nonzero] == 0 ) {
        --last_nonzero;
    }

    int pos = 1;
    while ( pos <= last_nonzero ) {
        // Count the zeros in front of the next non-zero coefficient. Every
        // full run of 16 zeros is emitted as the symbol 0xf0.
        int run = 0;
        while ( zz[pos] == 0 ) {
            ++run;
            ++pos;
            if ( run == 16 ) {
                tjei_write_bits(state, bitbuffer, location, huff_ac_len[0xf0], huff_ac_code[0xf0]);
                run = 0;
            }
        }
        tjei_calculate_variable_length_int(zz[pos], vli);

        assert(run < 0x10);
        assert(vli[1] <= 10);

        // Symbol 1: run length in the upper nibble, bit count in the lower.
        const uint16_t run_size = (uint16_t)(((uint16_t)run << 4) | vli[1]);

        assert(huff_ac_len[run_size] != 0);

        // Write symbol 1  --- (RUNLENGTH, SIZE)
        tjei_write_bits(state, bitbuffer, location, huff_ac_len[run_size], huff_ac_code[run_size]);
        // Write symbol 2  --- (AMPLITUDE)
        tjei_write_bits(state, bitbuffer, location, vli[1], vli[0]);

        ++pos;
    }

    // Trailing zeros are summarized by the end-of-block symbol 0x00.
    if ( last_nonzero != 63 ) {
        tjei_write_bits(state, bitbuffer, location, huff_ac_len[0], huff_ac_code[0]);
    }
}
864 
// Indices of the four Huffman tables, as used for the first dimension of the
// table arrays in TJEState.
enum {
    TJEI_LUMA_DC   = 0,
    TJEI_LUMA_AC   = 1,
    TJEI_CHROMA_DC = 2,
    TJEI_CHROMA_AC = 3,
};
871 
#if TJE_USE_FAST_DCT
// Floating-point quantization tables used only when the fast DCT is enabled.
// The encoder fills each entry with a reciprocal (1 / scaled divisor) so that
// quantization can multiply instead of divide.
struct TJEProcessedQT {
    float chroma[8 * 8];  // One multiplier per coefficient of a chroma block.
    float luma[8 * 8];    // One multiplier per coefficient of a luma block.
};
#endif
878 
879 // Set up huffman tables in state.
tjei_huff_expand(TJEState * state)880 static void tjei_huff_expand (TJEState* state)
881 {
882     assert(state);
883 
884     state->ht_bits[TJEI_LUMA_DC]   = tjei_default_ht_luma_dc_len;
885     state->ht_bits[TJEI_LUMA_AC]   = tjei_default_ht_luma_ac_len;
886     state->ht_bits[TJEI_CHROMA_DC] = tjei_default_ht_chroma_dc_len;
887     state->ht_bits[TJEI_CHROMA_AC] = tjei_default_ht_chroma_ac_len;
888 
889     state->ht_vals[TJEI_LUMA_DC]   = tjei_default_ht_luma_dc;
890     state->ht_vals[TJEI_LUMA_AC]   = tjei_default_ht_luma_ac;
891     state->ht_vals[TJEI_CHROMA_DC] = tjei_default_ht_chroma_dc;
892     state->ht_vals[TJEI_CHROMA_AC] = tjei_default_ht_chroma_ac;
893 
894     // How many codes in total for each of LUMA_(DC|AC) and CHROMA_(DC|AC)
895     int32_t spec_tables_len[4] = { 0 };
896 
897     for ( int i = 0; i < 4; ++i ) {
898         for ( int k = 0; k < 16; ++k ) {
899             spec_tables_len[i] += state->ht_bits[i][k];
900         }
901     }
902 
903     // Fill out the extended tables..
904     uint8_t huffsize[4][257];
905     uint16_t huffcode[4][256];
906     for ( int i = 0; i < 4; ++i ) {
907         assert (256 >= spec_tables_len[i]);
908         tjei_huff_get_code_lengths(huffsize[i], state->ht_bits[i]);
909         tjei_huff_get_codes(huffcode[i], huffsize[i], spec_tables_len[i]);
910     }
911     for ( int i = 0; i < 4; ++i ) {
912         int64_t count = spec_tables_len[i];
913         tjei_huff_get_extended(state->ehuffsize[i],
914                                state->ehuffcode[i],
915                                state->ht_vals[i],
916                                &huffsize[i][0],
917                                &huffcode[i][0], count);
918     }
919 }
920 
// Encode `src_data` as a baseline JPEG and emit it through the write context
// stored in `state`.
//
//   state              - encoder state; its quantization tables, Huffman
//                        tables and write_context are used here.
//   src_data           - source pixels, `src_num_components` bytes per pixel,
//                        `width` pixels per row. The first three bytes of
//                        each pixel are read as R, G, B.
//   width, height      - image size in pixels; each must be in [1, 0xffff].
//   src_num_components - bytes per source pixel; must be 3 or 4.
//
// Returns 1 on success. Returns 0, without writing anything, when an argument
// is rejected.
static int tjei_encode_main(TJEState* state,
                            const unsigned char* src_data,
                            const int width,
                            const int height,
                            const int src_num_components)
{
    if (src_num_components != 3 && src_num_components != 4) {
        return 0;
    }

    // Dimensions are stored as 16-bit fields in the frame header. Zero or
    // negative sizes would wrap or produce an image with no scan data.
    if (width <= 0 || height <= 0 || width > 0xffff || height > 0xffff) {
        return 0;
    }

    if (!src_data) {
        return 0;
    }

#if TJE_USE_FAST_DCT
    struct TJEProcessedQT pqt;
    // Again, taken from classic japanese implementation.
    //
    /* For float AA&N IDCT method, divisors are equal to quantization
     * coefficients scaled by scalefactor[row]*scalefactor[col], where
     *   scalefactor[0] = 1
     *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
     * We apply a further scale factor of 8.
     * What's actually stored is 1/divisor so that the inner loop can
     * use a multiplication rather than a division.
     */
    static const float aan_scales[] = {
        1.0f, 1.387039845f, 1.306562965f, 1.175875602f,
        1.0f, 0.785694958f, 0.541196100f, 0.275899379f
    };

    // build (de)quantization tables
    for(int y=0; y<8; y++) {
        for(int x=0; x<8; x++) {
            int i = y*8 + x;
            pqt.luma[y*8+x] = 1.0f / (8 * aan_scales[x] * aan_scales[y] * state->qt_luma[tjei_zig_zag[i]]);
            pqt.chroma[y*8+x] = 1.0f / (8 * aan_scales[x] * aan_scales[y] * state->qt_chroma[tjei_zig_zag[i]]);
        }
    }
#endif

    { // Write header
        TJEJPEGHeader header;
        // JFIF header.
        header.SOI = tjei_be_word(0xffd8);  // Start of image
        header.APP0 = tjei_be_word(0xffe0);

        uint16_t jfif_len = sizeof(TJEJPEGHeader) - 4 /*SOI & APP0 markers*/;
        header.jfif_len = tjei_be_word(jfif_len);
        memcpy(header.jfif_id, (void*)tjeik_jfif_id, 5);
        header.version = tjei_be_word(0x0102);
        header.units = 0x01;  // Dots-per-inch
        header.x_density = tjei_be_word(0x0060);  // 96 DPI
        header.y_density = tjei_be_word(0x0060);  // 96 DPI
        header.x_thumb = 0;
        header.y_thumb = 0;
        tjei_write(state, &header, sizeof(TJEJPEGHeader), 1);
    }
    {  // Write comment
        TJEJPEGComment com;
        uint16_t com_len = 2 + sizeof(tjeik_com_str) - 1;
        // Comment
        com.com = tjei_be_word(0xfffe);
        com.com_len = tjei_be_word(com_len);
        memcpy(com.com_str, (void*)tjeik_com_str, sizeof(tjeik_com_str)-1);
        tjei_write(state, &com, sizeof(TJEJPEGComment), 1);
    }

    // Write quantization tables.
    tjei_write_DQT(state, state->qt_luma, 0x00);
    tjei_write_DQT(state, state->qt_chroma, 0x01);

    {  // Write the frame marker.
        TJEFrameHeader header;
        header.SOF = tjei_be_word(0xffc0);  // SOF0: baseline DCT
        header.len = tjei_be_word(8 + 3 * 3);
        header.precision = 8;
        assert(width <= 0xffff);
        assert(height <= 0xffff);
        header.width = tjei_be_word((uint16_t)width);
        header.height = tjei_be_word((uint16_t)height);
        header.num_components = 3;
        uint8_t tables[3] = {
            0,  // Luma component gets luma table (see tjei_write_DQT call above.)
            1,  // Chroma component gets chroma table
            1,  // Chroma component gets chroma table
        };
        for (int i = 0; i < 3; ++i) {
            TJEComponentSpec spec;
            spec.component_id = (uint8_t)(i + 1);  // No particular reason. Just 1, 2, 3.
            spec.sampling_factors = (uint8_t)0x11;
            spec.qt = tables[i];

            header.component_spec[i] = spec;
        }
        // Write to file.
        tjei_write(state, &header, sizeof(TJEFrameHeader), 1);
    }

    tjei_write_DHT(state, state->ht_bits[TJEI_LUMA_DC],   state->ht_vals[TJEI_LUMA_DC], TJEI_DC, 0);
    tjei_write_DHT(state, state->ht_bits[TJEI_LUMA_AC],   state->ht_vals[TJEI_LUMA_AC], TJEI_AC, 0);
    tjei_write_DHT(state, state->ht_bits[TJEI_CHROMA_DC], state->ht_vals[TJEI_CHROMA_DC], TJEI_DC, 1);
    tjei_write_DHT(state, state->ht_bits[TJEI_CHROMA_AC], state->ht_vals[TJEI_CHROMA_AC], TJEI_AC, 1);

    // Write start of scan
    {
        TJEScanHeader header;
        header.SOS = tjei_be_word(0xffda);
        header.len = tjei_be_word((uint16_t)(6 + (sizeof(TJEFrameComponentSpec) * 3)));
        header.num_components = 3;

        // High nibble: DC table id, low nibble: AC table id.
        uint8_t tables[3] = {
            0x00,
            0x11,
            0x11,
        };
        for (int i = 0; i < 3; ++i) {
            TJEFrameComponentSpec cs;
            // Must be equal to component_id from frame header above.
            cs.component_id = (uint8_t)(i + 1);
            cs.dc_ac = (uint8_t)tables[i];

            header.component_spec[i] = cs;
        }
        header.first = 0;
        header.last  = 63;
        header.ah_al = 0;
        tjei_write(state, &header, sizeof(TJEScanHeader), 1);

    }
    // Write compressed data.

    float du_y[64];
    float du_b[64];
    float du_r[64];

    // DC predictors, one per component. Start at 0.
    int pred_y = 0;
    int pred_b = 0;
    int pred_r = 0;

    // Bit stack
    uint32_t bitbuffer = 0;
    uint32_t location = 0;


    for ( int y = 0; y < height; y += 8 ) {
        for ( int x = 0; x < width; x += 8 ) {
            // Block loop: ====
            for ( int off_y = 0; off_y < 8; ++off_y ) {
                // Blocks that hang over the bottom edge repeat the last row.
                int row = y + off_y;
                if (row >= height) {
                    row = height - 1;
                }
                // Index arithmetic is done in size_t: width * height *
                // components can exceed INT_MAX for images that are still
                // within the 0xffff limit.
                const size_t row_start = (size_t)row * (size_t)width;

                for ( int off_x = 0; off_x < 8; ++off_x ) {
                    int block_index = (off_y * 8 + off_x);

                    // Blocks that hang over the right edge repeat the last column.
                    int col = x + off_x;
                    if (col >= width) {
                        col = width - 1;
                    }

                    const size_t src_index = (row_start + (size_t)col) * (size_t)src_num_components;
                    assert(src_index < (size_t)width * (size_t)height * (size_t)src_num_components);

                    uint8_t r = src_data[src_index + 0];
                    uint8_t g = src_data[src_index + 1];
                    uint8_t b = src_data[src_index + 2];

                    // RGB -> YCbCr. Luma is level-shifted by -128; the chroma
                    // formulas are already centred on zero.
                    float luma = 0.299f   * r + 0.587f    * g + 0.114f    * b - 128;
                    float cb   = -0.1687f * r - 0.3313f   * g + 0.5f      * b;
                    float cr   = 0.5f     * r - 0.4187f   * g - 0.0813f   * b;

                    du_y[block_index] = luma;
                    du_b[block_index] = cb;
                    du_r[block_index] = cr;
                }
            }

            tjei_encode_and_write_MCU(state, du_y,
#if TJE_USE_FAST_DCT
                                     pqt.luma,
#else
                                     state->qt_luma,
#endif
                                     state->ehuffsize[TJEI_LUMA_DC], state->ehuffcode[TJEI_LUMA_DC],
                                     state->ehuffsize[TJEI_LUMA_AC], state->ehuffcode[TJEI_LUMA_AC],
                                     &pred_y, &bitbuffer, &location);
            tjei_encode_and_write_MCU(state, du_b,
#if TJE_USE_FAST_DCT
                                     pqt.chroma,
#else
                                     state->qt_chroma,
#endif
                                     state->ehuffsize[TJEI_CHROMA_DC], state->ehuffcode[TJEI_CHROMA_DC],
                                     state->ehuffsize[TJEI_CHROMA_AC], state->ehuffcode[TJEI_CHROMA_AC],
                                     &pred_b, &bitbuffer, &location);
            tjei_encode_and_write_MCU(state, du_r,
#if TJE_USE_FAST_DCT
                                     pqt.chroma,
#else
                                     state->qt_chroma,
#endif
                                     state->ehuffsize[TJEI_CHROMA_DC], state->ehuffcode[TJEI_CHROMA_DC],
                                     state->ehuffsize[TJEI_CHROMA_AC], state->ehuffcode[TJEI_CHROMA_AC],
                                     &pred_r, &bitbuffer, &location);


        }
    }

    // Finish the image.
    { // Flush: pad the last partial byte with zero bits.
        if (location > 0 && location < 8) {
            tjei_write_bits(state, &bitbuffer, &location, (uint16_t)(8 - location), 0);
        }
    }
    uint16_t EOI = tjei_be_word(0xffd9);
    tjei_write(state, &EOI, sizeof(uint16_t), 1);

    // Hand whatever is still buffered to the user's write callback.
    if (tjei_g_output_buffer_count) {
        state->write_context.func(state->write_context.context, tjei_g_output_buffer, (int)tjei_g_output_buffer_count);
        tjei_g_output_buffer_count = 0;
    }

    return 1;
}
1149 
// Encode `src_data` to the file at `dest_path` using quality 3 (the highest
// setting). Forwards to tje_encode_to_file_at_quality and returns its result.
int tje_encode_to_file(const char* dest_path,
                       const int width,
                       const int height,
                       const int num_components,
                       const unsigned char* src_data)
{
    return tje_encode_to_file_at_quality(dest_path, 3, width, height, num_components, src_data);
}
1159 
// Write callback used for file output: `context` is the destination FILE*,
// and `size` bytes starting at `data` are written to it as a single item.
// The fwrite result is discarded, so write errors are not reported here.
static void tjei_stdlib_func(void* context, void* data, int size)
{
    fwrite(data, size, 1, (FILE*)context);
}
1165 
1166 // Define public interface.
tje_encode_to_file_at_quality(const char * dest_path,const int quality,const int width,const int height,const int num_components,const unsigned char * src_data)1167 int tje_encode_to_file_at_quality(const char* dest_path,
1168                                   const int quality,
1169                                   const int width,
1170                                   const int height,
1171                                   const int num_components,
1172                                   const unsigned char* src_data)
1173 {
1174     FILE* fd = fopen(dest_path, "wb");
1175     if (!fd) {
1176         tje_log("Could not open file for writing.");
1177         return 0;
1178     }
1179 
1180     int result = tje_encode_with_func(tjei_stdlib_func, fd,
1181                                       quality, width, height, num_components, src_data);
1182 
1183     result |= 0 == fclose(fd);
1184 
1185     return result;
1186 }
1187 
// Encode `src_data` as a JPEG, delivering the output through `func`.
//
//   func, context  - write callback and the opaque pointer handed back to it.
//   quality        - 1 (lowest), 2, or 3 (highest); anything else is rejected.
//   width, height  - image size in pixels.
//   num_components - bytes per source pixel.
//   src_data       - source pixel data.
//
// Returns 0 if `quality` is out of range, otherwise the result of
// tjei_encode_main.
int tje_encode_with_func(tje_write_func* func,
                         void* context,
                         const int quality,
                         const int width,
                         const int height,
                         const int num_components,
                         const unsigned char* src_data)
{
    if (quality < 1 || quality > 3) {
        tje_log("[ERROR] -- Valid 'quality' values are 1 (lowest), 2, or 3 (highest)\n");
        return 0;
    }

    TJEState state = { 0 };

    if (quality == 3) {
        // Highest quality: every quantization divisor is 1.
        for ( int i = 0; i < 64; ++i ) {
            state.qt_luma[i]   = 1;
            state.qt_chroma[i] = 1;
        }
    } else {
        // Quality 1 uses the default tables as they are; quality 2 divides
        // them by 10. Entries that would become 0 are raised to 1.
        const uint8_t qt_factor = (quality == 2) ? 10 : 1;
        for ( int i = 0; i < 64; ++i ) {
            state.qt_luma[i] = tjei_default_qt_luma_from_spec[i] / qt_factor;
            if (state.qt_luma[i] == 0) {
                state.qt_luma[i] = 1;
            }
            state.qt_chroma[i] = tjei_default_qt_chroma_from_paper[i] / qt_factor;
            if (state.qt_chroma[i] == 0) {
                state.qt_chroma[i] = 1;
            }
        }
    }

    // `state` was zero-initialized, so only the two used fields need setting.
    state.write_context.context = context;
    state.write_context.func = func;

    tjei_huff_expand(&state);

    return tjei_encode_main(&state, src_data, width, height, num_components);
}
1245 // ============================================================
1246 #endif // TJE_IMPLEMENTATION
1247 // ============================================================
1248 //
1249 #if defined(__GNUC__) || defined(__clang__)
1250 #pragma GCC diagnostic pop
1251 #endif
1252 
1253 
1254 #ifdef __cplusplus
1255 }  // extern C
1256 #endif
1257 
1258