1 // jpgd.cpp - C++ class for JPEG decompression. Written by Richard Geldreich <richgel99@gmail.com> between 1994-2020.
2 // Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2.
3 // Supports box and linear chroma upsampling.
4 //
5 // Released under two licenses. You are free to choose which license you want:
6 // License 1:
7 // Public Domain
8 //
9 // License 2:
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 //
14 //    http://www.apache.org/licenses/LICENSE-2.0
15 //
16 // Unless required by applicable law or agreed to in writing, software
17 // distributed under the License is distributed on an "AS IS" BASIS,
18 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 // See the License for the specific language governing permissions and
20 // limitations under the License.
21 //
22 // Alex Evans: Linear memory allocator (taken from jpge.h).
23 // v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings
24 // v2.00, March 20, 2020: Fuzzed with zzuf and afl. Fixed several issues, converted most assert()'s to run-time checks. Added chroma upsampling. Removed freq. domain upsampling. gcc/clang warnings.
25 //
26 // Important:
27 // #define JPGD_USE_SSE2 to 0 to completely disable SSE2 usage.
28 //
29 #include "jpgd.h"
30 #include <string.h>
31 #include <algorithm>
32 #include <assert.h>
33 
34 #ifdef _MSC_VER
35 #pragma warning (disable : 4611) // warning C4611: interaction between '_setjmp' and C++ object destruction is non-portable
36 #endif
37 
// Auto-detect SSE2 support unless the build has already defined JPGD_USE_SSE2.
// If no branch below matches, the macro stays undefined, and an undefined
// identifier evaluates to 0 in the later "#if JPGD_USE_SSE2" tests.
#ifndef JPGD_USE_SSE2

	// GCC-compatible compilers: enable when __SSE2__ is defined.
	#if defined(__GNUC__)
		#if defined(__SSE2__)
			#define JPGD_USE_SSE2 (1)
		#endif
	// MSVC: enable when targeting x64 (_M_X64 defined).
	#elif defined(_MSC_VER)
		#if defined(_M_X64)
			#define JPGD_USE_SSE2 (1)
		#endif
	#endif

#endif
51 
// Integer boolean constants.
#define JPGD_TRUE (1)
#define JPGD_FALSE (0)

// Maximum / minimum of two values. These are macros, so the selected argument
// is evaluated twice: do not pass expressions with side effects.
#define JPGD_MAX(a,b) (((a)>(b)) ? (a) : (b))
#define JPGD_MIN(a,b) (((a)<(b)) ? (a) : (b))
57 
58 namespace jpgd {
59 
60 	static inline void* jpgd_malloc(size_t nSize) { return malloc(nSize); }
61 	static inline void jpgd_free(void* p) { free(p); }
62 
	// DCT coefficients are stored in this sequence.
	// Entry k is the row-major position (row * 8 + column) inside an 8x8 block of the
	// k-th coefficient in zig-zag scan order: 0, 1, 8, 16, 9, 2, ...
	static int g_ZAG[64] = { 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 };
65 
	// JPEG marker codes. Each value is the byte that follows the 0xFF prefix in the
	// stream; next_marker() returns that byte and process_markers() switches on it.
	// M_ERROR (0x100) lies outside the single-byte range, and RST0 has the same
	// value as M_RST0.
	enum JPEG_MARKER
	{
		M_SOF0 = 0xC0, M_SOF1 = 0xC1, M_SOF2 = 0xC2, M_SOF3 = 0xC3, M_SOF5 = 0xC5, M_SOF6 = 0xC6, M_SOF7 = 0xC7, M_JPG = 0xC8,
		M_SOF9 = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT = 0xC4, M_DAC = 0xCC,
		M_RST0 = 0xD0, M_RST1 = 0xD1, M_RST2 = 0xD2, M_RST3 = 0xD3, M_RST4 = 0xD4, M_RST5 = 0xD5, M_RST6 = 0xD6, M_RST7 = 0xD7,
		M_SOI = 0xD8, M_EOI = 0xD9, M_SOS = 0xDA, M_DQT = 0xDB, M_DNL = 0xDC, M_DRI = 0xDD, M_DHP = 0xDE, M_EXP = 0xDF,
		M_APP0 = 0xE0, M_APP15 = 0xEF, M_JPG0 = 0xF0, M_JPG13 = 0xFD, M_COM = 0xFE, M_TEM = 0x01, M_ERROR = 0x100, RST0 = 0xD0
	};
74 
	// Chroma subsampling layouts handled by the decoder (grayscale, H1V1, H2V1, H1V2, H2V2),
	// numbered consecutively starting at 0.
	enum JPEG_SUBSAMPLING { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 };
76 
77 #if JPGD_USE_SSE2
78 #include "jpgd_idct.h"
79 #endif
80 
// Fixed-point parameters for the scalar IDCT below.
// CONST_BITS: number of fractional bits carried by the FIX_* constants
//             (e.g. 0.298631336 * 2^13 is about 2446).
// PASS1_BITS: the row pass descales by CONST_BITS - PASS1_BITS, and the column pass
//             descales by CONST_BITS + PASS1_BITS + 3.
// SCALEDONE:  the value 1 as an int32, used to build the rounding term in DESCALE.
#define CONST_BITS  13
#define PASS1_BITS  2
#define SCALEDONE ((int32)1)

// Each constant is the value named in its trailing comment multiplied by 2^CONST_BITS and rounded.
#define FIX_0_298631336  ((int32)2446)        /* FIX(0.298631336) */
#define FIX_0_390180644  ((int32)3196)        /* FIX(0.390180644) */
#define FIX_0_541196100  ((int32)4433)        /* FIX(0.541196100) */
#define FIX_0_765366865  ((int32)6270)        /* FIX(0.765366865) */
#define FIX_0_899976223  ((int32)7373)        /* FIX(0.899976223) */
#define FIX_1_175875602  ((int32)9633)        /* FIX(1.175875602) */
#define FIX_1_501321110  ((int32)12299)       /* FIX(1.501321110) */
#define FIX_1_847759065  ((int32)15137)       /* FIX(1.847759065) */
#define FIX_1_961570560  ((int32)16069)       /* FIX(1.961570560) */
#define FIX_2_053119869  ((int32)16819)       /* FIX(2.053119869) */
#define FIX_2_562915447  ((int32)20995)       /* FIX(2.562915447) */
#define FIX_3_072711026  ((int32)25172)       /* FIX(3.072711026) */

// DESCALE: right shift by n with rounding (adds 2^(n-1) before shifting).
// DESCALE_ZEROSHIFT: same, but first adds 128 scaled up by n bits, so the shifted result is offset by +128.
#define DESCALE(x,n)  (((x) + (SCALEDONE << ((n)-1))) >> (n))
#define DESCALE_ZEROSHIFT(x,n)  (((x) + (128 << (n)) + (SCALEDONE << ((n)-1))) >> (n))

// Plain integer multiplication, kept as a macro so every IDCT multiply goes through one place.
#define MULTIPLY(var, cnst)  ((var) * (cnst))
102 
// Clamps a signed integer to [0, 255]: negative values become 0, values above 255 become 255.
// The unsigned compare catches both out-of-range cases at once; the sign of the argument then
// selects 0 or 0xFF (this relies on >> of a negative int being an arithmetic shift).
// Every use of the argument is parenthesized so expressions such as CLAMP(a + b) expand correctly.
// The argument is still evaluated more than once, so it must not have side effects.
#define CLAMP(i) ((static_cast<uint>(i) > 255) ? (((~(i)) >> 31) & 0xFF) : (i))
104 
105 	static inline int left_shifti(int val, uint32_t bits)
106 	{
107 		return static_cast<int>(static_cast<uint32_t>(val) << bits);
108 	}
109 
	// Compiler creates a fast path 1D IDCT for X non-zero columns
	// Row pass of the scalar 2D inverse DCT: reads one row of up to 8 coefficients from pSrc
	// and writes 8 intermediate ints to pTemp. NONZERO_COLS is the number of leading
	// coefficients that are actually read; the remaining ones are taken as 0.
	template <int NONZERO_COLS>
	struct Row
	{
		static void idct(int* pTemp, const jpgd_block_coeff_t* pSrc)
		{
			// ACCESS_COL() will be optimized at compile time to either an array access, or 0. Good compilers will then optimize out muls against 0.
			// Note: the macro is not #undef'd, so it stays defined for the rest of the translation unit.
#define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0)

			// Even part: coefficients 2 and 6 are rotated, coefficients 0 and 4 form a sum/difference pair.
			const int z2 = ACCESS_COL(2), z3 = ACCESS_COL(6);

			const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
			const int tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065);
			const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);

			// Scaled up by CONST_BITS so they match the products above.
			const int tmp0 = left_shifti(ACCESS_COL(0) + ACCESS_COL(4), CONST_BITS);
			const int tmp1 = left_shifti(ACCESS_COL(0) - ACCESS_COL(4), CONST_BITS);

			const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;

			// Odd part: coefficients 7, 5, 3 and 1.
			const int atmp0 = ACCESS_COL(7), atmp1 = ACCESS_COL(5), atmp2 = ACCESS_COL(3), atmp3 = ACCESS_COL(1);

			const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
			const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602);

			const int az1 = MULTIPLY(bz1, -FIX_0_899976223);
			const int az2 = MULTIPLY(bz2, -FIX_2_562915447);
			const int az3 = MULTIPLY(bz3, -FIX_1_961570560) + bz5;
			const int az4 = MULTIPLY(bz4, -FIX_0_390180644) + bz5;

			const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3;
			const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4;
			const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3;
			const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4;

			// Combine even and odd parts. Descaling by CONST_BITS - PASS1_BITS (rather than CONST_BITS)
			// leaves the outputs scaled up by 2^PASS1_BITS for the column pass.
			pTemp[0] = DESCALE(tmp10 + btmp3, CONST_BITS - PASS1_BITS);
			pTemp[7] = DESCALE(tmp10 - btmp3, CONST_BITS - PASS1_BITS);
			pTemp[1] = DESCALE(tmp11 + btmp2, CONST_BITS - PASS1_BITS);
			pTemp[6] = DESCALE(tmp11 - btmp2, CONST_BITS - PASS1_BITS);
			pTemp[2] = DESCALE(tmp12 + btmp1, CONST_BITS - PASS1_BITS);
			pTemp[5] = DESCALE(tmp12 - btmp1, CONST_BITS - PASS1_BITS);
			pTemp[3] = DESCALE(tmp13 + btmp0, CONST_BITS - PASS1_BITS);
			pTemp[4] = DESCALE(tmp13 - btmp0, CONST_BITS - PASS1_BITS);
		}
	};
155 
156 	template <>
157 	struct Row<0>
158 	{
159 		static void idct(int* pTemp, const jpgd_block_coeff_t* pSrc)
160 		{
161 			(void)pTemp;
162 			(void)pSrc;
163 		}
164 	};
165 
166 	template <>
167 	struct Row<1>
168 	{
169 		static void idct(int* pTemp, const jpgd_block_coeff_t* pSrc)
170 		{
171 			const int dcval = left_shifti(pSrc[0], PASS1_BITS);
172 
173 			pTemp[0] = dcval;
174 			pTemp[1] = dcval;
175 			pTemp[2] = dcval;
176 			pTemp[3] = dcval;
177 			pTemp[4] = dcval;
178 			pTemp[5] = dcval;
179 			pTemp[6] = dcval;
180 			pTemp[7] = dcval;
181 		}
182 	};
183 
	// Compiler creates a fast path 1D IDCT for X non-zero rows
	// Column pass of the scalar 2D inverse DCT: reads one column of the row-pass output
	// (pTemp, stride 8) and writes 8 clamped bytes down the matching column of pDst_ptr (stride 8).
	// NONZERO_ROWS is the number of leading rows that are actually read; the rest are taken as 0.
	template <int NONZERO_ROWS>
	struct Col
	{
		static void idct(uint8* pDst_ptr, const int* pTemp)
		{
			// ACCESS_ROW() will be optimized at compile time to either an array access, or 0.
			// Note: the macro is not #undef'd, so it stays defined for the rest of the translation unit.
#define ACCESS_ROW(x) (((x) < NONZERO_ROWS) ? pTemp[x * 8] : 0)

			// Even part: rows 2 and 6 are rotated, rows 0 and 4 form a sum/difference pair.
			const int z2 = ACCESS_ROW(2);
			const int z3 = ACCESS_ROW(6);

			const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
			const int tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065);
			const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);

			const int tmp0 = left_shifti(ACCESS_ROW(0) + ACCESS_ROW(4), CONST_BITS);
			const int tmp1 = left_shifti(ACCESS_ROW(0) - ACCESS_ROW(4), CONST_BITS);

			const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;

			// Odd part: rows 7, 5, 3 and 1.
			const int atmp0 = ACCESS_ROW(7), atmp1 = ACCESS_ROW(5), atmp2 = ACCESS_ROW(3), atmp3 = ACCESS_ROW(1);

			const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
			const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602);

			const int az1 = MULTIPLY(bz1, -FIX_0_899976223);
			const int az2 = MULTIPLY(bz2, -FIX_2_562915447);
			const int az3 = MULTIPLY(bz3, -FIX_1_961570560) + bz5;
			const int az4 = MULTIPLY(bz4, -FIX_0_390180644) + bz5;

			const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3;
			const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4;
			const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3;
			const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4;

			// Final descale removes CONST_BITS + PASS1_BITS + 3 bits, adds the +128 offset
			// (DESCALE_ZEROSHIFT) and clamps each sample to [0, 255].
			int i = DESCALE_ZEROSHIFT(tmp10 + btmp3, CONST_BITS + PASS1_BITS + 3);
			pDst_ptr[8 * 0] = (uint8)CLAMP(i);

			i = DESCALE_ZEROSHIFT(tmp10 - btmp3, CONST_BITS + PASS1_BITS + 3);
			pDst_ptr[8 * 7] = (uint8)CLAMP(i);

			i = DESCALE_ZEROSHIFT(tmp11 + btmp2, CONST_BITS + PASS1_BITS + 3);
			pDst_ptr[8 * 1] = (uint8)CLAMP(i);

			i = DESCALE_ZEROSHIFT(tmp11 - btmp2, CONST_BITS + PASS1_BITS + 3);
			pDst_ptr[8 * 6] = (uint8)CLAMP(i);

			i = DESCALE_ZEROSHIFT(tmp12 + btmp1, CONST_BITS + PASS1_BITS + 3);
			pDst_ptr[8 * 2] = (uint8)CLAMP(i);

			i = DESCALE_ZEROSHIFT(tmp12 - btmp1, CONST_BITS + PASS1_BITS + 3);
			pDst_ptr[8 * 5] = (uint8)CLAMP(i);

			i = DESCALE_ZEROSHIFT(tmp13 + btmp0, CONST_BITS + PASS1_BITS + 3);
			pDst_ptr[8 * 3] = (uint8)CLAMP(i);

			i = DESCALE_ZEROSHIFT(tmp13 - btmp0, CONST_BITS + PASS1_BITS + 3);
			pDst_ptr[8 * 4] = (uint8)CLAMP(i);
		}
	};
245 
246 	template <>
247 	struct Col<1>
248 	{
249 		static void idct(uint8* pDst_ptr, const int* pTemp)
250 		{
251 			int dcval = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS + 3);
252 			const uint8 dcval_clamped = (uint8)CLAMP(dcval);
253 			pDst_ptr[0 * 8] = dcval_clamped;
254 			pDst_ptr[1 * 8] = dcval_clamped;
255 			pDst_ptr[2 * 8] = dcval_clamped;
256 			pDst_ptr[3 * 8] = dcval_clamped;
257 			pDst_ptr[4 * 8] = dcval_clamped;
258 			pDst_ptr[5 * 8] = dcval_clamped;
259 			pDst_ptr[6 * 8] = dcval_clamped;
260 			pDst_ptr[7 * 8] = dcval_clamped;
261 		}
262 	};
263 
	// Row-pass dispatch table used by idct(): 64 groups of 8 entries.
	// Group (block_max_zag - 1) holds, for each of the 8 rows of a block, the N of the
	// Row<N> variant to run on that row (how many leading coefficients of the row are read).
	static const uint8 s_idct_row_table[] =
	{
	  1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0,
	  4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0,
	  6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0,
	  6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0,
	  8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2,
	  8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2,
	  8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4,
	  8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8,
	};
275 
	// Column-pass dispatch table used by idct(): entry (block_max_zag - 1) is the N of the
	// Col<N> variant applied to every column (how many leading rows of the row-pass output are read).
	static const uint8 s_idct_col_table[] =
	{
		1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
		7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8
	};
281 
282 	// Scalar "fast pathing" IDCT.
283 	static void idct(const jpgd_block_coeff_t* pSrc_ptr, uint8* pDst_ptr, int block_max_zag, bool use_simd)
284 	{
285 		(void)use_simd;
286 
287 		assert(block_max_zag >= 1);
288 		assert(block_max_zag <= 64);
289 
290 		if (block_max_zag <= 1)
291 		{
292 			int k = ((pSrc_ptr[0] + 4) >> 3) + 128;
293 			k = CLAMP(k);
294 			k = k | (k << 8);
295 			k = k | (k << 16);
296 
297 			for (int i = 8; i > 0; i--)
298 			{
299 				*(int*)&pDst_ptr[0] = k;
300 				*(int*)&pDst_ptr[4] = k;
301 				pDst_ptr += 8;
302 			}
303 			return;
304 		}
305 
306 #if JPGD_USE_SSE2
307 		if (use_simd)
308 		{
309 			assert((((uintptr_t)pSrc_ptr) & 15) == 0);
310 			assert((((uintptr_t)pDst_ptr) & 15) == 0);
311 			idctSSEShortU8(pSrc_ptr, pDst_ptr);
312 			return;
313 		}
314 #endif
315 
316 		int temp[64];
317 
318 		const jpgd_block_coeff_t* pSrc = pSrc_ptr;
319 		int* pTemp = temp;
320 
321 		const uint8* pRow_tab = &s_idct_row_table[(block_max_zag - 1) * 8];
322 		int i;
323 		for (i = 8; i > 0; i--, pRow_tab++)
324 		{
325 			switch (*pRow_tab)
326 			{
327 			case 0: Row<0>::idct(pTemp, pSrc); break;
328 			case 1: Row<1>::idct(pTemp, pSrc); break;
329 			case 2: Row<2>::idct(pTemp, pSrc); break;
330 			case 3: Row<3>::idct(pTemp, pSrc); break;
331 			case 4: Row<4>::idct(pTemp, pSrc); break;
332 			case 5: Row<5>::idct(pTemp, pSrc); break;
333 			case 6: Row<6>::idct(pTemp, pSrc); break;
334 			case 7: Row<7>::idct(pTemp, pSrc); break;
335 			case 8: Row<8>::idct(pTemp, pSrc); break;
336 			}
337 
338 			pSrc += 8;
339 			pTemp += 8;
340 		}
341 
342 		pTemp = temp;
343 
344 		const int nonzero_rows = s_idct_col_table[block_max_zag - 1];
345 		for (i = 8; i > 0; i--)
346 		{
347 			switch (nonzero_rows)
348 			{
349 			case 1: Col<1>::idct(pDst_ptr, pTemp); break;
350 			case 2: Col<2>::idct(pDst_ptr, pTemp); break;
351 			case 3: Col<3>::idct(pDst_ptr, pTemp); break;
352 			case 4: Col<4>::idct(pDst_ptr, pTemp); break;
353 			case 5: Col<5>::idct(pDst_ptr, pTemp); break;
354 			case 6: Col<6>::idct(pDst_ptr, pTemp); break;
355 			case 7: Col<7>::idct(pDst_ptr, pTemp); break;
356 			case 8: Col<8>::idct(pDst_ptr, pTemp); break;
357 			}
358 
359 			pTemp++;
360 			pDst_ptr++;
361 		}
362 	}
363 
364 	// Retrieve one character from the input stream.
365 	inline uint jpeg_decoder::get_char()
366 	{
367 		// Any bytes remaining in buffer?
368 		if (!m_in_buf_left)
369 		{
370 			// Try to get more bytes.
371 			prep_in_buffer();
372 			// Still nothing to get?
373 			if (!m_in_buf_left)
374 			{
375 				// Pad the end of the stream with 0xFF 0xD9 (EOI marker)
376 				int t = m_tem_flag;
377 				m_tem_flag ^= 1;
378 				if (t)
379 					return 0xD9;
380 				else
381 					return 0xFF;
382 			}
383 		}
384 
385 		uint c = *m_pIn_buf_ofs++;
386 		m_in_buf_left--;
387 
388 		return c;
389 	}
390 
391 	// Same as previous method, except can indicate if the character is a pad character or not.
392 	inline uint jpeg_decoder::get_char(bool* pPadding_flag)
393 	{
394 		if (!m_in_buf_left)
395 		{
396 			prep_in_buffer();
397 			if (!m_in_buf_left)
398 			{
399 				*pPadding_flag = true;
400 				int t = m_tem_flag;
401 				m_tem_flag ^= 1;
402 				if (t)
403 					return 0xD9;
404 				else
405 					return 0xFF;
406 			}
407 		}
408 
409 		*pPadding_flag = false;
410 
411 		uint c = *m_pIn_buf_ofs++;
412 		m_in_buf_left--;
413 
414 		return c;
415 	}
416 
417 	// Inserts a previously retrieved character back into the input buffer.
418 	inline void jpeg_decoder::stuff_char(uint8 q)
419 	{
420 		// This could write before the input buffer, but we've placed another array there.
421 		*(--m_pIn_buf_ofs) = q;
422 		m_in_buf_left++;
423 	}
424 
425 	// Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered.
426 	inline uint8 jpeg_decoder::get_octet()
427 	{
428 		bool padding_flag;
429 		int c = get_char(&padding_flag);
430 
431 		if (c == 0xFF)
432 		{
433 			if (padding_flag)
434 				return 0xFF;
435 
436 			c = get_char(&padding_flag);
437 			if (padding_flag)
438 			{
439 				stuff_char(0xFF);
440 				return 0xFF;
441 			}
442 
443 			if (c == 0x00)
444 				return 0xFF;
445 			else
446 			{
447 				stuff_char(static_cast<uint8>(c));
448 				stuff_char(0xFF);
449 				return 0xFF;
450 			}
451 		}
452 
453 		return static_cast<uint8>(c);
454 	}
455 
	// Retrieves a variable number of bits from the input stream. Does not recognize markers.
	// Returns the top num_bits bits of m_bit_buf (0 when num_bits is 0), then advances the bit
	// buffer, pulling two more bytes through get_char() whenever m_bits_left drops to 0 or below.
	inline uint jpeg_decoder::get_bits(int num_bits)
	{
		if (!num_bits)
			return 0;

		// The result comes from the most significant end of the bit buffer.
		uint i = m_bit_buf >> (32 - num_bits);

		if ((m_bits_left -= num_bits) <= 0)
		{
			// m_bits_left is now <= 0, so num_bits + m_bits_left is the count that was left before
			// this call. Shift by only that much first, so the new bytes land directly below the old bits.
			m_bit_buf <<= (num_bits += m_bits_left);

			// Replace the low 16 bits with the next two stream bytes (first byte is the more significant one).
			uint c1 = get_char();
			uint c2 = get_char();
			m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2;

			// Consume the remainder of the request (-m_bits_left bits) out of the fresh data.
			m_bit_buf <<= -m_bits_left;

			// 16 new bits were added to the buffer.
			m_bits_left += 16;

			assert(m_bits_left >= 0);
		}
		else
			m_bit_buf <<= num_bits;

		return i;
	}
483 
	// Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered.
	// num_bits must be at most 16 (asserted); 0 returns 0 without touching the buffer.
	inline uint jpeg_decoder::get_bits_no_markers(int num_bits)
	{
		if (!num_bits)
			return 0;

		assert(num_bits <= 16);

		// The result comes from the most significant end of the bit buffer.
		uint i = m_bit_buf >> (32 - num_bits);

		if ((m_bits_left -= num_bits) <= 0)
		{
			// m_bits_left is now <= 0, so num_bits + m_bits_left is the count that was left before
			// this call. Shift by only that much first, so the new bytes land directly below the old bits.
			m_bit_buf <<= (num_bits += m_bits_left);

			// Slow path: fewer than two bytes are buffered, or one of the next two bytes is 0xFF
			// (possible marker or stuffed byte), so go through get_octet(), which handles those cases.
			if ((m_in_buf_left < 2) || (m_pIn_buf_ofs[0] == 0xFF) || (m_pIn_buf_ofs[1] == 0xFF))
			{
				uint c1 = get_octet();
				uint c2 = get_octet();
				m_bit_buf |= (c1 << 8) | c2;
			}
			else
			{
				// Fast path: take the two bytes straight from the input buffer.
				m_bit_buf |= ((uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1];
				m_in_buf_left -= 2;
				m_pIn_buf_ofs += 2;
			}

			// Consume the remainder of the request (-m_bits_left bits) out of the fresh data.
			m_bit_buf <<= -m_bits_left;

			// 16 new bits were added to the buffer.
			m_bits_left += 16;

			assert(m_bits_left >= 0);
		}
		else
			m_bit_buf <<= num_bits;

		return i;
	}
522 
523 	// Decodes a Huffman encoded symbol.
524 	inline int jpeg_decoder::huff_decode(huff_tables* pH)
525 	{
526 		if (!pH)
527 			stop_decoding(JPGD_DECODE_ERROR);
528 
529 		int symbol;
530 		// Check first 8-bits: do we have a complete symbol?
531 		if ((symbol = pH->look_up[m_bit_buf >> 24]) < 0)
532 		{
533 			// Decode more bits, use a tree traversal to find symbol.
534 			int ofs = 23;
535 			do
536 			{
537 				unsigned int idx = -(int)(symbol + ((m_bit_buf >> ofs) & 1));
538 
539 				// This should never happen, but to be safe I'm turning these asserts into a run-time check.
540 				if ((idx >= JPGD_HUFF_TREE_MAX_LENGTH) || (ofs < 0))
541 					stop_decoding(JPGD_DECODE_ERROR);
542 
543 				symbol = pH->tree[idx];
544 				ofs--;
545 			} while (symbol < 0);
546 
547 			get_bits_no_markers(8 + (23 - ofs));
548 		}
549 		else
550 		{
551 			assert(symbol < JPGD_HUFF_CODE_SIZE_MAX_LENGTH);
552 			get_bits_no_markers(pH->code_size[symbol]);
553 		}
554 
555 		return symbol;
556 	}
557 
558 	// Decodes a Huffman encoded symbol.
559 	inline int jpeg_decoder::huff_decode(huff_tables* pH, int& extra_bits)
560 	{
561 		int symbol;
562 
563 		if (!pH)
564 			stop_decoding(JPGD_DECODE_ERROR);
565 
566 		// Check first 8-bits: do we have a complete symbol?
567 		if ((symbol = pH->look_up2[m_bit_buf >> 24]) < 0)
568 		{
569 			// Use a tree traversal to find symbol.
570 			int ofs = 23;
571 			do
572 			{
573 				unsigned int idx = -(int)(symbol + ((m_bit_buf >> ofs) & 1));
574 
575 				// This should never happen, but to be safe I'm turning these asserts into a run-time check.
576 				if ((idx >= JPGD_HUFF_TREE_MAX_LENGTH) || (ofs < 0))
577 					stop_decoding(JPGD_DECODE_ERROR);
578 
579 				symbol = pH->tree[idx];
580 				ofs--;
581 			} while (symbol < 0);
582 
583 			get_bits_no_markers(8 + (23 - ofs));
584 
585 			extra_bits = get_bits_no_markers(symbol & 0xF);
586 		}
587 		else
588 		{
589 			if (symbol & 0x8000)
590 			{
591 				//get_bits_no_markers((symbol >> 8) & 31);
592 				assert(((symbol >> 8) & 31) <= 15);
593 				get_bits_no_markers((symbol >> 8) & 15);
594 				extra_bits = symbol >> 16;
595 			}
596 			else
597 			{
598 				int code_size = (symbol >> 8) & 31;
599 				int num_extra_bits = symbol & 0xF;
600 				int bits = code_size + num_extra_bits;
601 
602 				if (bits <= 16)
603 					extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1);
604 				else
605 				{
606 					get_bits_no_markers(code_size);
607 					extra_bits = get_bits_no_markers(num_extra_bits);
608 				}
609 			}
610 
611 			symbol &= 0xFF;
612 		}
613 
614 		return symbol;
615 	}
616 
617 	// Tables and macro used to fully decode the DPCM differences.
618 	static const int s_extend_test[16] = { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };
619 	static const int s_extend_offset[16] = { 0, -1, -3, -7, -15, -31, -63, -127, -255, -511, -1023, -2047, -4095, -8191, -16383, -32767 };
620 	//static const int s_extend_mask[] = { 0, (1 << 0), (1 << 1), (1 << 2), (1 << 3), (1 << 4), (1 << 5), (1 << 6), (1 << 7), (1 << 8), (1 << 9), (1 << 10), (1 << 11), (1 << 12), (1 << 13), (1 << 14), (1 << 15), (1 << 16) };
621 
622 #define JPGD_HUFF_EXTEND(x, s) (((x) < s_extend_test[s & 15]) ? ((x) + s_extend_offset[s & 15]) : (x))
623 
624 	// Unconditionally frees all allocated m_blocks.
625 	void jpeg_decoder::free_all_blocks()
626 	{
627 		m_pStream = nullptr;
628 		for (mem_block* b = m_pMem_blocks; b; )
629 		{
630 			mem_block* n = b->m_pNext;
631 			jpgd_free(b);
632 			b = n;
633 		}
634 		m_pMem_blocks = nullptr;
635 	}
636 
637 	// This method handles all errors. It will never return.
638 	// It could easily be changed to use C++ exceptions.
639 	JPGD_NORETURN void jpeg_decoder::stop_decoding(jpgd_status status)
640 	{
641 		m_error_code = status;
642 		free_all_blocks();
643 		longjmp(m_jmp_state, status);
644 	}
645 
646 	void* jpeg_decoder::alloc(size_t nSize, bool zero)
647 	{
648 		nSize = (JPGD_MAX(nSize, 1) + 3) & ~3;
649 		char* rv = nullptr;
650 		for (mem_block* b = m_pMem_blocks; b; b = b->m_pNext)
651 		{
652 			if ((b->m_used_count + nSize) <= b->m_size)
653 			{
654 				rv = b->m_data + b->m_used_count;
655 				b->m_used_count += nSize;
656 				break;
657 			}
658 		}
659 		if (!rv)
660 		{
661 			int capacity = JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047);
662 			mem_block* b = (mem_block*)jpgd_malloc(sizeof(mem_block) + capacity);
663 			if (!b)
664 			{
665 				stop_decoding(JPGD_NOTENOUGHMEM);
666 			}
667 
668 			b->m_pNext = m_pMem_blocks;
669 			m_pMem_blocks = b;
670 			b->m_used_count = nSize;
671 			b->m_size = capacity;
672 			rv = b->m_data;
673 		}
674 		if (zero) memset(rv, 0, nSize);
675 		return rv;
676 	}
677 
678 	void* jpeg_decoder::alloc_aligned(size_t nSize, uint32_t align, bool zero)
679 	{
680 		assert((align >= 1U) && ((align & (align - 1U)) == 0U));
681 		void *p = alloc(nSize + align - 1U, zero);
682 		p = (void *)( ((uintptr_t)p + (align - 1U)) & ~((uintptr_t)(align - 1U)) );
683 		return p;
684 	}
685 
686 	void jpeg_decoder::word_clear(void* p, uint16 c, uint n)
687 	{
688 		uint8* pD = (uint8*)p;
689 		const uint8 l = c & 0xFF, h = (c >> 8) & 0xFF;
690 		while (n)
691 		{
692 			pD[0] = l;
693 			pD[1] = h;
694 			pD += 2;
695 			n--;
696 		}
697 	}
698 
699 	// Refill the input buffer.
700 	// This method will sit in a loop until (A) the buffer is full or (B)
701 	// the stream's read() method reports and end of file condition.
702 	void jpeg_decoder::prep_in_buffer()
703 	{
704 		m_in_buf_left = 0;
705 		m_pIn_buf_ofs = m_in_buf;
706 
707 		if (m_eof_flag)
708 			return;
709 
710 		do
711 		{
712 			int bytes_read = m_pStream->read(m_in_buf + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag);
713 			if (bytes_read == -1)
714 				stop_decoding(JPGD_STREAM_READ);
715 
716 			m_in_buf_left += bytes_read;
717 		} while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag));
718 
719 		m_total_bytes_read += m_in_buf_left;
720 
721 		// Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid).
722 		// (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.)
723 		word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64);
724 	}
725 
726 	// Read a Huffman code table.
727 	void jpeg_decoder::read_dht_marker()
728 	{
729 		int i, index, count;
730 		uint8 huff_num[17];
731 		uint8 huff_val[256];
732 
733 		uint num_left = get_bits(16);
734 
735 		if (num_left < 2)
736 			stop_decoding(JPGD_BAD_DHT_MARKER);
737 
738 		num_left -= 2;
739 
740 		while (num_left)
741 		{
742 			index = get_bits(8);
743 
744 			huff_num[0] = 0;
745 
746 			count = 0;
747 
748 			for (i = 1; i <= 16; i++)
749 			{
750 				huff_num[i] = static_cast<uint8>(get_bits(8));
751 				count += huff_num[i];
752 			}
753 
754 			if (count > 255)
755 				stop_decoding(JPGD_BAD_DHT_COUNTS);
756 
757 			bool symbol_present[256];
758 			memset(symbol_present, 0, sizeof(symbol_present));
759 
760 			for (i = 0; i < count; i++)
761 			{
762 				const int s = get_bits(8);
763 
764 				// Check for obviously bogus tables.
765 				if (symbol_present[s])
766 					stop_decoding(JPGD_BAD_DHT_COUNTS);
767 
768 				huff_val[i] = static_cast<uint8_t>(s);
769 				symbol_present[s] = true;
770 			}
771 
772 			i = 1 + 16 + count;
773 
774 			if (num_left < (uint)i)
775 				stop_decoding(JPGD_BAD_DHT_MARKER);
776 
777 			num_left -= i;
778 
779 			if ((index & 0x10) > 0x10)
780 				stop_decoding(JPGD_BAD_DHT_INDEX);
781 
782 			index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1);
783 
784 			if (index >= JPGD_MAX_HUFF_TABLES)
785 				stop_decoding(JPGD_BAD_DHT_INDEX);
786 
787 			if (!m_huff_num[index])
788 				m_huff_num[index] = (uint8*)alloc(17);
789 
790 			if (!m_huff_val[index])
791 				m_huff_val[index] = (uint8*)alloc(256);
792 
793 			m_huff_ac[index] = (index & 0x10) != 0;
794 			memcpy(m_huff_num[index], huff_num, 17);
795 			memcpy(m_huff_val[index], huff_val, 256);
796 		}
797 	}
798 
799 	// Read a quantization table.
800 	void jpeg_decoder::read_dqt_marker()
801 	{
802 		int n, i, prec;
803 		uint num_left;
804 		uint temp;
805 
806 		num_left = get_bits(16);
807 
808 		if (num_left < 2)
809 			stop_decoding(JPGD_BAD_DQT_MARKER);
810 
811 		num_left -= 2;
812 
813 		while (num_left)
814 		{
815 			n = get_bits(8);
816 			prec = n >> 4;
817 			n &= 0x0F;
818 
819 			if (n >= JPGD_MAX_QUANT_TABLES)
820 				stop_decoding(JPGD_BAD_DQT_TABLE);
821 
822 			if (!m_quant[n])
823 				m_quant[n] = (jpgd_quant_t*)alloc(64 * sizeof(jpgd_quant_t));
824 
825 			// read quantization entries, in zag order
826 			for (i = 0; i < 64; i++)
827 			{
828 				temp = get_bits(8);
829 
830 				if (prec)
831 					temp = (temp << 8) + get_bits(8);
832 
833 				m_quant[n][i] = static_cast<jpgd_quant_t>(temp);
834 			}
835 
836 			i = 64 + 1;
837 
838 			if (prec)
839 				i += 64;
840 
841 			if (num_left < (uint)i)
842 				stop_decoding(JPGD_BAD_DQT_LENGTH);
843 
844 			num_left -= i;
845 		}
846 	}
847 
848 	// Read the start of frame (SOF) marker.
849 	void jpeg_decoder::read_sof_marker()
850 	{
851 		int i;
852 		uint num_left;
853 
854 		num_left = get_bits(16);
855 
856 		/* precision: sorry, only 8-bit precision is supported */
857 		if (get_bits(8) != 8)
858 			stop_decoding(JPGD_BAD_PRECISION);
859 
860 		m_image_y_size = get_bits(16);
861 
862 		if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT))
863 			stop_decoding(JPGD_BAD_HEIGHT);
864 
865 		m_image_x_size = get_bits(16);
866 
867 		if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH))
868 			stop_decoding(JPGD_BAD_WIDTH);
869 
870 		m_comps_in_frame = get_bits(8);
871 
872 		if (m_comps_in_frame > JPGD_MAX_COMPONENTS)
873 			stop_decoding(JPGD_TOO_MANY_COMPONENTS);
874 
875 		if (num_left != (uint)(m_comps_in_frame * 3 + 8))
876 			stop_decoding(JPGD_BAD_SOF_LENGTH);
877 
878 		for (i = 0; i < m_comps_in_frame; i++)
879 		{
880 			m_comp_ident[i] = get_bits(8);
881 			m_comp_h_samp[i] = get_bits(4);
882 			m_comp_v_samp[i] = get_bits(4);
883 
884 			if (!m_comp_h_samp[i] || !m_comp_v_samp[i] || (m_comp_h_samp[i] > 2) || (m_comp_v_samp[i] > 2))
885 				stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
886 
887 			m_comp_quant[i] = get_bits(8);
888 			if (m_comp_quant[i] >= JPGD_MAX_QUANT_TABLES)
889 				stop_decoding(JPGD_DECODE_ERROR);
890 		}
891 	}
892 
893 	// Used to skip unrecognized markers.
894 	void jpeg_decoder::skip_variable_marker()
895 	{
896 		uint num_left;
897 
898 		num_left = get_bits(16);
899 
900 		if (num_left < 2)
901 			stop_decoding(JPGD_BAD_VARIABLE_MARKER);
902 
903 		num_left -= 2;
904 
905 		while (num_left)
906 		{
907 			get_bits(8);
908 			num_left--;
909 		}
910 	}
911 
912 	// Read a define restart interval (DRI) marker.
913 	void jpeg_decoder::read_dri_marker()
914 	{
915 		if (get_bits(16) != 4)
916 			stop_decoding(JPGD_BAD_DRI_LENGTH);
917 
918 		m_restart_interval = get_bits(16);
919 	}
920 
921 	// Read a start of scan (SOS) marker.
922 	void jpeg_decoder::read_sos_marker()
923 	{
924 		uint num_left;
925 		int i, ci, n, c, cc;
926 
927 		num_left = get_bits(16);
928 
929 		n = get_bits(8);
930 
931 		m_comps_in_scan = n;
932 
933 		num_left -= 3;
934 
935 		if ((num_left != (uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN))
936 			stop_decoding(JPGD_BAD_SOS_LENGTH);
937 
938 		for (i = 0; i < n; i++)
939 		{
940 			cc = get_bits(8);
941 			c = get_bits(8);
942 			num_left -= 2;
943 
944 			for (ci = 0; ci < m_comps_in_frame; ci++)
945 				if (cc == m_comp_ident[ci])
946 					break;
947 
948 			if (ci >= m_comps_in_frame)
949 				stop_decoding(JPGD_BAD_SOS_COMP_ID);
950 
951 			if (ci >= JPGD_MAX_COMPONENTS)
952 				stop_decoding(JPGD_DECODE_ERROR);
953 
954 			m_comp_list[i] = ci;
955 
956 			m_comp_dc_tab[ci] = (c >> 4) & 15;
957 			m_comp_ac_tab[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1);
958 
959 			if (m_comp_dc_tab[ci] >= JPGD_MAX_HUFF_TABLES)
960 				stop_decoding(JPGD_DECODE_ERROR);
961 
962 			if (m_comp_ac_tab[ci] >= JPGD_MAX_HUFF_TABLES)
963 				stop_decoding(JPGD_DECODE_ERROR);
964 		}
965 
966 		m_spectral_start = get_bits(8);
967 		m_spectral_end = get_bits(8);
968 		m_successive_high = get_bits(4);
969 		m_successive_low = get_bits(4);
970 
971 		if (!m_progressive_flag)
972 		{
973 			m_spectral_start = 0;
974 			m_spectral_end = 63;
975 		}
976 
977 		num_left -= 3;
978 
979 		/* read past whatever is num_left */
980 		while (num_left)
981 		{
982 			get_bits(8);
983 			num_left--;
984 		}
985 	}
986 
987 	// Finds the next marker.
988 	int jpeg_decoder::next_marker()
989 	{
990 		uint c, bytes;
991 
992 		bytes = 0;
993 
994 		do
995 		{
996 			do
997 			{
998 				bytes++;
999 				c = get_bits(8);
1000 			} while (c != 0xFF);
1001 
1002 			do
1003 			{
1004 				c = get_bits(8);
1005 			} while (c == 0xFF);
1006 
1007 		} while (c == 0);
1008 
1009 		// If bytes > 0 here, there where extra bytes before the marker (not good).
1010 
1011 		return c;
1012 	}
1013 
1014 	// Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is
1015 	// encountered.
1016 	int jpeg_decoder::process_markers()
1017 	{
1018 		int c;
1019 
1020 		for (; ; )
1021 		{
1022 			c = next_marker();
1023 
1024 			switch (c)
1025 			{
1026 			case M_SOF0:
1027 			case M_SOF1:
1028 			case M_SOF2:
1029 			case M_SOF3:
1030 			case M_SOF5:
1031 			case M_SOF6:
1032 			case M_SOF7:
1033 				//      case M_JPG:
1034 			case M_SOF9:
1035 			case M_SOF10:
1036 			case M_SOF11:
1037 			case M_SOF13:
1038 			case M_SOF14:
1039 			case M_SOF15:
1040 			case M_SOI:
1041 			case M_EOI:
1042 			case M_SOS:
1043 			{
1044 				return c;
1045 			}
1046 			case M_DHT:
1047 			{
1048 				read_dht_marker();
1049 				break;
1050 			}
1051 			// No arithmitic support - dumb patents!
1052 			case M_DAC:
1053 			{
1054 				stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1055 				break;
1056 			}
1057 			case M_DQT:
1058 			{
1059 				read_dqt_marker();
1060 				break;
1061 			}
1062 			case M_DRI:
1063 			{
1064 				read_dri_marker();
1065 				break;
1066 			}
1067 			//case M_APP0:  /* no need to read the JFIF marker */
1068 			case M_JPG:
1069 			case M_RST0:    /* no parameters */
1070 			case M_RST1:
1071 			case M_RST2:
1072 			case M_RST3:
1073 			case M_RST4:
1074 			case M_RST5:
1075 			case M_RST6:
1076 			case M_RST7:
1077 			case M_TEM:
1078 			{
1079 				stop_decoding(JPGD_UNEXPECTED_MARKER);
1080 				break;
1081 			}
1082 			default:    /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */
1083 			{
1084 				skip_variable_marker();
1085 				break;
1086 			}
1087 			}
1088 		}
1089 	}
1090 
1091 	// Finds the start of image (SOI) marker.
1092 	void jpeg_decoder::locate_soi_marker()
1093 	{
1094 		uint lastchar, thischar;
1095 		uint bytesleft;
1096 
1097 		lastchar = get_bits(8);
1098 
1099 		thischar = get_bits(8);
1100 
1101 		/* ok if it's a normal JPEG file without a special header */
1102 
1103 		if ((lastchar == 0xFF) && (thischar == M_SOI))
1104 			return;
1105 
1106 		bytesleft = 4096;
1107 
1108 		for (; ; )
1109 		{
1110 			if (--bytesleft == 0)
1111 				stop_decoding(JPGD_NOT_JPEG);
1112 
1113 			lastchar = thischar;
1114 
1115 			thischar = get_bits(8);
1116 
1117 			if (lastchar == 0xFF)
1118 			{
1119 				if (thischar == M_SOI)
1120 					break;
1121 				else if (thischar == M_EOI) // get_bits will keep returning M_EOI if we read past the end
1122 					stop_decoding(JPGD_NOT_JPEG);
1123 			}
1124 		}
1125 
1126 		// Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad.
1127 		thischar = (m_bit_buf >> 24) & 0xFF;
1128 
1129 		if (thischar != 0xFF)
1130 			stop_decoding(JPGD_NOT_JPEG);
1131 	}
1132 
1133 	// Find a start of frame (SOF) marker.
1134 	void jpeg_decoder::locate_sof_marker()
1135 	{
1136 		locate_soi_marker();
1137 
1138 		int c = process_markers();
1139 
1140 		switch (c)
1141 		{
1142 		case M_SOF2:
1143 		{
1144 			m_progressive_flag = JPGD_TRUE;
1145 			read_sof_marker();
1146 			break;
1147 		}
1148 		case M_SOF0:  /* baseline DCT */
1149 		case M_SOF1:  /* extended sequential DCT */
1150 		{
1151 			read_sof_marker();
1152 			break;
1153 		}
1154 		case M_SOF9:  /* Arithmitic coding */
1155 		{
1156 			stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1157 			break;
1158 		}
1159 		default:
1160 		{
1161 			stop_decoding(JPGD_UNSUPPORTED_MARKER);
1162 			break;
1163 		}
1164 		}
1165 	}
1166 
1167 	// Find a start of scan (SOS) marker.
1168 	int jpeg_decoder::locate_sos_marker()
1169 	{
1170 		int c;
1171 
1172 		c = process_markers();
1173 
1174 		if (c == M_EOI)
1175 			return JPGD_FALSE;
1176 		else if (c != M_SOS)
1177 			stop_decoding(JPGD_UNEXPECTED_MARKER);
1178 
1179 		read_sos_marker();
1180 
1181 		return JPGD_TRUE;
1182 	}
1183 
1184 	// Reset everything to default/uninitialized state.
	// pStream: input stream stored in m_pStream (it is in place before prep_in_buffer() is called below).
	// flags:   decoder option bits stored in m_flags (cFlag* values are tested elsewhere in this file).
	//
	// Besides clearing every member, this fills the input buffer, primes the bit
	// buffer with two 16-bit reads, and decides whether the SSE2 path may be used.
	void jpeg_decoder::init(jpeg_decoder_stream* pStream, uint32_t flags)
	{
		// Core state: options, allocator list, error/ready status, image size, input stream.
		m_flags = flags;
		m_pMem_blocks = nullptr;
		m_error_code = JPGD_SUCCESS;
		m_ready_flag = false;
		m_image_x_size = m_image_y_size = 0;
		m_pStream = pStream;
		m_progressive_flag = JPGD_FALSE;

		// Huffman and quantization table storage.
		memset(m_huff_ac, 0, sizeof(m_huff_ac));
		memset(m_huff_num, 0, sizeof(m_huff_num));
		memset(m_huff_val, 0, sizeof(m_huff_val));
		memset(m_quant, 0, sizeof(m_quant));

		m_scan_type = 0;
		m_comps_in_frame = 0;

		// Per-component frame parameters.
		memset(m_comp_h_samp, 0, sizeof(m_comp_h_samp));
		memset(m_comp_v_samp, 0, sizeof(m_comp_v_samp));
		memset(m_comp_quant, 0, sizeof(m_comp_quant));
		memset(m_comp_ident, 0, sizeof(m_comp_ident));
		memset(m_comp_h_blocks, 0, sizeof(m_comp_h_blocks));
		memset(m_comp_v_blocks, 0, sizeof(m_comp_v_blocks));

		// Per-scan parameters.
		m_comps_in_scan = 0;
		memset(m_comp_list, 0, sizeof(m_comp_list));
		memset(m_comp_dc_tab, 0, sizeof(m_comp_dc_tab));
		memset(m_comp_ac_tab, 0, sizeof(m_comp_ac_tab));

		m_spectral_start = 0;
		m_spectral_end = 0;
		m_successive_low = 0;
		m_successive_high = 0;
		m_max_mcu_x_size = 0;
		m_max_mcu_y_size = 0;
		m_blocks_per_mcu = 0;
		m_max_blocks_per_row = 0;
		m_mcus_per_row = 0;
		m_mcus_per_col = 0;

		memset(m_mcu_org, 0, sizeof(m_mcu_org));

		// Output scanline bookkeeping.
		m_total_lines_left = 0;
		m_mcu_lines_left = 0;
		m_num_buffered_scanlines = 0;
		m_real_dest_bytes_per_scan_line = 0;
		m_dest_bytes_per_scan_line = 0;
		m_dest_bytes_per_pixel = 0;

		memset(m_pHuff_tabs, 0, sizeof(m_pHuff_tabs));

		// Coefficient buffer pointers and per-component block row counters.
		memset(m_dc_coeffs, 0, sizeof(m_dc_coeffs));
		memset(m_ac_coeffs, 0, sizeof(m_ac_coeffs));
		memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));

		m_eob_run = 0;

		// Input buffer: empty, read position at its start, padding areas zeroed.
		m_pIn_buf_ofs = m_in_buf;
		m_in_buf_left = 0;
		m_eof_flag = false;
		m_tem_flag = 0;

		memset(m_in_buf_pad_start, 0, sizeof(m_in_buf_pad_start));
		memset(m_in_buf, 0, sizeof(m_in_buf));
		memset(m_in_buf_pad_end, 0, sizeof(m_in_buf_pad_end));

		// Restart interval state.
		m_restart_interval = 0;
		m_restarts_left = 0;
		m_next_restart_num = 0;

		m_max_mcus_per_row = 0;
		m_max_blocks_per_mcu = 0;
		m_max_mcus_per_col = 0;

		// DC predictors and working buffers (the buffers are not allocated here).
		memset(m_last_dc_val, 0, sizeof(m_last_dc_val));
		m_pMCU_coefficients = nullptr;
		m_pSample_buf = nullptr;
		m_pSample_buf_prev = nullptr;
		m_sample_buf_prev_valid = false;

		m_total_bytes_read = 0;

		m_pScan_line_0 = nullptr;
		m_pScan_line_1 = nullptr;

		// Ready the input buffer.
		prep_in_buffer();

		// Prime the bit buffer.
		m_bits_left = 16;
		m_bit_buf = 0;

		get_bits(16);
		get_bits(16);

		// Start with every coefficient position of every block slot marked as in use.
		for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++)
			m_mcu_block_max_zag[i] = 64;

		m_has_sse2 = false;

#if JPGD_USE_SSE2
#ifdef _MSC_VER
		// MSVC: query CPUID leaf 1 at run time; bit 26 of the fourth register (EDX) is the SSE2 feature flag.
		int cpu_info[4];
		__cpuid(cpu_info, 1);
		const int cpu_info3 = cpu_info[3];
		m_has_sse2 = ((cpu_info3 >> 26U) & 1U) != 0U;
#else
		// Other compilers: JPGD_USE_SSE2 being set is taken as sufficient; no run-time check is made.
		m_has_sse2 = true;
#endif
#endif
	}
1297 
// Fixed-point helpers for the YCbCr->RGB lookup tables built in create_look_ups().
// SCALEBITS: number of fractional bits in the fixed-point representation.
#define SCALEBITS 16
// ONE_HALF: 0.5 in that representation; added before a right shift by SCALEBITS to round.
#define ONE_HALF  ((int) 1 << (SCALEBITS-1))
// FIX(x): converts the real constant x to fixed point, rounding to nearest.
#define FIX(x)    ((int) ((x) * (1L<<SCALEBITS) + 0.5f))
1301 
1302 	// Create a few tables that allow us to quickly convert YCbCr to RGB.
1303 	void jpeg_decoder::create_look_ups()
1304 	{
1305 		for (int i = 0; i <= 255; i++)
1306 		{
1307 			int k = i - 128;
1308 			m_crr[i] = (FIX(1.40200f) * k + ONE_HALF) >> SCALEBITS;
1309 			m_cbb[i] = (FIX(1.77200f) * k + ONE_HALF) >> SCALEBITS;
1310 			m_crg[i] = (-FIX(0.71414f)) * k;
1311 			m_cbg[i] = (-FIX(0.34414f)) * k + ONE_HALF;
1312 		}
1313 	}
1314 
	// This method throws back into the stream any bytes that were read
	// into the bit buffer during initial marker scanning.
1317 	void jpeg_decoder::fix_in_buffer()
1318 	{
1319 		// In case any 0xFF's where pulled into the buffer during marker scanning.
1320 		assert((m_bits_left & 7) == 0);
1321 
1322 		if (m_bits_left == 16)
1323 			stuff_char((uint8)(m_bit_buf & 0xFF));
1324 
1325 		if (m_bits_left >= 8)
1326 			stuff_char((uint8)((m_bit_buf >> 8) & 0xFF));
1327 
1328 		stuff_char((uint8)((m_bit_buf >> 16) & 0xFF));
1329 		stuff_char((uint8)((m_bit_buf >> 24) & 0xFF));
1330 
1331 		m_bits_left = 16;
1332 		get_bits_no_markers(16);
1333 		get_bits_no_markers(16);
1334 	}
1335 
1336 	void jpeg_decoder::transform_mcu(int mcu_row)
1337 	{
1338 		jpgd_block_coeff_t* pSrc_ptr = m_pMCU_coefficients;
1339 		if (mcu_row * m_blocks_per_mcu >= m_max_blocks_per_row)
1340 			stop_decoding(JPGD_DECODE_ERROR);
1341 
1342 		uint8* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64;
1343 
1344 		for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
1345 		{
1346 			idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block], ((m_flags & cFlagDisableSIMD) == 0) && m_has_sse2);
1347 			pSrc_ptr += 64;
1348 			pDst_ptr += 64;
1349 		}
1350 	}
1351 
1352 	// Loads and dequantizes the next row of (already decoded) coefficients.
1353 	// Progressive images only.
	// For every MCU of the row: gathers each block's DC and AC coefficients from
	// the per-component coefficient buffers into m_pMCU_coefficients, dequantizes
	// them in place, records how many zigzag positions are in use, and then runs
	// transform_mcu(). Finally advances the per-component block row counters
	// (m_block_y_mcu) so that the next call reads the following row.
	void jpeg_decoder::load_next_row()
	{
		int i;
		jpgd_block_coeff_t* p;
		jpgd_quant_t* q;
		// NOTE: row_block is incremented below but never read in this function.
		int mcu_row, mcu_block, row_block = 0;
		int component_num, component_id;
		// Running horizontal block position of each component within this row.
		int block_x_mcu[JPGD_MAX_COMPONENTS];

		memset(block_x_mcu, 0, JPGD_MAX_COMPONENTS * sizeof(int));

		for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
		{
			// Block offsets inside the current MCU for the component being walked.
			int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;

			for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
			{
				component_id = m_mcu_org[mcu_block];
				// Guard the m_quant lookup against an out-of-range table index.
				if (m_comp_quant[component_id] >= JPGD_MAX_QUANT_TABLES)
					stop_decoding(JPGD_DECODE_ERROR);

				q = m_quant[m_comp_quant[component_id]];

				p = m_pMCU_coefficients + 64 * mcu_block;

				// DC and AC coefficients live in separate buffers; merge them into one 64-entry block.
				jpgd_block_coeff_t* pAC = coeff_buf_getp(m_ac_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
				jpgd_block_coeff_t* pDC = coeff_buf_getp(m_dc_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
				p[0] = pDC[0];
				memcpy(&p[1], &pAC[1], 63 * sizeof(jpgd_block_coeff_t));

				// Find the last nonzero coefficient in zigzag order (stops at i == 0 if all AC terms are zero).
				for (i = 63; i > 0; i--)
					if (p[g_ZAG[i]])
						break;

				// Number of zigzag positions in use; passed to idct() by transform_mcu().
				m_mcu_block_max_zag[mcu_block] = i + 1;

				// Dequantize the nonzero coefficients; q is indexed by zigzag position.
				for (; i >= 0; i--)
					if (p[g_ZAG[i]])
						p[g_ZAG[i]] = static_cast<jpgd_block_coeff_t>(p[g_ZAG[i]] * q[i]);

				row_block++;

				if (m_comps_in_scan == 1)
					block_x_mcu[component_id]++;
				else
				{
					// Walk the component's h_samp x v_samp blocks inside the MCU, row by row;
					// once all of them are consumed, move on to the next MCU's columns.
					if (++block_x_mcu_ofs == m_comp_h_samp[component_id])
					{
						block_x_mcu_ofs = 0;

						if (++block_y_mcu_ofs == m_comp_v_samp[component_id])
						{
							block_y_mcu_ofs = 0;

							block_x_mcu[component_id] += m_comp_h_samp[component_id];
						}
					}
				}
			}

			transform_mcu(mcu_row);
		}

		// Advance the vertical block position for the next call: one block row for a
		// single-component scan, v_samp block rows per component otherwise.
		if (m_comps_in_scan == 1)
			m_block_y_mcu[m_comp_list[0]]++;
		else
		{
			for (component_num = 0; component_num < m_comps_in_scan; component_num++)
			{
				component_id = m_comp_list[component_num];

				m_block_y_mcu[component_id] += m_comp_v_samp[component_id];
			}
		}
	}
1429 
1430 	// Restart interval processing.
1431 	void jpeg_decoder::process_restart()
1432 	{
1433 		int i;
1434 		int c = 0;
1435 
1436 		// Align to a byte boundry
1437 		// FIXME: Is this really necessary? get_bits_no_markers() never reads in markers!
1438 		//get_bits_no_markers(m_bits_left & 7);
1439 
1440 		// Let's scan a little bit to find the marker, but not _too_ far.
1441 		// 1536 is a "fudge factor" that determines how much to scan.
1442 		for (i = 1536; i > 0; i--)
1443 			if (get_char() == 0xFF)
1444 				break;
1445 
1446 		if (i == 0)
1447 			stop_decoding(JPGD_BAD_RESTART_MARKER);
1448 
1449 		for (; i > 0; i--)
1450 			if ((c = get_char()) != 0xFF)
1451 				break;
1452 
1453 		if (i == 0)
1454 			stop_decoding(JPGD_BAD_RESTART_MARKER);
1455 
1456 		// Is it the expected marker? If not, something bad happened.
1457 		if (c != (m_next_restart_num + M_RST0))
1458 			stop_decoding(JPGD_BAD_RESTART_MARKER);
1459 
1460 		// Reset each component's DC prediction values.
1461 		memset(&m_last_dc_val, 0, m_comps_in_frame * sizeof(uint));
1462 
1463 		m_eob_run = 0;
1464 
1465 		m_restarts_left = m_restart_interval;
1466 
1467 		m_next_restart_num = (m_next_restart_num + 1) & 7;
1468 
1469 		// Get the bit buffer going again...
1470 
1471 		m_bits_left = 16;
1472 		get_bits_no_markers(16);
1473 		get_bits_no_markers(16);
1474 	}
1475 
1476 	static inline int dequantize_ac(int c, int q) { c *= q; return c; }
1477 
1478 	// Decodes and dequantizes the next row of coefficients.
	// For each MCU of the row: handles a pending restart interval, Huffman-decodes
	// every block (DC difference followed by run/size coded AC terms) straight into
	// m_pMCU_coefficients with dequantization applied, then calls transform_mcu().
	// Malformed data aborts through stop_decoding(JPGD_DECODE_ERROR).
	void jpeg_decoder::decode_next_row()
	{
		// NOTE: row_block is incremented below but never read in this function.
		int row_block = 0;

		for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
		{
			// A restart marker is due once the interval counter has run down.
			if ((m_restart_interval) && (m_restarts_left == 0))
				process_restart();

			jpgd_block_coeff_t* p = m_pMCU_coefficients;
			for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64)
			{
				int component_id = m_mcu_org[mcu_block];
				// Guard the m_quant lookup against an out-of-range table index.
				if (m_comp_quant[component_id] >= JPGD_MAX_QUANT_TABLES)
					stop_decoding(JPGD_DECODE_ERROR);

				jpgd_quant_t* q = m_quant[m_comp_quant[component_id]];

				// DC term. huff_decode() appears to return the size category and hand the
				// extra bits back through its second argument - confirm against its definition.
				int r, s;
				s = huff_decode(m_pHuff_tabs[m_comp_dc_tab[component_id]], r);
				if (s >= 16)
					stop_decoding(JPGD_DECODE_ERROR);

				s = JPGD_HUFF_EXTEND(r, s);

				// The decoded value is a difference from this component's previous DC value.
				m_last_dc_val[component_id] = (s += m_last_dc_val[component_id]);

				p[0] = static_cast<jpgd_block_coeff_t>(s * q[0]);

				// The coefficient buffer is reused from MCU to MCU: zigzag positions below
				// prev_num_set may still hold values written by the previous block decoded
				// into this slot, so zero runs inside that range are cleared explicitly.
				int prev_num_set = m_mcu_block_max_zag[mcu_block];

				huff_tables* pH = m_pHuff_tabs[m_comp_ac_tab[component_id]];

				// AC terms; k is the current zigzag position.
				int k;
				for (k = 1; k < 64; k++)
				{
					int extra_bits;
					s = huff_decode(pH, extra_bits);

					// High nibble: run of zeros; low nibble: size of the coefficient that follows.
					r = s >> 4;
					s &= 15;

					if (s)
					{
						if (r)
						{
							if ((k + r) > 63)
								stop_decoding(JPGD_DECODE_ERROR);

							// Zero the part of the run that overlaps stale data.
							if (k < prev_num_set)
							{
								int n = JPGD_MIN(r, prev_num_set - k);
								int kt = k;
								while (n--)
									p[g_ZAG[kt++]] = 0;
							}

							k += r;
						}

						s = JPGD_HUFF_EXTEND(extra_bits, s);

						if (k >= 64)
							stop_decoding(JPGD_DECODE_ERROR);

						p[g_ZAG[k]] = static_cast<jpgd_block_coeff_t>(dequantize_ac(s, q[k])); //s * q[k];
					}
					else
					{
						// Size 0 with run 15: a run of 16 zeros. Size 0 with any other run ends the block.
						if (r == 15)
						{
							if ((k + 16) > 64)
								stop_decoding(JPGD_DECODE_ERROR);

							if (k < prev_num_set)
							{
								int n = JPGD_MIN(16, prev_num_set - k);
								int kt = k;
								while (n--)
								{
									if (kt > 63)
										stop_decoding(JPGD_DECODE_ERROR);
									p[g_ZAG[kt++]] = 0;
								}
							}

							k += 16 - 1; // - 1 because the loop counter is k

							// The last position covered by the run must hold zero (index masked to stay in range).
							if (p[g_ZAG[k & 63]] != 0)
								stop_decoding(JPGD_DECODE_ERROR);
						}
						else
							break;
					}
				}

				// Clear any stale coefficients beyond the point where this block ended.
				if (k < prev_num_set)
				{
					int kt = k;
					while (kt < prev_num_set)
						p[g_ZAG[kt++]] = 0;
				}

				// Remember how many zigzag positions are in use for idct() and for the next block in this slot.
				m_mcu_block_max_zag[mcu_block] = k;

				row_block++;
			}

			transform_mcu(mcu_row);

			m_restarts_left--;
		}
	}
1592 
	// YCbCr H1V1 (1x1:1:1, 3 blocks per MCU) to RGB
1594 	void jpeg_decoder::H1V1Convert()
1595 	{
1596 		int row = m_max_mcu_y_size - m_mcu_lines_left;
1597 		uint8* d = m_pScan_line_0;
1598 		uint8* s = m_pSample_buf + row * 8;
1599 
1600 		for (int i = m_max_mcus_per_row; i > 0; i--)
1601 		{
1602 			for (int j = 0; j < 8; j++)
1603 			{
1604 				int y = s[j];
1605 				int cb = s[64 + j];
1606 				int cr = s[128 + j];
1607 
1608 				d[0] = clamp(y + m_crr[cr]);
1609 				d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
1610 				d[2] = clamp(y + m_cbb[cb]);
1611 				d[3] = 255;
1612 
1613 				d += 4;
1614 			}
1615 
1616 			s += 64 * 3;
1617 		}
1618 	}
1619 
	// YCbCr H2V1 (2x1:1:1, 4 blocks per MCU) to RGB
1621 	void jpeg_decoder::H2V1Convert()
1622 	{
1623 		int row = m_max_mcu_y_size - m_mcu_lines_left;
1624 		uint8* d0 = m_pScan_line_0;
1625 		uint8* y = m_pSample_buf + row * 8;
1626 		uint8* c = m_pSample_buf + 2 * 64 + row * 8;
1627 
1628 		for (int i = m_max_mcus_per_row; i > 0; i--)
1629 		{
1630 			for (int l = 0; l < 2; l++)
1631 			{
1632 				for (int j = 0; j < 4; j++)
1633 				{
1634 					int cb = c[0];
1635 					int cr = c[64];
1636 
1637 					int rc = m_crr[cr];
1638 					int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
1639 					int bc = m_cbb[cb];
1640 
1641 					int yy = y[j << 1];
1642 					d0[0] = clamp(yy + rc);
1643 					d0[1] = clamp(yy + gc);
1644 					d0[2] = clamp(yy + bc);
1645 					d0[3] = 255;
1646 
1647 					yy = y[(j << 1) + 1];
1648 					d0[4] = clamp(yy + rc);
1649 					d0[5] = clamp(yy + gc);
1650 					d0[6] = clamp(yy + bc);
1651 					d0[7] = 255;
1652 
1653 					d0 += 8;
1654 
1655 					c++;
1656 				}
1657 				y += 64;
1658 			}
1659 
1660 			y += 64 * 4 - 64 * 2;
1661 			c += 64 * 4 - 8;
1662 		}
1663 	}
1664 
	// YCbCr H2V1 (2x1:1:1, 4 blocks per MCU) to RGB, with linear (filtered) chroma upsampling
1666 	void jpeg_decoder::H2V1ConvertFiltered()
1667 	{
1668 		const uint BLOCKS_PER_MCU = 4;
1669 		int row = m_max_mcu_y_size - m_mcu_lines_left;
1670 		uint8* d0 = m_pScan_line_0;
1671 
1672 		const int half_image_x_size = (m_image_x_size >> 1) - 1;
1673 		const int row_x8 = row * 8;
1674 
1675 		for (int x = 0; x < m_image_x_size; x++)
1676 		{
1677 			int y = m_pSample_buf[check_sample_buf_ofs((x >> 4) * BLOCKS_PER_MCU * 64 + ((x & 8) ? 64 : 0) + (x & 7) + row_x8)];
1678 
1679 			int c_x0 = (x - 1) >> 1;
1680 			int c_x1 = JPGD_MIN(c_x0 + 1, half_image_x_size);
1681 			c_x0 = JPGD_MAX(c_x0, 0);
1682 
1683 			int a = (c_x0 >> 3) * BLOCKS_PER_MCU * 64 + (c_x0 & 7) + row_x8 + 128;
1684 			int cb0 = m_pSample_buf[check_sample_buf_ofs(a)];
1685 			int cr0 = m_pSample_buf[check_sample_buf_ofs(a + 64)];
1686 
1687 			int b = (c_x1 >> 3) * BLOCKS_PER_MCU * 64 + (c_x1 & 7) + row_x8 + 128;
1688 			int cb1 = m_pSample_buf[check_sample_buf_ofs(b)];
1689 			int cr1 = m_pSample_buf[check_sample_buf_ofs(b + 64)];
1690 
1691 			int w0 = (x & 1) ? 3 : 1;
1692 			int w1 = (x & 1) ? 1 : 3;
1693 
1694 			int cb = (cb0 * w0 + cb1 * w1 + 2) >> 2;
1695 			int cr = (cr0 * w0 + cr1 * w1 + 2) >> 2;
1696 
1697 			int rc = m_crr[cr];
1698 			int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
1699 			int bc = m_cbb[cb];
1700 
1701 			d0[0] = clamp(y + rc);
1702 			d0[1] = clamp(y + gc);
1703 			d0[2] = clamp(y + bc);
1704 			d0[3] = 255;
1705 
1706 			d0 += 4;
1707 		}
1708 	}
1709 
	// YCbCr H1V2 (1x2:1:1, 4 blocks per MCU) to RGB
1711 	void jpeg_decoder::H1V2Convert()
1712 	{
1713 		int row = m_max_mcu_y_size - m_mcu_lines_left;
1714 		uint8* d0 = m_pScan_line_0;
1715 		uint8* d1 = m_pScan_line_1;
1716 		uint8* y;
1717 		uint8* c;
1718 
1719 		if (row < 8)
1720 			y = m_pSample_buf + row * 8;
1721 		else
1722 			y = m_pSample_buf + 64 * 1 + (row & 7) * 8;
1723 
1724 		c = m_pSample_buf + 64 * 2 + (row >> 1) * 8;
1725 
1726 		for (int i = m_max_mcus_per_row; i > 0; i--)
1727 		{
1728 			for (int j = 0; j < 8; j++)
1729 			{
1730 				int cb = c[0 + j];
1731 				int cr = c[64 + j];
1732 
1733 				int rc = m_crr[cr];
1734 				int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
1735 				int bc = m_cbb[cb];
1736 
1737 				int yy = y[j];
1738 				d0[0] = clamp(yy + rc);
1739 				d0[1] = clamp(yy + gc);
1740 				d0[2] = clamp(yy + bc);
1741 				d0[3] = 255;
1742 
1743 				yy = y[8 + j];
1744 				d1[0] = clamp(yy + rc);
1745 				d1[1] = clamp(yy + gc);
1746 				d1[2] = clamp(yy + bc);
1747 				d1[3] = 255;
1748 
1749 				d0 += 4;
1750 				d1 += 4;
1751 			}
1752 
1753 			y += 64 * 4;
1754 			c += 64 * 4;
1755 		}
1756 	}
1757 
	// YCbCr H1V2 (1x2:1:1, 4 blocks per MCU) to RGB, with linear (filtered) chroma upsampling
	// Converts one output scanline into m_pScan_line_0 (m_image_x_size pixels of
	// 4 bytes: R, G, B, 255). Chroma is interpolated vertically: each line blends
	// the chroma rows above and below it with 3:1 weights. Lines at the edge of a
	// 16-line MCU row need a chroma row (and, for line 15, the luma row) from the
	// previously decoded MCU row, which is read from m_pSample_buf_prev.
	void jpeg_decoder::H1V2ConvertFiltered()
	{
		const uint BLOCKS_PER_MCU = 4;
		// Absolute index of the scanline being produced, and its position inside the MCU row.
		int y = m_image_y_size - m_total_lines_left;
		int row = y & 15;

		// Upper bound used to clamp the lower chroma row index.
		const int half_image_y_size = (m_image_y_size >> 1) - 1;

		uint8* d0 = m_pScan_line_0;

		// Odd lines weight the upper chroma row (c_y0) by 3, even lines the lower one (c_y1).
		const int w0 = (row & 1) ? 3 : 1;
		const int w1 = (row & 1) ? 1 : 3;

		// Chroma rows above and below this line.
		// NOTE(review): for y == 0, c_y0 is -1 and is not clamped; (c_y0 & 7) below then
		// selects chroma row 7 of the current buffer - confirm this is intended.
		int c_y0 = (y - 1) >> 1;
		int c_y1 = JPGD_MIN(c_y0 + 1, half_image_y_size);

		const uint8_t* p_YSamples = m_pSample_buf;
		const uint8_t* p_C0Samples = m_pSample_buf;
		// On the first and last line of an MCU row (except at the very top of the image
		// and on the final line), the upper chroma row comes from the previous buffer;
		// on line 15 the luma row does too.
		if ((c_y0 >= 0) && (((row & 15) == 0) || ((row & 15) == 15)) && (m_total_lines_left > 1))
		{
			assert(y > 0);
			assert(m_sample_buf_prev_valid);

			if ((row & 15) == 15)
				p_YSamples = m_pSample_buf_prev;

			p_C0Samples = m_pSample_buf_prev;
		}

		// Offsets inside an MCU: luma rows 8-15 are in the second luma block; Cb starts
		// at byte 128 (after the two luma blocks) and Cr is 64 bytes after Cb.
		const int y_sample_base_ofs = ((row & 8) ? 64 : 0) + (row & 7) * 8;
		const int y0_base = (c_y0 & 7) * 8 + 128;
		const int y1_base = (c_y1 & 7) * 8 + 128;

		for (int x = 0; x < m_image_x_size; x++)
		{
			// Start of this pixel's column within its MCU (8 pixels per MCU horizontally).
			const int base_ofs = (x >> 3) * BLOCKS_PER_MCU * 64 + (x & 7);

			int y_sample = p_YSamples[check_sample_buf_ofs(base_ofs + y_sample_base_ofs)];

			// Upper chroma row (possibly from the previous MCU row).
			int a = base_ofs + y0_base;
			int cb0_sample = p_C0Samples[check_sample_buf_ofs(a)];
			int cr0_sample = p_C0Samples[check_sample_buf_ofs(a + 64)];

			// Lower chroma row (always from the current buffer).
			int b = base_ofs + y1_base;
			int cb1_sample = m_pSample_buf[check_sample_buf_ofs(b)];
			int cr1_sample = m_pSample_buf[check_sample_buf_ofs(b + 64)];

			// Weighted blend, rounded.
			int cb = (cb0_sample * w0 + cb1_sample * w1 + 2) >> 2;
			int cr = (cr0_sample * w0 + cr1_sample * w1 + 2) >> 2;

			int rc = m_crr[cr];
			int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
			int bc = m_cbb[cb];

			d0[0] = clamp(y_sample + rc);
			d0[1] = clamp(y_sample + gc);
			d0[2] = clamp(y_sample + bc);
			d0[3] = 255;

			d0 += 4;
		}
	}
1821 
	// YCbCr H2V2 (2x2:1:1, 6 blocks per MCU) to RGB
1823 	void jpeg_decoder::H2V2Convert()
1824 	{
1825 		int row = m_max_mcu_y_size - m_mcu_lines_left;
1826 		uint8* d0 = m_pScan_line_0;
1827 		uint8* d1 = m_pScan_line_1;
1828 		uint8* y;
1829 		uint8* c;
1830 
1831 		if (row < 8)
1832 			y = m_pSample_buf + row * 8;
1833 		else
1834 			y = m_pSample_buf + 64 * 2 + (row & 7) * 8;
1835 
1836 		c = m_pSample_buf + 64 * 4 + (row >> 1) * 8;
1837 
1838 		for (int i = m_max_mcus_per_row; i > 0; i--)
1839 		{
1840 			for (int l = 0; l < 2; l++)
1841 			{
1842 				for (int j = 0; j < 8; j += 2)
1843 				{
1844 					int cb = c[0];
1845 					int cr = c[64];
1846 
1847 					int rc = m_crr[cr];
1848 					int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
1849 					int bc = m_cbb[cb];
1850 
1851 					int yy = y[j];
1852 					d0[0] = clamp(yy + rc);
1853 					d0[1] = clamp(yy + gc);
1854 					d0[2] = clamp(yy + bc);
1855 					d0[3] = 255;
1856 
1857 					yy = y[j + 1];
1858 					d0[4] = clamp(yy + rc);
1859 					d0[5] = clamp(yy + gc);
1860 					d0[6] = clamp(yy + bc);
1861 					d0[7] = 255;
1862 
1863 					yy = y[j + 8];
1864 					d1[0] = clamp(yy + rc);
1865 					d1[1] = clamp(yy + gc);
1866 					d1[2] = clamp(yy + bc);
1867 					d1[3] = 255;
1868 
1869 					yy = y[j + 8 + 1];
1870 					d1[4] = clamp(yy + rc);
1871 					d1[5] = clamp(yy + gc);
1872 					d1[6] = clamp(yy + bc);
1873 					d1[7] = 255;
1874 
1875 					d0 += 8;
1876 					d1 += 8;
1877 
1878 					c++;
1879 				}
1880 				y += 64;
1881 			}
1882 
1883 			y += 64 * 6 - 64 * 2;
1884 			c += 64 * 6 - 8;
1885 		}
1886 	}
1887 
1888 	uint32_t jpeg_decoder::H2V2ConvertFiltered()
1889 	{
1890 		const uint BLOCKS_PER_MCU = 6;
1891 		int y = m_image_y_size - m_total_lines_left;
1892 		int row = y & 15;
1893 
1894 		const int half_image_y_size = (m_image_y_size >> 1) - 1;
1895 
1896 		uint8* d0 = m_pScan_line_0;
1897 
1898 		int c_y0 = (y - 1) >> 1;
1899 		int c_y1 = JPGD_MIN(c_y0 + 1, half_image_y_size);
1900 
1901 		const uint8_t* p_YSamples = m_pSample_buf;
1902 		const uint8_t* p_C0Samples = m_pSample_buf;
1903 		if ((c_y0 >= 0) && (((row & 15) == 0) || ((row & 15) == 15)) && (m_total_lines_left > 1))
1904 		{
1905 			assert(y > 0);
1906 			assert(m_sample_buf_prev_valid);
1907 
1908 			if ((row & 15) == 15)
1909 				p_YSamples = m_pSample_buf_prev;
1910 
1911 			p_C0Samples = m_pSample_buf_prev;
1912 		}
1913 
1914 		const int y_sample_base_ofs = ((row & 8) ? 128 : 0) + (row & 7) * 8;
1915 		const int y0_base = (c_y0 & 7) * 8 + 256;
1916 		const int y1_base = (c_y1 & 7) * 8 + 256;
1917 
1918 		const int half_image_x_size = (m_image_x_size >> 1) - 1;
1919 
1920 		static const uint8_t s_muls[2][2][4] =
1921 		{
1922 			{ { 1, 3, 3, 9 }, { 3, 9, 1, 3 }, },
1923 			{ { 3, 1, 9, 3 }, { 9, 3, 3, 1 } }
1924 		};
1925 
1926 		if (((row & 15) >= 1) && ((row & 15) <= 14))
1927 		{
1928 			assert((row & 1) == 1);
1929 			assert(((y + 1 - 1) >> 1) == c_y0);
1930 
1931 			assert(p_YSamples == m_pSample_buf);
1932 			assert(p_C0Samples == m_pSample_buf);
1933 
1934 			uint8* d1 = m_pScan_line_1;
1935 			const int y_sample_base_ofs1 = (((row + 1) & 8) ? 128 : 0) + ((row + 1) & 7) * 8;
1936 
1937 			for (int x = 0; x < m_image_x_size; x++)
1938 			{
1939 				int k = (x >> 4) * BLOCKS_PER_MCU * 64 + ((x & 8) ? 64 : 0) + (x & 7);
1940 				int y_sample0 = p_YSamples[check_sample_buf_ofs(k + y_sample_base_ofs)];
1941 				int y_sample1 = p_YSamples[check_sample_buf_ofs(k + y_sample_base_ofs1)];
1942 
1943 				int c_x0 = (x - 1) >> 1;
1944 				int c_x1 = JPGD_MIN(c_x0 + 1, half_image_x_size);
1945 				c_x0 = JPGD_MAX(c_x0, 0);
1946 
1947 				int a = (c_x0 >> 3) * BLOCKS_PER_MCU * 64 + (c_x0 & 7);
1948 				int cb00_sample = p_C0Samples[check_sample_buf_ofs(a + y0_base)];
1949 				int cr00_sample = p_C0Samples[check_sample_buf_ofs(a + y0_base + 64)];
1950 
1951 				int cb01_sample = m_pSample_buf[check_sample_buf_ofs(a + y1_base)];
1952 				int cr01_sample = m_pSample_buf[check_sample_buf_ofs(a + y1_base + 64)];
1953 
1954 				int b = (c_x1 >> 3) * BLOCKS_PER_MCU * 64 + (c_x1 & 7);
1955 				int cb10_sample = p_C0Samples[check_sample_buf_ofs(b + y0_base)];
1956 				int cr10_sample = p_C0Samples[check_sample_buf_ofs(b + y0_base + 64)];
1957 
1958 				int cb11_sample = m_pSample_buf[check_sample_buf_ofs(b + y1_base)];
1959 				int cr11_sample = m_pSample_buf[check_sample_buf_ofs(b + y1_base + 64)];
1960 
1961 				{
1962 					const uint8_t* pMuls = &s_muls[row & 1][x & 1][0];
1963 					int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4;
1964 					int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4;
1965 
1966 					int rc = m_crr[cr];
1967 					int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
1968 					int bc = m_cbb[cb];
1969 
1970 					d0[0] = clamp(y_sample0 + rc);
1971 					d0[1] = clamp(y_sample0 + gc);
1972 					d0[2] = clamp(y_sample0 + bc);
1973 					d0[3] = 255;
1974 
1975 					d0 += 4;
1976 				}
1977 
1978 				{
1979 					const uint8_t* pMuls = &s_muls[(row + 1) & 1][x & 1][0];
1980 					int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4;
1981 					int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4;
1982 
1983 					int rc = m_crr[cr];
1984 					int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
1985 					int bc = m_cbb[cb];
1986 
1987 					d1[0] = clamp(y_sample1 + rc);
1988 					d1[1] = clamp(y_sample1 + gc);
1989 					d1[2] = clamp(y_sample1 + bc);
1990 					d1[3] = 255;
1991 
1992 					d1 += 4;
1993 				}
1994 
1995 				if (((x & 1) == 1) && (x < m_image_x_size - 1))
1996 				{
1997 					const int nx = x + 1;
1998 					assert(c_x0 == (nx - 1) >> 1);
1999 
2000 					k = (nx >> 4) * BLOCKS_PER_MCU * 64 + ((nx & 8) ? 64 : 0) + (nx & 7);
2001 					y_sample0 = p_YSamples[check_sample_buf_ofs(k + y_sample_base_ofs)];
2002 					y_sample1 = p_YSamples[check_sample_buf_ofs(k + y_sample_base_ofs1)];
2003 
2004 					{
2005 						const uint8_t* pMuls = &s_muls[row & 1][nx & 1][0];
2006 						int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4;
2007 						int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4;
2008 
2009 						int rc = m_crr[cr];
2010 						int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
2011 						int bc = m_cbb[cb];
2012 
2013 						d0[0] = clamp(y_sample0 + rc);
2014 						d0[1] = clamp(y_sample0 + gc);
2015 						d0[2] = clamp(y_sample0 + bc);
2016 						d0[3] = 255;
2017 
2018 						d0 += 4;
2019 					}
2020 
2021 					{
2022 						const uint8_t* pMuls = &s_muls[(row + 1) & 1][nx & 1][0];
2023 						int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4;
2024 						int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4;
2025 
2026 						int rc = m_crr[cr];
2027 						int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
2028 						int bc = m_cbb[cb];
2029 
2030 						d1[0] = clamp(y_sample1 + rc);
2031 						d1[1] = clamp(y_sample1 + gc);
2032 						d1[2] = clamp(y_sample1 + bc);
2033 						d1[3] = 255;
2034 
2035 						d1 += 4;
2036 					}
2037 
2038 					++x;
2039 				}
2040 			}
2041 
2042 			return 2;
2043 		}
2044 		else
2045 		{
2046 			for (int x = 0; x < m_image_x_size; x++)
2047 			{
2048 				int y_sample = p_YSamples[check_sample_buf_ofs((x >> 4) * BLOCKS_PER_MCU * 64 + ((x & 8) ? 64 : 0) + (x & 7) + y_sample_base_ofs)];
2049 
2050 				int c_x0 = (x - 1) >> 1;
2051 				int c_x1 = JPGD_MIN(c_x0 + 1, half_image_x_size);
2052 				c_x0 = JPGD_MAX(c_x0, 0);
2053 
2054 				int a = (c_x0 >> 3) * BLOCKS_PER_MCU * 64 + (c_x0 & 7);
2055 				int cb00_sample = p_C0Samples[check_sample_buf_ofs(a + y0_base)];
2056 				int cr00_sample = p_C0Samples[check_sample_buf_ofs(a + y0_base + 64)];
2057 
2058 				int cb01_sample = m_pSample_buf[check_sample_buf_ofs(a + y1_base)];
2059 				int cr01_sample = m_pSample_buf[check_sample_buf_ofs(a + y1_base + 64)];
2060 
2061 				int b = (c_x1 >> 3) * BLOCKS_PER_MCU * 64 + (c_x1 & 7);
2062 				int cb10_sample = p_C0Samples[check_sample_buf_ofs(b + y0_base)];
2063 				int cr10_sample = p_C0Samples[check_sample_buf_ofs(b + y0_base + 64)];
2064 
2065 				int cb11_sample = m_pSample_buf[check_sample_buf_ofs(b + y1_base)];
2066 				int cr11_sample = m_pSample_buf[check_sample_buf_ofs(b + y1_base + 64)];
2067 
2068 				const uint8_t* pMuls = &s_muls[row & 1][x & 1][0];
2069 				int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4;
2070 				int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4;
2071 
2072 				int rc = m_crr[cr];
2073 				int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
2074 				int bc = m_cbb[cb];
2075 
2076 				d0[0] = clamp(y_sample + rc);
2077 				d0[1] = clamp(y_sample + gc);
2078 				d0[2] = clamp(y_sample + bc);
2079 				d0[3] = 255;
2080 
2081 				d0 += 4;
2082 			}
2083 
2084 			return 1;
2085 		}
2086 	}
2087 
2088 	// Y (1 block per MCU) to 8-bit grayscale
2089 	void jpeg_decoder::gray_convert()
2090 	{
2091 		int row = m_max_mcu_y_size - m_mcu_lines_left;
2092 		uint8* d = m_pScan_line_0;
2093 		uint8* s = m_pSample_buf + row * 8;
2094 
2095 		for (int i = m_max_mcus_per_row; i > 0; i--)
2096 		{
2097 			*(uint*)d = *(uint*)s;
2098 			*(uint*)(&d[4]) = *(uint*)(&s[4]);
2099 
2100 			s += 64;
2101 			d += 8;
2102 		}
2103 	}
2104 
2105 	// Find end of image (EOI) marker, so we can return to the user the exact size of the input stream.
2106 	void jpeg_decoder::find_eoi()
2107 	{
2108 		if (!m_progressive_flag)
2109 		{
2110 			// Attempt to read the EOI marker.
2111 			//get_bits_no_markers(m_bits_left & 7);
2112 
2113 			// Prime the bit buffer
2114 			m_bits_left = 16;
2115 			get_bits(16);
2116 			get_bits(16);
2117 
2118 			// The next marker _should_ be EOI
2119 			process_markers();
2120 		}
2121 
2122 		m_total_bytes_read -= m_in_buf_left;
2123 	}
2124 
2125 	int jpeg_decoder::decode_next_mcu_row()
2126 	{
2127 		if (::setjmp(m_jmp_state))
2128 			return JPGD_FAILED;
2129 
2130 		const bool chroma_y_filtering = ((m_flags & cFlagBoxChromaFiltering) == 0) && ((m_scan_type == JPGD_YH2V2) || (m_scan_type == JPGD_YH1V2));
2131 		if (chroma_y_filtering)
2132 		{
2133 			std::swap(m_pSample_buf, m_pSample_buf_prev);
2134 
2135 			m_sample_buf_prev_valid = true;
2136 		}
2137 
2138 		if (m_progressive_flag)
2139 			load_next_row();
2140 		else
2141 			decode_next_row();
2142 
2143 		// Find the EOI marker if that was the last row.
2144 		if (m_total_lines_left <= m_max_mcu_y_size)
2145 			find_eoi();
2146 
2147 		m_mcu_lines_left = m_max_mcu_y_size;
2148 		return 0;
2149 	}
2150 
	// Produces the next output scan line.
	// On success *pScan_line points at one of the decoder's internal scan line buffers
	// (m_pScan_line_0 or m_pScan_line_1) and *pScan_line_len receives
	// m_real_dest_bytes_per_scan_line.
	// Returns JPGD_FAILED if an error code is set or the decoder is not ready, JPGD_DONE when
	// no lines remain, the status from decode_next_mcu_row() if that fails, and JPGD_SUCCESS
	// otherwise.
	int jpeg_decoder::decode(const void** pScan_line, uint* pScan_line_len)
	{
		if ((m_error_code) || (!m_ready_flag))
			return JPGD_FAILED;

		if (m_total_lines_left == 0)
			return JPGD_DONE;

		// Same condition as in decode_next_mcu_row(): box filtering is off and the scan type is H2V2 or H1V2.
		const bool chroma_y_filtering = ((m_flags & cFlagBoxChromaFiltering) == 0) && ((m_scan_type == JPGD_YH2V2) || (m_scan_type == JPGD_YH1V2));

		bool get_another_mcu_row = false;
		bool got_mcu_early = false;
		if (chroma_y_filtering)
		{
			// First call (no line output yet): fetch the first MCU row.
			if (m_total_lines_left == m_image_y_size)
				get_another_mcu_row = true;
			// One line of the current MCU row is left and more image lines follow: fetch the
			// next MCU row now, one line earlier than the unfiltered path would.
			else if ((m_mcu_lines_left == 1) && (m_total_lines_left > 1))
			{
				get_another_mcu_row = true;
				got_mcu_early = true;
			}
		}
		else
		{
			get_another_mcu_row = (m_mcu_lines_left == 0);
		}

		if (get_another_mcu_row)
		{
			int status = decode_next_mcu_row();
			if (status != 0)
				return status;
		}

		switch (m_scan_type)
		{
		case JPGD_YH2V2:
		{
			if ((m_flags & cFlagBoxChromaFiltering) == 0)
			{
				// Filtered path: the converter's return value is the number of buffered
				// scan lines; when one is still buffered it is served from m_pScan_line_1.
				if (m_num_buffered_scanlines == 1)
				{
					*pScan_line = m_pScan_line_1;
				}
				else if (m_num_buffered_scanlines == 0)
				{
					m_num_buffered_scanlines = H2V2ConvertFiltered();
					*pScan_line = m_pScan_line_0;
				}

				m_num_buffered_scanlines--;
			}
			else
			{
				// Box path: convert when an even number of lines is left in the MCU row and
				// return m_pScan_line_0; on odd counts return m_pScan_line_1 without converting.
				if ((m_mcu_lines_left & 1) == 0)
				{
					H2V2Convert();
					*pScan_line = m_pScan_line_0;
				}
				else
					*pScan_line = m_pScan_line_1;
			}

			break;
		}
		case JPGD_YH2V1:
		{
			if ((m_flags & cFlagBoxChromaFiltering) == 0)
				H2V1ConvertFiltered();
			else
				H2V1Convert();
			*pScan_line = m_pScan_line_0;
			break;
		}
		case JPGD_YH1V2:
		{
			if (chroma_y_filtering)
			{
				H1V2ConvertFiltered();
				*pScan_line = m_pScan_line_0;
			}
			else
			{
				// Same even/odd alternation between the two line buffers as the H2V2 box path.
				if ((m_mcu_lines_left & 1) == 0)
				{
					H1V2Convert();
					*pScan_line = m_pScan_line_0;
				}
				else
					*pScan_line = m_pScan_line_1;
			}

			break;
		}
		case JPGD_YH1V1:
		{
			H1V1Convert();
			*pScan_line = m_pScan_line_0;
			break;
		}
		case JPGD_GRAYSCALE:
		{
			gray_convert();
			*pScan_line = m_pScan_line_0;

			break;
		}
		}

		*pScan_line_len = m_real_dest_bytes_per_scan_line;

		// When the next MCU row was fetched early, decode_next_mcu_row() has already reset
		// m_mcu_lines_left to a full MCU row, so it is not decremented for this line.
		if (!got_mcu_early)
		{
			m_mcu_lines_left--;
		}

		m_total_lines_left--;

		return JPGD_SUCCESS;
	}
2271 
2272 	// Creates the tables needed for efficient Huffman decoding.
2273 	void jpeg_decoder::make_huff_table(int index, huff_tables* pH)
2274 	{
2275 		int p, i, l, si;
2276 		uint8 huffsize[258];
2277 		uint huffcode[258];
2278 		uint code;
2279 		uint subtree;
2280 		int code_size;
2281 		int lastp;
2282 		int nextfreeentry;
2283 		int currententry;
2284 
2285 		pH->ac_table = m_huff_ac[index] != 0;
2286 
2287 		p = 0;
2288 
2289 		for (l = 1; l <= 16; l++)
2290 		{
2291 			for (i = 1; i <= m_huff_num[index][l]; i++)
2292 			{
2293 				if (p >= 257)
2294 					stop_decoding(JPGD_DECODE_ERROR);
2295 				huffsize[p++] = static_cast<uint8>(l);
2296 			}
2297 		}
2298 
2299 		assert(p < 258);
2300 		huffsize[p] = 0;
2301 
2302 		lastp = p;
2303 
2304 		code = 0;
2305 		si = huffsize[0];
2306 		p = 0;
2307 
2308 		while (huffsize[p])
2309 		{
2310 			while (huffsize[p] == si)
2311 			{
2312 				if (p >= 257)
2313 					stop_decoding(JPGD_DECODE_ERROR);
2314 				huffcode[p++] = code;
2315 				code++;
2316 			}
2317 
2318 			code <<= 1;
2319 			si++;
2320 		}
2321 
2322 		memset(pH->look_up, 0, sizeof(pH->look_up));
2323 		memset(pH->look_up2, 0, sizeof(pH->look_up2));
2324 		memset(pH->tree, 0, sizeof(pH->tree));
2325 		memset(pH->code_size, 0, sizeof(pH->code_size));
2326 
2327 		nextfreeentry = -1;
2328 
2329 		p = 0;
2330 
2331 		while (p < lastp)
2332 		{
2333 			i = m_huff_val[index][p];
2334 
2335 			code = huffcode[p];
2336 			code_size = huffsize[p];
2337 
2338 			assert(i < JPGD_HUFF_CODE_SIZE_MAX_LENGTH);
2339 			pH->code_size[i] = static_cast<uint8>(code_size);
2340 
2341 			if (code_size <= 8)
2342 			{
2343 				code <<= (8 - code_size);
2344 
2345 				for (l = 1 << (8 - code_size); l > 0; l--)
2346 				{
2347 					if (code >= 256)
2348 						stop_decoding(JPGD_DECODE_ERROR);
2349 
2350 					pH->look_up[code] = i;
2351 
2352 					bool has_extrabits = false;
2353 					int extra_bits = 0;
2354 					int num_extra_bits = i & 15;
2355 
2356 					int bits_to_fetch = code_size;
2357 					if (num_extra_bits)
2358 					{
2359 						int total_codesize = code_size + num_extra_bits;
2360 						if (total_codesize <= 8)
2361 						{
2362 							has_extrabits = true;
2363 							extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize));
2364 
2365 							if (extra_bits > 0x7FFF)
2366 								stop_decoding(JPGD_DECODE_ERROR);
2367 
2368 							bits_to_fetch += num_extra_bits;
2369 						}
2370 					}
2371 
2372 					if (!has_extrabits)
2373 						pH->look_up2[code] = i | (bits_to_fetch << 8);
2374 					else
2375 						pH->look_up2[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8);
2376 
2377 					code++;
2378 				}
2379 			}
2380 			else
2381 			{
2382 				subtree = (code >> (code_size - 8)) & 0xFF;
2383 
2384 				currententry = pH->look_up[subtree];
2385 
2386 				if (currententry == 0)
2387 				{
2388 					pH->look_up[subtree] = currententry = nextfreeentry;
2389 					pH->look_up2[subtree] = currententry = nextfreeentry;
2390 
2391 					nextfreeentry -= 2;
2392 				}
2393 
2394 				code <<= (16 - (code_size - 8));
2395 
2396 				for (l = code_size; l > 9; l--)
2397 				{
2398 					if ((code & 0x8000) == 0)
2399 						currententry--;
2400 
2401 					unsigned int idx = -currententry - 1;
2402 
2403 					if (idx >= JPGD_HUFF_TREE_MAX_LENGTH)
2404 						stop_decoding(JPGD_DECODE_ERROR);
2405 
2406 					if (pH->tree[idx] == 0)
2407 					{
2408 						pH->tree[idx] = nextfreeentry;
2409 
2410 						currententry = nextfreeentry;
2411 
2412 						nextfreeentry -= 2;
2413 					}
2414 					else
2415 					{
2416 						currententry = pH->tree[idx];
2417 					}
2418 
2419 					code <<= 1;
2420 				}
2421 
2422 				if ((code & 0x8000) == 0)
2423 					currententry--;
2424 
2425 				if ((-currententry - 1) >= JPGD_HUFF_TREE_MAX_LENGTH)
2426 					stop_decoding(JPGD_DECODE_ERROR);
2427 
2428 				pH->tree[-currententry - 1] = i;
2429 			}
2430 
2431 			p++;
2432 		}
2433 	}
2434 
2435 	// Verifies the quantization tables needed for this scan are available.
2436 	void jpeg_decoder::check_quant_tables()
2437 	{
2438 		for (int i = 0; i < m_comps_in_scan; i++)
2439 			if (m_quant[m_comp_quant[m_comp_list[i]]] == nullptr)
2440 				stop_decoding(JPGD_UNDEFINED_QUANT_TABLE);
2441 	}
2442 
2443 	// Verifies that all the Huffman tables needed for this scan are available.
2444 	void jpeg_decoder::check_huff_tables()
2445 	{
2446 		for (int i = 0; i < m_comps_in_scan; i++)
2447 		{
2448 			if ((m_spectral_start == 0) && (m_huff_num[m_comp_dc_tab[m_comp_list[i]]] == nullptr))
2449 				stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2450 
2451 			if ((m_spectral_end > 0) && (m_huff_num[m_comp_ac_tab[m_comp_list[i]]] == nullptr))
2452 				stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2453 		}
2454 
2455 		for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++)
2456 			if (m_huff_num[i])
2457 			{
2458 				if (!m_pHuff_tabs[i])
2459 					m_pHuff_tabs[i] = (huff_tables*)alloc(sizeof(huff_tables));
2460 
2461 				make_huff_table(i, m_pHuff_tabs[i]);
2462 			}
2463 	}
2464 
2465 	// Determines the component order inside each MCU.
2466 	// Also calcs how many MCU's are on each row, etc.
2467 	bool jpeg_decoder::calc_mcu_block_order()
2468 	{
2469 		int component_num, component_id;
2470 		int max_h_samp = 0, max_v_samp = 0;
2471 
2472 		for (component_id = 0; component_id < m_comps_in_frame; component_id++)
2473 		{
2474 			if (m_comp_h_samp[component_id] > max_h_samp)
2475 				max_h_samp = m_comp_h_samp[component_id];
2476 
2477 			if (m_comp_v_samp[component_id] > max_v_samp)
2478 				max_v_samp = m_comp_v_samp[component_id];
2479 		}
2480 
2481 		for (component_id = 0; component_id < m_comps_in_frame; component_id++)
2482 		{
2483 			m_comp_h_blocks[component_id] = ((((m_image_x_size * m_comp_h_samp[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8;
2484 			m_comp_v_blocks[component_id] = ((((m_image_y_size * m_comp_v_samp[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8;
2485 		}
2486 
2487 		if (m_comps_in_scan == 1)
2488 		{
2489 			m_mcus_per_row = m_comp_h_blocks[m_comp_list[0]];
2490 			m_mcus_per_col = m_comp_v_blocks[m_comp_list[0]];
2491 		}
2492 		else
2493 		{
2494 			m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp;
2495 			m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp;
2496 		}
2497 
2498 		if (m_comps_in_scan == 1)
2499 		{
2500 			m_mcu_org[0] = m_comp_list[0];
2501 
2502 			m_blocks_per_mcu = 1;
2503 		}
2504 		else
2505 		{
2506 			m_blocks_per_mcu = 0;
2507 
2508 			for (component_num = 0; component_num < m_comps_in_scan; component_num++)
2509 			{
2510 				int num_blocks;
2511 
2512 				component_id = m_comp_list[component_num];
2513 
2514 				num_blocks = m_comp_h_samp[component_id] * m_comp_v_samp[component_id];
2515 
2516 				while (num_blocks--)
2517 					m_mcu_org[m_blocks_per_mcu++] = component_id;
2518 			}
2519 		}
2520 
2521 		if (m_blocks_per_mcu > m_max_blocks_per_mcu)
2522 			return false;
2523 
2524 		for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
2525 		{
2526 			int comp_id = m_mcu_org[mcu_block];
2527 			if (comp_id >= JPGD_MAX_QUANT_TABLES)
2528 				return false;
2529 		}
2530 
2531 		return true;
2532 	}
2533 
2534 	// Starts a new scan.
2535 	int jpeg_decoder::init_scan()
2536 	{
2537 		if (!locate_sos_marker())
2538 			return JPGD_FALSE;
2539 
2540 		if (!calc_mcu_block_order())
2541 			return JPGD_FALSE;
2542 
2543 		check_huff_tables();
2544 
2545 		check_quant_tables();
2546 
2547 		memset(m_last_dc_val, 0, m_comps_in_frame * sizeof(uint));
2548 
2549 		m_eob_run = 0;
2550 
2551 		if (m_restart_interval)
2552 		{
2553 			m_restarts_left = m_restart_interval;
2554 			m_next_restart_num = 0;
2555 		}
2556 
2557 		fix_in_buffer();
2558 
2559 		return JPGD_TRUE;
2560 	}
2561 
2562 	// Starts a frame. Determines if the number of components or sampling factors
2563 	// are supported.
2564 	void jpeg_decoder::init_frame()
2565 	{
2566 		int i;
2567 
2568 		if (m_comps_in_frame == 1)
2569 		{
2570 			if ((m_comp_h_samp[0] != 1) || (m_comp_v_samp[0] != 1))
2571 				stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2572 
2573 			m_scan_type = JPGD_GRAYSCALE;
2574 			m_max_blocks_per_mcu = 1;
2575 			m_max_mcu_x_size = 8;
2576 			m_max_mcu_y_size = 8;
2577 		}
2578 		else if (m_comps_in_frame == 3)
2579 		{
2580 			if (((m_comp_h_samp[1] != 1) || (m_comp_v_samp[1] != 1)) ||
2581 				((m_comp_h_samp[2] != 1) || (m_comp_v_samp[2] != 1)))
2582 				stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2583 
2584 			if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1))
2585 			{
2586 				m_scan_type = JPGD_YH1V1;
2587 
2588 				m_max_blocks_per_mcu = 3;
2589 				m_max_mcu_x_size = 8;
2590 				m_max_mcu_y_size = 8;
2591 			}
2592 			else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1))
2593 			{
2594 				m_scan_type = JPGD_YH2V1;
2595 				m_max_blocks_per_mcu = 4;
2596 				m_max_mcu_x_size = 16;
2597 				m_max_mcu_y_size = 8;
2598 			}
2599 			else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 2))
2600 			{
2601 				m_scan_type = JPGD_YH1V2;
2602 				m_max_blocks_per_mcu = 4;
2603 				m_max_mcu_x_size = 8;
2604 				m_max_mcu_y_size = 16;
2605 			}
2606 			else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2))
2607 			{
2608 				m_scan_type = JPGD_YH2V2;
2609 				m_max_blocks_per_mcu = 6;
2610 				m_max_mcu_x_size = 16;
2611 				m_max_mcu_y_size = 16;
2612 			}
2613 			else
2614 				stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2615 		}
2616 		else
2617 			stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
2618 
2619 		m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size;
2620 		m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size;
2621 
2622 		// These values are for the *destination* pixels: after conversion.
2623 		if (m_scan_type == JPGD_GRAYSCALE)
2624 			m_dest_bytes_per_pixel = 1;
2625 		else
2626 			m_dest_bytes_per_pixel = 4;
2627 
2628 		m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel;
2629 
2630 		m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel);
2631 
2632 		// Initialize two scan line buffers.
2633 		m_pScan_line_0 = (uint8*)alloc_aligned(m_dest_bytes_per_scan_line, true);
2634 		if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2))
2635 			m_pScan_line_1 = (uint8*)alloc_aligned(m_dest_bytes_per_scan_line, true);
2636 
2637 		m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu;
2638 
2639 		// Should never happen
2640 		if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW)
2641 			stop_decoding(JPGD_DECODE_ERROR);
2642 
2643 		// Allocate the coefficient buffer, enough for one MCU
2644 		m_pMCU_coefficients = (jpgd_block_coeff_t *)alloc_aligned(m_max_blocks_per_mcu * 64 * sizeof(jpgd_block_coeff_t));
2645 
2646 		for (i = 0; i < m_max_blocks_per_mcu; i++)
2647 			m_mcu_block_max_zag[i] = 64;
2648 
2649 		m_pSample_buf = (uint8*)alloc_aligned(m_max_blocks_per_row * 64);
2650 		m_pSample_buf_prev = (uint8*)alloc_aligned(m_max_blocks_per_row * 64);
2651 
2652 		m_total_lines_left = m_image_y_size;
2653 
2654 		m_mcu_lines_left = 0;
2655 
2656 		create_look_ups();
2657 	}
2658 
2659 	// The coeff_buf series of methods originally stored the coefficients
2660 	// into a "virtual" file which was located in EMS, XMS, or a disk file. A cache
2661 	// was used to make this process more efficient. Now, we can store the entire
2662 	// thing in RAM.
2663 	jpeg_decoder::coeff_buf* jpeg_decoder::coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y)
2664 	{
2665 		coeff_buf* cb = (coeff_buf*)alloc(sizeof(coeff_buf));
2666 
2667 		cb->block_num_x = block_num_x;
2668 		cb->block_num_y = block_num_y;
2669 		cb->block_len_x = block_len_x;
2670 		cb->block_len_y = block_len_y;
2671 		cb->block_size = (block_len_x * block_len_y) * sizeof(jpgd_block_coeff_t);
2672 		cb->pData = (uint8*)alloc(cb->block_size * block_num_x * block_num_y, true);
2673 		return cb;
2674 	}
2675 
2676 	inline jpgd_block_coeff_t* jpeg_decoder::coeff_buf_getp(coeff_buf* cb, int block_x, int block_y)
2677 	{
2678 		if ((block_x >= cb->block_num_x) || (block_y >= cb->block_num_y))
2679 			stop_decoding(JPGD_DECODE_ERROR);
2680 
2681 		return (jpgd_block_coeff_t*)(cb->pData + block_x * cb->block_size + block_y * (cb->block_size * cb->block_num_x));
2682 	}
2683 
	// The following methods decode the various types of blocks encountered
	// in progressively encoded images.
2686 	void jpeg_decoder::decode_block_dc_first(jpeg_decoder* pD, int component_id, int block_x, int block_y)
2687 	{
2688 		int s, r;
2689 		jpgd_block_coeff_t* p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
2690 
2691 		if ((s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_dc_tab[component_id]])) != 0)
2692 		{
2693 			if (s >= 16)
2694 				pD->stop_decoding(JPGD_DECODE_ERROR);
2695 
2696 			r = pD->get_bits_no_markers(s);
2697 			s = JPGD_HUFF_EXTEND(r, s);
2698 		}
2699 
2700 		pD->m_last_dc_val[component_id] = (s += pD->m_last_dc_val[component_id]);
2701 
2702 		p[0] = static_cast<jpgd_block_coeff_t>(s << pD->m_successive_low);
2703 	}
2704 
2705 	void jpeg_decoder::decode_block_dc_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y)
2706 	{
2707 		if (pD->get_bits_no_markers(1))
2708 		{
2709 			jpgd_block_coeff_t* p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
2710 
2711 			p[0] |= (1 << pD->m_successive_low);
2712 		}
2713 	}
2714 
2715 	void jpeg_decoder::decode_block_ac_first(jpeg_decoder* pD, int component_id, int block_x, int block_y)
2716 	{
2717 		int k, s, r;
2718 
2719 		if (pD->m_eob_run)
2720 		{
2721 			pD->m_eob_run--;
2722 			return;
2723 		}
2724 
2725 		jpgd_block_coeff_t* p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
2726 
2727 		for (k = pD->m_spectral_start; k <= pD->m_spectral_end; k++)
2728 		{
2729 			unsigned int idx = pD->m_comp_ac_tab[component_id];
2730 			if (idx >= JPGD_MAX_HUFF_TABLES)
2731 				pD->stop_decoding(JPGD_DECODE_ERROR);
2732 
2733 			s = pD->huff_decode(pD->m_pHuff_tabs[idx]);
2734 
2735 			r = s >> 4;
2736 			s &= 15;
2737 
2738 			if (s)
2739 			{
2740 				if ((k += r) > 63)
2741 					pD->stop_decoding(JPGD_DECODE_ERROR);
2742 
2743 				r = pD->get_bits_no_markers(s);
2744 				s = JPGD_HUFF_EXTEND(r, s);
2745 
2746 				p[g_ZAG[k]] = static_cast<jpgd_block_coeff_t>(s << pD->m_successive_low);
2747 			}
2748 			else
2749 			{
2750 				if (r == 15)
2751 				{
2752 					if ((k += 15) > 63)
2753 						pD->stop_decoding(JPGD_DECODE_ERROR);
2754 				}
2755 				else
2756 				{
2757 					pD->m_eob_run = 1 << r;
2758 
2759 					if (r)
2760 						pD->m_eob_run += pD->get_bits_no_markers(r);
2761 
2762 					pD->m_eob_run--;
2763 
2764 					break;
2765 				}
2766 			}
2767 		}
2768 	}
2769 
	// AC refinement pass for one block, covering coefficients
	// m_spectral_start..m_spectral_end in zig-zag order.
	// Coefficients that are already non-zero may receive one correction bit each; newly
	// non-zero coefficients are set to +p1 or m1. Stops decoding with JPGD_DECODE_ERROR on
	// m_spectral_end > 63, an out-of-range Huffman table index, or a symbol whose low
	// nibble is neither 0 nor 1.
	void jpeg_decoder::decode_block_ac_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y)
	{
		int s, k, r;

		// p1: the single bit at position m_successive_low.
		int p1 = 1 << pD->m_successive_low;

		// m1: all-ones shifted left by m_successive_low, computed on an unsigned value
		// instead of shifting -1 (see the commented-out line).
		//int m1 = (-1) << pD->m_successive_low;
		int m1 = static_cast<int>((UINT32_MAX << pD->m_successive_low));

		jpgd_block_coeff_t* p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
		if (pD->m_spectral_end > 63)
			pD->stop_decoding(JPGD_DECODE_ERROR);

		k = pD->m_spectral_start;

		// No end-of-band run pending: decode symbols for this block.
		if (pD->m_eob_run == 0)
		{
			for (; k <= pD->m_spectral_end; k++)
			{
				unsigned int idx = pD->m_comp_ac_tab[component_id];
				if (idx >= JPGD_MAX_HUFF_TABLES)
					pD->stop_decoding(JPGD_DECODE_ERROR);

				s = pD->huff_decode(pD->m_pHuff_tabs[idx]);

				// High nibble: count of zero-valued coefficients to pass over; low nibble: 0 or 1.
				r = s >> 4;
				s &= 15;

				if (s)
				{
					if (s != 1)
						pD->stop_decoding(JPGD_DECODE_ERROR);

					// One bit selects which of p1 / m1 the new coefficient gets.
					if (pD->get_bits_no_markers(1))
						s = p1;
					else
						s = m1;
				}
				else
				{
					// r == 15 with s == 0 falls through to the skipping loop below; any other
					// r starts an end-of-band run of 2^r plus r extra bits.
					if (r != 15)
					{
						pD->m_eob_run = 1 << r;

						if (r)
							pD->m_eob_run += pD->get_bits_no_markers(r);

						break;
					}
				}

				// Advance over coefficients: non-zero ones read a correction bit, zero ones
				// count down r; the loop stops at the zero coefficient reached when r runs out.
				do
				{
					jpgd_block_coeff_t* this_coef = p + g_ZAG[k & 63];

					if (*this_coef != 0)
					{
						if (pD->get_bits_no_markers(1))
						{
							// Only adjust when bit p1 is not already set; the adjustment
							// follows the coefficient's sign.
							if ((*this_coef & p1) == 0)
							{
								if (*this_coef >= 0)
									*this_coef = static_cast<jpgd_block_coeff_t>(*this_coef + p1);
								else
									*this_coef = static_cast<jpgd_block_coeff_t>(*this_coef + m1);
							}
						}
					}
					else
					{
						if (--r < 0)
							break;
					}

					k++;

				} while (k <= pD->m_spectral_end);

				// Store the new coefficient at the position where the loop stopped.
				if ((s) && (k < 64))
				{
					p[g_ZAG[k]] = static_cast<jpgd_block_coeff_t>(s);
				}
			}
		}

		// End-of-band run active (possibly started just above): the remaining coefficients
		// of the band only receive correction bits, then the run is counted down.
		if (pD->m_eob_run > 0)
		{
			for (; k <= pD->m_spectral_end; k++)
			{
				jpgd_block_coeff_t* this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis

				if (*this_coef != 0)
				{
					if (pD->get_bits_no_markers(1))
					{
						if ((*this_coef & p1) == 0)
						{
							if (*this_coef >= 0)
								*this_coef = static_cast<jpgd_block_coeff_t>(*this_coef + p1);
							else
								*this_coef = static_cast<jpgd_block_coeff_t>(*this_coef + m1);
						}
					}
				}
			}

			pD->m_eob_run--;
		}
	}
2879 
2880 	// Decode a scan in a progressively encoded image.
2881 	void jpeg_decoder::decode_scan(pDecode_block_func decode_block_func)
2882 	{
2883 		int mcu_row, mcu_col, mcu_block;
2884 		int block_x_mcu[JPGD_MAX_COMPONENTS], block_y_mcu[JPGD_MAX_COMPONENTS];
2885 
2886 		memset(block_y_mcu, 0, sizeof(block_y_mcu));
2887 
2888 		for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++)
2889 		{
2890 			int component_num, component_id;
2891 
2892 			memset(block_x_mcu, 0, sizeof(block_x_mcu));
2893 
2894 			for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
2895 			{
2896 				int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
2897 
2898 				if ((m_restart_interval) && (m_restarts_left == 0))
2899 					process_restart();
2900 
2901 				for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
2902 				{
2903 					component_id = m_mcu_org[mcu_block];
2904 
2905 					decode_block_func(this, component_id, block_x_mcu[component_id] + block_x_mcu_ofs, block_y_mcu[component_id] + block_y_mcu_ofs);
2906 
2907 					if (m_comps_in_scan == 1)
2908 						block_x_mcu[component_id]++;
2909 					else
2910 					{
2911 						if (++block_x_mcu_ofs == m_comp_h_samp[component_id])
2912 						{
2913 							block_x_mcu_ofs = 0;
2914 
2915 							if (++block_y_mcu_ofs == m_comp_v_samp[component_id])
2916 							{
2917 								block_y_mcu_ofs = 0;
2918 								block_x_mcu[component_id] += m_comp_h_samp[component_id];
2919 							}
2920 						}
2921 					}
2922 				}
2923 
2924 				m_restarts_left--;
2925 			}
2926 
2927 			if (m_comps_in_scan == 1)
2928 				block_y_mcu[m_comp_list[0]]++;
2929 			else
2930 			{
2931 				for (component_num = 0; component_num < m_comps_in_scan; component_num++)
2932 				{
2933 					component_id = m_comp_list[component_num];
2934 					block_y_mcu[component_id] += m_comp_v_samp[component_id];
2935 				}
2936 			}
2937 		}
2938 	}
2939 
2940 	// Decode a progressively encoded image.
2941 	void jpeg_decoder::init_progressive()
2942 	{
2943 		int i;
2944 
2945 		if (m_comps_in_frame == 4)
2946 			stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
2947 
2948 		// Allocate the coefficient buffers.
2949 		for (i = 0; i < m_comps_in_frame; i++)
2950 		{
2951 			m_dc_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 1, 1);
2952 			m_ac_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 8, 8);
2953 		}
2954 
2955 		// See https://libjpeg-turbo.org/pmwiki/uploads/About/TwoIssueswiththeJPEGStandard.pdf
2956 		uint32_t total_scans = 0;
2957 		const uint32_t MAX_SCANS_TO_PROCESS = 1000;
2958 
2959 		for (; ; )
2960 		{
2961 			int dc_only_scan, refinement_scan;
2962 			pDecode_block_func decode_block_func;
2963 
2964 			if (!init_scan())
2965 				break;
2966 
2967 			dc_only_scan = (m_spectral_start == 0);
2968 			refinement_scan = (m_successive_high != 0);
2969 
2970 			if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63))
2971 				stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2972 
2973 			if (dc_only_scan)
2974 			{
2975 				if (m_spectral_end)
2976 					stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2977 			}
2978 			else if (m_comps_in_scan != 1)  /* AC scans can only contain one component */
2979 				stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2980 
2981 			if ((refinement_scan) && (m_successive_low != m_successive_high - 1))
2982 				stop_decoding(JPGD_BAD_SOS_SUCCESSIVE);
2983 
2984 			if (dc_only_scan)
2985 			{
2986 				if (refinement_scan)
2987 					decode_block_func = decode_block_dc_refine;
2988 				else
2989 					decode_block_func = decode_block_dc_first;
2990 			}
2991 			else
2992 			{
2993 				if (refinement_scan)
2994 					decode_block_func = decode_block_ac_refine;
2995 				else
2996 					decode_block_func = decode_block_ac_first;
2997 			}
2998 
2999 			decode_scan(decode_block_func);
3000 
3001 			m_bits_left = 16;
3002 			get_bits(16);
3003 			get_bits(16);
3004 
3005 			total_scans++;
3006 			if (total_scans > MAX_SCANS_TO_PROCESS)
3007 				stop_decoding(JPGD_TOO_MANY_SCANS);
3008 		}
3009 
3010 		m_comps_in_scan = m_comps_in_frame;
3011 
3012 		for (i = 0; i < m_comps_in_frame; i++)
3013 			m_comp_list[i] = i;
3014 
3015 		if (!calc_mcu_block_order())
3016 			stop_decoding(JPGD_DECODE_ERROR);
3017 	}
3018 
	// Sets up the scan for the non-progressive path. If init_scan() does not succeed,
	// decoding stops with JPGD_UNEXPECTED_MARKER.
	void jpeg_decoder::init_sequential()
	{
		if (!init_scan())
			stop_decoding(JPGD_UNEXPECTED_MARKER);
	}
3024 
3025 	void jpeg_decoder::decode_start()
3026 	{
3027 		init_frame();
3028 
3029 		if (m_progressive_flag)
3030 			init_progressive();
3031 		else
3032 			init_sequential();
3033 	}
3034 
	// Initialises the decoder for pStream with the given flags (via init()), then
	// locates the SOF marker.
	void jpeg_decoder::decode_init(jpeg_decoder_stream* pStream, uint32_t flags)
	{
		init(pStream, flags);
		locate_sof_marker();
	}
3040 
	// Constructs a decoder reading from pStream and runs decode_init().
	// If control returns through m_jmp_state (presumably a longjmp from stop_decoding()
	// during initialisation), the constructor just returns; callers are expected to check
	// get_error_code() afterwards, as decompress_jpeg_image_from_stream() does.
	jpeg_decoder::jpeg_decoder(jpeg_decoder_stream* pStream, uint32_t flags)
	{
		if (::setjmp(m_jmp_state))
			return;
		decode_init(pStream, flags);
	}
3047 
3048 	int jpeg_decoder::begin_decoding()
3049 	{
3050 		if (m_ready_flag)
3051 			return JPGD_SUCCESS;
3052 
3053 		if (m_error_code)
3054 			return JPGD_FAILED;
3055 
3056 		if (::setjmp(m_jmp_state))
3057 			return JPGD_FAILED;
3058 
3059 		decode_start();
3060 
3061 		m_ready_flag = true;
3062 
3063 		return JPGD_SUCCESS;
3064 	}
3065 
	// Releases the decoder's memory via free_all_blocks().
	jpeg_decoder::~jpeg_decoder()
	{
		free_all_blocks();
	}
3070 
	// Starts with no file attached and both status flags cleared.
	jpeg_decoder_file_stream::jpeg_decoder_file_stream()
	{
		m_pFile = nullptr;
		m_eof_flag = false;
		m_error_flag = false;
	}
3077 
3078 	void jpeg_decoder_file_stream::close()
3079 	{
3080 		if (m_pFile)
3081 		{
3082 			fclose(m_pFile);
3083 			m_pFile = nullptr;
3084 		}
3085 
3086 		m_eof_flag = false;
3087 		m_error_flag = false;
3088 	}
3089 
	// Closes the file if one is still open.
	jpeg_decoder_file_stream::~jpeg_decoder_file_stream()
	{
		close();
	}
3094 
3095 	bool jpeg_decoder_file_stream::open(const char* Pfilename)
3096 	{
3097 		close();
3098 
3099 		m_eof_flag = false;
3100 		m_error_flag = false;
3101 
3102 #if defined(_MSC_VER)
3103 		m_pFile = nullptr;
3104 		fopen_s(&m_pFile, Pfilename, "rb");
3105 #else
3106 		m_pFile = fopen(Pfilename, "rb");
3107 #endif
3108 		return m_pFile != nullptr;
3109 	}
3110 
3111 	int jpeg_decoder_file_stream::read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag)
3112 	{
3113 		if (!m_pFile)
3114 			return -1;
3115 
3116 		if (m_eof_flag)
3117 		{
3118 			*pEOF_flag = true;
3119 			return 0;
3120 		}
3121 
3122 		if (m_error_flag)
3123 			return -1;
3124 
3125 		int bytes_read = static_cast<int>(fread(pBuf, 1, max_bytes_to_read, m_pFile));
3126 		if (bytes_read < max_bytes_to_read)
3127 		{
3128 			if (ferror(m_pFile))
3129 			{
3130 				m_error_flag = true;
3131 				return -1;
3132 			}
3133 
3134 			m_eof_flag = true;
3135 			*pEOF_flag = true;
3136 		}
3137 
3138 		return bytes_read;
3139 	}
3140 
3141 	bool jpeg_decoder_mem_stream::open(const uint8* pSrc_data, uint size)
3142 	{
3143 		close();
3144 		m_pSrc_data = pSrc_data;
3145 		m_ofs = 0;
3146 		m_size = size;
3147 		return true;
3148 	}
3149 
3150 	int jpeg_decoder_mem_stream::read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag)
3151 	{
3152 		*pEOF_flag = false;
3153 
3154 		if (!m_pSrc_data)
3155 			return -1;
3156 
3157 		uint bytes_remaining = m_size - m_ofs;
3158 		if ((uint)max_bytes_to_read > bytes_remaining)
3159 		{
3160 			max_bytes_to_read = bytes_remaining;
3161 			*pEOF_flag = true;
3162 		}
3163 
3164 		memcpy(pBuf, m_pSrc_data + m_ofs, max_bytes_to_read);
3165 		m_ofs += max_bytes_to_read;
3166 
3167 		return max_bytes_to_read;
3168 	}
3169 
3170 	unsigned char* decompress_jpeg_image_from_stream(jpeg_decoder_stream* pStream, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags)
3171 	{
3172 		if (!actual_comps)
3173 			return nullptr;
3174 		*actual_comps = 0;
3175 
3176 		if ((!pStream) || (!width) || (!height) || (!req_comps))
3177 			return nullptr;
3178 
3179 		if ((req_comps != 1) && (req_comps != 3) && (req_comps != 4))
3180 			return nullptr;
3181 
3182 		jpeg_decoder decoder(pStream, flags);
3183 		if (decoder.get_error_code() != JPGD_SUCCESS)
3184 			return nullptr;
3185 
3186 		const int image_width = decoder.get_width(), image_height = decoder.get_height();
3187 		*width = image_width;
3188 		*height = image_height;
3189 		*actual_comps = decoder.get_num_components();
3190 
3191 		if (decoder.begin_decoding() != JPGD_SUCCESS)
3192 			return nullptr;
3193 
3194 		const int dst_bpl = image_width * req_comps;
3195 
3196 		uint8* pImage_data = (uint8*)jpgd_malloc(dst_bpl * image_height);
3197 		if (!pImage_data)
3198 			return nullptr;
3199 
3200 		for (int y = 0; y < image_height; y++)
3201 		{
3202 			const uint8* pScan_line;
3203 			uint scan_line_len;
3204 			if (decoder.decode((const void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS)
3205 			{
3206 				jpgd_free(pImage_data);
3207 				return nullptr;
3208 			}
3209 
3210 			uint8* pDst = pImage_data + y * dst_bpl;
3211 
3212 			if (((req_comps == 1) && (decoder.get_num_components() == 1)) || ((req_comps == 4) && (decoder.get_num_components() == 3)))
3213 				memcpy(pDst, pScan_line, dst_bpl);
3214 			else if (decoder.get_num_components() == 1)
3215 			{
3216 				if (req_comps == 3)
3217 				{
3218 					for (int x = 0; x < image_width; x++)
3219 					{
3220 						uint8 luma = pScan_line[x];
3221 						pDst[0] = luma;
3222 						pDst[1] = luma;
3223 						pDst[2] = luma;
3224 						pDst += 3;
3225 					}
3226 				}
3227 				else
3228 				{
3229 					for (int x = 0; x < image_width; x++)
3230 					{
3231 						uint8 luma = pScan_line[x];
3232 						pDst[0] = luma;
3233 						pDst[1] = luma;
3234 						pDst[2] = luma;
3235 						pDst[3] = 255;
3236 						pDst += 4;
3237 					}
3238 				}
3239 			}
3240 			else if (decoder.get_num_components() == 3)
3241 			{
3242 				if (req_comps == 1)
3243 				{
3244 					const int YR = 19595, YG = 38470, YB = 7471;
3245 					for (int x = 0; x < image_width; x++)
3246 					{
3247 						int r = pScan_line[x * 4 + 0];
3248 						int g = pScan_line[x * 4 + 1];
3249 						int b = pScan_line[x * 4 + 2];
3250 						*pDst++ = static_cast<uint8>((r * YR + g * YG + b * YB + 32768) >> 16);
3251 					}
3252 				}
3253 				else
3254 				{
3255 					for (int x = 0; x < image_width; x++)
3256 					{
3257 						pDst[0] = pScan_line[x * 4 + 0];
3258 						pDst[1] = pScan_line[x * 4 + 1];
3259 						pDst[2] = pScan_line[x * 4 + 2];
3260 						pDst += 3;
3261 					}
3262 				}
3263 			}
3264 		}
3265 
3266 		return pImage_data;
3267 	}
3268 
	// Convenience wrapper: wraps the src_data_size bytes at pSrc_data in a
	// jpeg_decoder_mem_stream and forwards everything else to
	// decompress_jpeg_image_from_stream(), returning its result.
	unsigned char* decompress_jpeg_image_from_memory(const unsigned char* pSrc_data, int src_data_size, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags)
	{
		jpgd::jpeg_decoder_mem_stream mem_stream(pSrc_data, src_data_size);
		return decompress_jpeg_image_from_stream(&mem_stream, width, height, actual_comps, req_comps, flags);
	}
3274 
	// Convenience wrapper: opens pSrc_filename through a jpeg_decoder_file_stream and
	// forwards everything else to decompress_jpeg_image_from_stream().
	// Returns nullptr if the file cannot be opened; in that case the output parameters
	// are left untouched.
	unsigned char* decompress_jpeg_image_from_file(const char* pSrc_filename, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags)
	{
		jpgd::jpeg_decoder_file_stream file_stream;
		if (!file_stream.open(pSrc_filename))
			return nullptr;
		return decompress_jpeg_image_from_stream(&file_stream, width, height, actual_comps, req_comps, flags);
	}
3282 
3283 } // namespace jpgd
3284