1 #include "stb_image.h"
2 
3 #ifndef STBI_HEADER_FILE_ONLY
4 
5 #ifndef STBI_NO_HDR
6 #include <math.h>    // ldexp
7 #include <string.h>  // strcmp, strtok
8 #endif
9 
10 #ifndef STBI_NO_STDIO
11 #include <stdio.h>
12 #endif
13 #include <stdlib.h>
14 #include <memory.h>
15 #include <assert.h>
16 #include <stdarg.h>
17 
18 #ifndef _MSC_VER
19 #ifdef __cplusplus
20 #define stbi_inline inline
21 #else
22 #define stbi_inline
23 #endif
24 #else
25 #define stbi_inline __forceinline
26 #endif
27 
28 // implementation:
29 typedef unsigned char uint8;
30 typedef unsigned short uint16;
31 typedef signed short int16;
32 typedef unsigned int uint32;
33 typedef signed int int32;
34 typedef unsigned int uint;
35 
36 // should produce compiler error if size is wrong
37 typedef unsigned char validate_uint32[sizeof(uint32) == 4 ? 1 : -1];
38 
39 #if defined(STBI_NO_STDIO) && !defined(STBI_NO_WRITE)
40 #define STBI_NO_WRITE
41 #endif
42 
43 #define STBI_NOTUSED(v) (void)sizeof(v)
44 
45 #ifdef _MSC_VER
46 #define STBI_HAS_LROTL
47 #endif
48 
// stbi_lrot(x, y): rotate the 32-bit unsigned value x left by y bits.
// When STBI_HAS_LROTL is defined the _lrotl intrinsic is used; otherwise a
// portable shift/or form is used. Both shift counts are masked to 0..31 so
// that y == 0 (or y == 32) never shifts by the full width of the type,
// which is undefined behavior in C.
#ifdef STBI_HAS_LROTL
#define stbi_lrot(x, y) _lrotl(x, y)
#else
#define stbi_lrot(x, y) (((x) << ((y) & 31)) | ((x) >> ((32 - (y)) & 31)))
#endif
54 
55 ///////////////////////////////////////////////
56 //
57 //  stbi struct and start_xxx functions
58 
// stbi structure is our basic context used by all images, so it
// contains all the IO context, plus some basic image information
typedef struct
{
	// presumably the image width and height; they are filled in by the
	// format loaders, which are outside this view
	uint32 img_x, img_y;
	// img_n: number of components in the decoded source image (see the
	// convert_format notes); img_out_n: presumably the output component count
	int img_n, img_out_n;

	// copy of the caller's callback table; io.read == NULL marks a
	// memory-backed context (set by start_mem)
	stbi_io_callbacks io;
	// opaque pointer passed back to every callback
	void *io_user_data;

	// 1 while input is still being pulled through io.read; cleared by
	// start_mem and by refill_buffer once the callbacks report end of input
	int read_from_callbacks;
	// capacity handed to io.read when refilling buffer_start
	int buflen;
	// staging buffer for callback-based input
	uint8 buffer_start[128];

	// read cursor and one-past-the-last valid byte of the current buffer
	uint8 *img_buffer, *img_buffer_end;
	// position stbi_rewind() moves the cursor back to
	uint8 *img_buffer_original;
} stbi;
76 
77 static void refill_buffer(stbi *s);
78 
79 // initialize a memory-decode context
start_mem(stbi * s,uint8 const * buffer,int len)80 static void start_mem(stbi *s, uint8 const *buffer, int len)
81 {
82 	s->io.read = NULL;
83 	s->read_from_callbacks = 0;
84 	s->img_buffer = s->img_buffer_original = (uint8 *)buffer;
85 	s->img_buffer_end = (uint8 *)buffer + len;
86 }
87 
88 // initialize a callback-based context
start_callbacks(stbi * s,stbi_io_callbacks * c,void * user)89 static void start_callbacks(stbi *s, stbi_io_callbacks *c, void *user)
90 {
91 	s->io = *c;
92 	s->io_user_data = user;
93 	s->buflen = sizeof(s->buffer_start);
94 	s->read_from_callbacks = 1;
95 	s->img_buffer_original = s->buffer_start;
96 	refill_buffer(s);
97 }
98 
99 #ifndef STBI_NO_STDIO
100 
// 'read' callback backed by a stdio FILE*: copies up to `size` bytes into
// `data` and returns how many bytes were actually read.
static int stdio_read(void *user, char *data, int size)
{
	FILE *fp = (FILE *)user;
	size_t got = fread(data, 1, size, fp);
	return (int)got;
}
105 
// 'skip' callback backed by a stdio FILE*: advances the stream position by
// `n` bytes relative to the current position.
static void stdio_skip(void *user, unsigned n)
{
	FILE *fp = (FILE *)user;
	fseek(fp, n, SEEK_CUR);
}
110 
// 'eof' callback backed by a stdio FILE*: nonzero once the stream's
// end-of-file indicator is set.
static int stdio_eof(void *user)
{
	FILE *fp = (FILE *)user;
	return feof(fp);
}
115 
// Callback table that routes the generic stbi I/O hooks to the stdio-backed
// helpers above. The initializers are positional, so they must follow the
// field order of stbi_io_callbacks (presumably read, skip, eof -- the
// struct is declared in the header, outside this view).
static stbi_io_callbacks stbi_stdio_callbacks =
	{
		stdio_read,
		stdio_skip,
		stdio_eof,
};
122 
// Set up a context that reads from an already-open stdio stream by plugging
// the stdio callback table into the generic callback path.
static void start_file(stbi *s, FILE *f)
{
	void *handle = (void *)f;
	start_callbacks(s, &stbi_stdio_callbacks, handle);
}
127 
128 //static void stop_file(stbi *s) { }
129 
130 #endif  // !STBI_NO_STDIO
131 
// Move the read cursor back to where the context started.
// Conceptually this should rewind the whole stream, but only the initial
// buffer is rewound: it is only used after a format 'test', and those never
// look at more than 92 bytes.
static void stbi_rewind(stbi *s)
{
	uint8 *origin = s->img_buffer_original;
	s->img_buffer = origin;
}
139 
140 static int stbi_jpeg_test(stbi *s);
141 static stbi_uc *stbi_jpeg_load(stbi *s, int *x, int *y, int *comp, int req_comp);
142 static int stbi_jpeg_info(stbi *s, int *x, int *y, int *comp);
143 static int stbi_png_test(stbi *s);
144 static stbi_uc *stbi_png_load(stbi *s, int *x, int *y, int *comp, int req_comp);
145 static int stbi_png_info(stbi *s, int *x, int *y, int *comp);
146 static int stbi_bmp_test(stbi *s);
147 static stbi_uc *stbi_bmp_load(stbi *s, int *x, int *y, int *comp, int req_comp);
148 static int stbi_tga_test(stbi *s);
149 static stbi_uc *stbi_tga_load(stbi *s, int *x, int *y, int *comp, int req_comp);
150 static int stbi_tga_info(stbi *s, int *x, int *y, int *comp);
151 static int stbi_psd_test(stbi *s);
152 static stbi_uc *stbi_psd_load(stbi *s, int *x, int *y, int *comp, int req_comp);
153 static int stbi_hdr_test(stbi *s);
154 static float *stbi_hdr_load(stbi *s, int *x, int *y, int *comp, int req_comp);
155 static int stbi_pic_test(stbi *s);
156 static stbi_uc *stbi_pic_load(stbi *s, int *x, int *y, int *comp, int req_comp);
157 static int stbi_gif_test(stbi *s);
158 static stbi_uc *stbi_gif_load(stbi *s, int *x, int *y, int *comp, int req_comp);
159 static int stbi_gif_info(stbi *s, int *x, int *y, int *comp);
160 
// Most recent error description recorded by e(). It is a single shared
// pointer, so this is not threadsafe.
static const char *failure_reason;

// Return the message recorded by the most recent failure
// (NULL if nothing has been recorded yet).
const char *stbi_failure_reason(void)
{
	const char *latest = failure_reason;
	return latest;
}

// Record `str` as the current failure reason. Always yields 0, so callers
// that report failure as 0 can write `return e(...)`.
static int e(const char *str)
{
	failure_reason = str;
	return 0;
}
174 
175 // e - error
176 // epf - error returning pointer to float
177 // epuc - error returning pointer to unsigned char
178 
179 #ifdef STBI_NO_FAILURE_STRINGS
180 #define e(x, y) 0
181 #elif defined(STBI_FAILURE_USERMSG)
182 #define e(x, y) e(y)
183 #else
184 #define e(x, y) e(x)
185 #endif
186 
187 #define epf(x, y) ((float *)(e(x, y) ? NULL : NULL))
188 #define epuc(x, y) ((unsigned char *)(e(x, y) ? NULL : NULL))
189 
// Release a pixel buffer previously returned by one of the stbi_load*
// functions. The buffer is handed to free(), so NULL is accepted.
void stbi_image_free(void *retval_from_stbi_load)
{
	void *pixels = retval_from_stbi_load;
	free(pixels);
}
194 
195 #ifndef STBI_NO_HDR
196 static float *ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
197 static stbi_uc *hdr_to_ldr(float *data, int x, int y, int comp);
198 #endif
199 
// Decode an image from context `s` as 8-bit data, trying each supported
// format's test function in turn and handing x, y, comp and req_comp
// unchanged to the first loader whose test succeeds.
// Returns the loader's result, or NULL (with the failure reason recorded via
// epuc) when no format matches.
static unsigned char *stbi_load_main(stbi *s, int *x, int *y, int *comp, int req_comp)
{
	if (stbi_jpeg_test(s)) return stbi_jpeg_load(s, x, y, comp, req_comp);
	if (stbi_png_test(s)) return stbi_png_load(s, x, y, comp, req_comp);
	if (stbi_bmp_test(s)) return stbi_bmp_load(s, x, y, comp, req_comp);
	if (stbi_gif_test(s)) return stbi_gif_load(s, x, y, comp, req_comp);
	if (stbi_psd_test(s)) return stbi_psd_load(s, x, y, comp, req_comp);
	if (stbi_pic_test(s)) return stbi_pic_load(s, x, y, comp, req_comp);

#ifndef STBI_NO_HDR
	if (stbi_hdr_test(s))
	{
		float *hdr = stbi_hdr_load(s, x, y, comp, req_comp);
		// A failed HDR decode must not reach hdr_to_ldr: it would dereference
		// the NULL buffer, and *x / *y / *comp may not have been written.
		if (hdr == NULL)
			return NULL;
		return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
	}
#endif

	// test tga last because it's a crappy test!
	if (stbi_tga_test(s))
		return stbi_tga_load(s, x, y, comp, req_comp);
	return epuc("unknown image type", "Image not of any known type, or corrupt");
}
222 
223 #ifndef STBI_NO_STDIO
// Open `filename` in binary mode, decode it with stbi_load_from_file and
// close it again. Returns NULL if the file cannot be opened or decoding fails.
unsigned char *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
{
	unsigned char *pixels;
	FILE *fp = fopen(filename, "rb");

	if (fp == NULL)
		return epuc("can't fopen", "Unable to open file");

	pixels = stbi_load_from_file(fp, x, y, comp, req_comp);
	fclose(fp);
	return pixels;
}
233 
stbi_load_from_file(FILE * f,int * x,int * y,int * comp,int req_comp)234 unsigned char *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
235 {
236 	stbi s;
237 	start_file(&s, f);
238 	return stbi_load_main(&s, x, y, comp, req_comp);
239 }
240 #endif  //!STBI_NO_STDIO
241 
// Decode an image held entirely in memory (`len` bytes starting at `buffer`).
unsigned char *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
{
	stbi ctx;
	unsigned char *pixels;

	start_mem(&ctx, buffer, len);
	pixels = stbi_load_main(&ctx, x, y, comp, req_comp);
	return pixels;
}
248 
// Decode an image whose bytes are supplied through the callbacks in `clbk`;
// `user` is passed back to each callback.
unsigned char *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
{
	stbi ctx;
	unsigned char *pixels;

	start_callbacks(&ctx, (stbi_io_callbacks *)clbk, user);
	pixels = stbi_load_main(&ctx, x, y, comp, req_comp);
	return pixels;
}
255 
256 #ifndef STBI_NO_HDR
257 
// Decode an image from context `s` as floating-point data.
// HDR input is decoded directly; anything else is decoded as 8-bit data and
// then converted with ldr_to_hdr. Returns NULL on failure.
// (This function is only compiled when STBI_NO_HDR is not defined, so no
// inner guard is needed around the HDR test.)
float *stbi_loadf_main(stbi *s, int *x, int *y, int *comp, int req_comp)
{
	unsigned char *ldr;

	if (stbi_hdr_test(s))
		return stbi_hdr_load(s, x, y, comp, req_comp);

	ldr = stbi_load_main(s, x, y, comp, req_comp);
	if (ldr == NULL)
		return epf("unknown image type", "Image not of any known type, or corrupt");

	return ldr_to_hdr(ldr, *x, *y, req_comp ? req_comp : *comp);
}
270 
// Floating-point decode of an image held entirely in memory.
float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
{
	stbi ctx;
	float *pixels;

	start_mem(&ctx, buffer, len);
	pixels = stbi_loadf_main(&ctx, x, y, comp, req_comp);
	return pixels;
}
277 
// Floating-point decode of an image supplied through user callbacks.
float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
{
	stbi ctx;
	float *pixels;

	start_callbacks(&ctx, (stbi_io_callbacks *)clbk, user);
	pixels = stbi_loadf_main(&ctx, x, y, comp, req_comp);
	return pixels;
}
284 
285 #ifndef STBI_NO_STDIO
// Open `filename` in binary mode, decode it as floating-point data with
// stbi_loadf_from_file and close it again. Returns NULL if the file cannot
// be opened or decoding fails.
float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
{
	float *pixels;
	FILE *fp = fopen(filename, "rb");

	if (fp == NULL)
		return epf("can't fopen", "Unable to open file");

	pixels = stbi_loadf_from_file(fp, x, y, comp, req_comp);
	fclose(fp);
	return pixels;
}
295 
stbi_loadf_from_file(FILE * f,int * x,int * y,int * comp,int req_comp)296 float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
297 {
298 	stbi s;
299 	start_file(&s, f);
300 	return stbi_loadf_main(&s, x, y, comp, req_comp);
301 }
302 #endif  // !STBI_NO_STDIO
303 
304 #endif  // !STBI_NO_HDR
305 
// The is-hdr query functions are defined regardless of whether STBI_NO_HDR
// is defined, for API simplicity; if STBI_NO_HDR is defined, they always
// report false!
309 
// Report whether the in-memory image passes the HDR format test.
// Always 0 when HDR support is compiled out.
int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
{
#ifdef STBI_NO_HDR
	STBI_NOTUSED(buffer);
	STBI_NOTUSED(len);
	return 0;
#else
	stbi probe;
	start_mem(&probe, buffer, len);
	return stbi_hdr_test(&probe);
#endif
}
322 
323 #ifndef STBI_NO_STDIO
// Report whether the file at `filename` passes the HDR format test.
// A file that cannot be opened is reported as not HDR (0).
extern int stbi_is_hdr(char const *filename)
{
	int is_hdr;
	FILE *fp = fopen(filename, "rb");

	if (fp == NULL)
		return 0;

	is_hdr = stbi_is_hdr_from_file(fp);
	fclose(fp);
	return is_hdr;
}
335 
stbi_is_hdr_from_file(FILE * f)336 extern int stbi_is_hdr_from_file(FILE *f)
337 {
338 #ifndef STBI_NO_HDR
339 	stbi s;
340 	start_file(&s, f);
341 	return stbi_hdr_test(&s);
342 #else
343 	return 0;
344 #endif
345 }
346 #endif  // !STBI_NO_STDIO
347 
// Report whether the callback-supplied stream passes the HDR format test.
// Always 0 when HDR support is compiled out.
extern int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
{
#ifdef STBI_NO_HDR
	STBI_NOTUSED(clbk);
	STBI_NOTUSED(user);
	return 0;
#else
	stbi probe;
	start_callbacks(&probe, (stbi_io_callbacks *)clbk, user);
	return stbi_hdr_test(&probe);
#endif
}
358 
359 #ifndef STBI_NO_HDR
// HDR -> LDR conversion parameters, stored as reciprocals because
// hdr_to_ldr multiplies by the inverse scale and raises to the inverse gamma.
static float h2l_gamma_i = 1.0f / 2.2f;
static float h2l_scale_i = 1.0f;

// LDR -> HDR conversion parameters, stored as given.
static float l2h_gamma = 2.2f;
static float l2h_scale = 1.0f;

// Set the gamma used when converting HDR data to LDR.
void stbi_hdr_to_ldr_gamma(float gamma)
{
	h2l_gamma_i = 1.0f / gamma;
}

// Set the scale used when converting HDR data to LDR.
void stbi_hdr_to_ldr_scale(float scale)
{
	h2l_scale_i = 1.0f / scale;
}

// Set the gamma used when converting LDR data to HDR.
void stbi_ldr_to_hdr_gamma(float gamma)
{
	l2h_gamma = gamma;
}

// Set the scale used when converting LDR data to HDR.
void stbi_ldr_to_hdr_scale(float scale)
{
	l2h_scale = scale;
}
368 #endif
369 
370 //////////////////////////////////////////////////////////////////////////////
371 //
372 // Common code used by all image loaders
373 //
374 
375 enum
376 {
377 	SCAN_load = 0,
378 	SCAN_type,
379 	SCAN_header
380 };
381 
// Pull the next chunk of input through the read callback into the
// context's staging buffer and reset the cursor to its start.
//
// When the callback delivers nothing (end of input, or a non-positive
// error return), the context is switched to behave like a memory source
// whose only remaining byte is a 0. The cursor and end pointer are set from
// buffer_start rather than derived from the previous img_buffer_end, because
// that pointer has not been initialised yet when the very first read of an
// empty stream comes back empty.
static void refill_buffer(stbi *s)
{
	int n = (s->io.read)(s->io_user_data, (char *)s->buffer_start, s->buflen);
	if (n <= 0)
	{
		// at end of file, treat same as if from memory, but need to handle
		// the case where the buffer was never filled
		s->read_from_callbacks = 0;
		s->img_buffer = s->buffer_start;
		s->img_buffer_end = s->buffer_start + 1;
		*s->img_buffer = 0;
	}
	else
	{
		s->img_buffer = s->buffer_start;
		s->img_buffer_end = s->buffer_start + n;
	}
}
398 
get8(stbi * s)399 stbi_inline static int get8(stbi *s)
400 {
401 	if (s->img_buffer < s->img_buffer_end)
402 		return *s->img_buffer++;
403 	if (s->read_from_callbacks)
404 	{
405 		refill_buffer(s);
406 		return *s->img_buffer++;
407 	}
408 	return 0;
409 }
410 
at_eof(stbi * s)411 stbi_inline static int at_eof(stbi *s)
412 {
413 	if (s->io.read)
414 	{
415 		if (!(s->io.eof)(s->io_user_data)) return 0;
416 		// if feof() is true, check if buffer = end
417 		// special case: we've only got the special 0 character at the end
418 		if (s->read_from_callbacks == 0) return 1;
419 	}
420 
421 	return s->img_buffer >= s->img_buffer_end;
422 }
423 
get8u(stbi * s)424 stbi_inline static uint8 get8u(stbi *s)
425 {
426 	return (uint8)get8(s);
427 }
428 
// Advance the input position by `n` bytes.
//
// A negative `n` (which can only come from corrupt size fields) is treated
// as "skip to the end of the buffered data" instead of moving the cursor
// backwards. For callback-backed contexts, any part of the skip that lies
// beyond the buffered bytes is forwarded to the skip callback. For
// memory-backed contexts the cursor is clamped to the end of the buffer so
// it never points past it.
static void skip(stbi *s, int n)
{
	int avail;

	if (n < 0)
	{
		s->img_buffer = s->img_buffer_end;
		return;
	}

	avail = (int)(s->img_buffer_end - s->img_buffer);
	if (avail < n)
	{
		// everything buffered is consumed either way
		s->img_buffer = s->img_buffer_end;
		if (s->io.read)
			(s->io.skip)(s->io_user_data, n - avail);
		return;
	}
	s->img_buffer += n;
}
443 
// Read exactly `n` bytes into `buffer`.
// Returns 1 when all `n` bytes were delivered and 0 otherwise.
//
// A negative `n` is rejected up front so it can never reach memcpy as a huge
// unsigned length. The "enough bytes left" test compares `n` against the
// remaining byte count rather than forming `img_buffer + n`, which could
// point far outside the buffer.
static int getn(stbi *s, stbi_uc *buffer, int n)
{
	int avail;

	if (n < 0)
		return 0;

	avail = (int)(s->img_buffer_end - s->img_buffer);

	if (s->io.read && avail < n)
	{
		int count;

		// hand over what is buffered, then read the remainder straight
		// from the callback into the caller's buffer
		if (avail > 0)
			memcpy(buffer, s->img_buffer, avail);
		else
			avail = 0;

		count = (s->io.read)(s->io_user_data, (char *)buffer + avail, n - avail);
		s->img_buffer = s->img_buffer_end;
		return count == (n - avail);
	}

	if (n <= avail)
	{
		memcpy(buffer, s->img_buffer, n);
		s->img_buffer += n;
		return 1;
	}
	return 0;
}
471 
// Read a 16-bit big-endian value (high byte first).
static int get16(stbi *s)
{
	int hi = get8(s);
	int lo = get8(s);
	return (hi << 8) + lo;
}
477 
get32(stbi * s)478 static uint32 get32(stbi *s)
479 {
480 	uint32 z = get16(s);
481 	return (z << 16) + get16(s);
482 }
483 
// Read a 16-bit little-endian value (low byte first).
static int get16le(stbi *s)
{
	int lo = get8(s);
	int hi = get8(s);
	return lo + (hi << 8);
}
489 
get32le(stbi * s)490 static uint32 get32le(stbi *s)
491 {
492 	uint32 z = get16le(s);
493 	return z + (get16le(s) << 16);
494 }
495 
496 //////////////////////////////////////////////////////////////////////////////
497 //
498 //  generic converter from built-in img_n to req_comp
499 //    individual types do this automatically as much as possible (e.g. jpeg
500 //    does all cases internally since it needs to colorspace convert anyway,
501 //    and it never has alpha, so very few cases ). png can automatically
502 //    interleave an alpha=255 channel, but falls back to this for other cases
503 //
504 //  assume data buffer is malloced, so malloc a new one and free that one
505 //  only failure mode is malloc failing
506 
compute_y(int r,int g,int b)507 static uint8 compute_y(int r, int g, int b)
508 {
509 	return (uint8)(((r * 77) + (g * 150) + (29 * b)) >> 8);
510 }
511 
// Convert an x-by-y image with img_n interleaved 8-bit components per pixel
// into one with req_comp components per pixel.
//
//   data      malloc'd source pixels; ownership passes to this function
//   img_n     components per pixel in `data` (1..4)
//   req_comp  components per pixel wanted (1..4)
//   x, y      image dimensions in pixels
//
// Returns `data` itself when no conversion is needed; otherwise a newly
// malloc'd buffer (and `data` is freed). On failure `data` is freed and
// NULL is returned with the failure reason recorded.
//
// Image sizes whose byte count does not fit in size_t are rejected before
// allocating, and row offsets are computed in size_t, so oversized
// dimensions cannot cause a short allocation followed by out-of-bounds
// writes.
static unsigned char *convert_format(unsigned char *data, int img_n, int req_comp, uint x, uint y)
{
	uint row, col;
	size_t widest;
	unsigned char *good;

	if (req_comp == img_n) return data;
	assert(req_comp >= 1 && req_comp <= 4);

	// also enforced at run time so builds without assertions cannot fall
	// through the switch below and return an uninitialised buffer
	if (req_comp < 1 || req_comp > 4 || img_n < 1 || img_n > 4)
	{
		free(data);
		return epuc("unsupported", "Unsupported format conversion");
	}

	// both the source and the destination byte counts must be representable
	widest = (size_t)(img_n > req_comp ? img_n : req_comp);
	if (x != 0 && y > ((size_t)-1) / widest / x)
	{
		free(data);
		return epuc("outofmem", "Out of memory");
	}

	good = (unsigned char *)malloc((size_t)req_comp * x * y);
	if (good == NULL)
	{
		free(data);
		return epuc("outofmem", "Out of memory");
	}

	for (row = 0; row < y; ++row)
	{
		unsigned char *src = data + (size_t)row * x * img_n;
		unsigned char *dest = good + (size_t)row * x * req_comp;

#define COMBO(a, b) ((a)*8 + (b))
#define CASE(a, b)    \
	case COMBO(a, b): \
		for (col = 0; col < x; ++col, src += a, dest += b)
		// convert source image with img_n components to one with req_comp components;
		// avoid switch per pixel, so use switch per scanline and massive macros
		switch (COMBO(img_n, req_comp))
		{
			CASE(1, 2)
			{
				dest[0] = src[0];
				dest[1] = 255;
			}
			break;
			CASE(1, 3)
			{
				dest[0] = dest[1] = dest[2] = src[0];
			}
			break;
			CASE(1, 4)
			{
				dest[0] = dest[1] = dest[2] = src[0];
				dest[3] = 255;
			}
			break;
			CASE(2, 1)
			{
				dest[0] = src[0];
			}
			break;
			CASE(2, 3)
			{
				dest[0] = dest[1] = dest[2] = src[0];
			}
			break;
			CASE(2, 4)
			{
				dest[0] = dest[1] = dest[2] = src[0];
				dest[3] = src[1];
			}
			break;
			CASE(3, 4)
			{
				dest[0] = src[0];
				dest[1] = src[1];
				dest[2] = src[2];
				dest[3] = 255;
			}
			break;
			CASE(3, 1)
			{
				dest[0] = compute_y(src[0], src[1], src[2]);
			}
			break;
			CASE(3, 2)
			{
				dest[0] = compute_y(src[0], src[1], src[2]);
				dest[1] = 255;
			}
			break;
			CASE(4, 1)
			{
				dest[0] = compute_y(src[0], src[1], src[2]);
			}
			break;
			CASE(4, 2)
			{
				dest[0] = compute_y(src[0], src[1], src[2]);
				dest[1] = src[3];
			}
			break;
			CASE(4, 3)
			{
				dest[0] = src[0];
				dest[1] = src[1];
				dest[2] = src[2];
			}
			break;
			default:
				// unreachable: every unequal pair in 1..4 is handled above
				assert(0);
		}
#undef CASE
	}

	free(data);
	return good;
}
592 
593 #ifndef STBI_NO_HDR
// Convert an x-by-y 8-bit image with `comp` interleaved components into
// floating point.
//
// Colour components are mapped through pow(v / 255, l2h_gamma) * l2h_scale.
// When `comp` is even, the last component of each pixel is treated as alpha
// and only rescaled to v / 255.
//
// Ownership of `data` passes to this function (it is freed). Returns a newly
// malloc'd float buffer, or NULL when `data` is NULL or when the buffer
// cannot be allocated. Dimensions whose byte count does not fit in size_t
// are reported as out of memory instead of overflowing the size computation.
static float *ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
{
	size_t i, pixels;
	int k, n;
	float *output = NULL;

	if (data == NULL) return NULL;

	if (x >= 0 && y >= 0 && comp > 0 &&
		(x == 0 || (size_t)y <= ((size_t)-1) / sizeof(float) / (size_t)comp / (size_t)x))
		output = (float *)malloc((size_t)x * (size_t)y * (size_t)comp * sizeof(float));
	if (output == NULL)
	{
		free(data);
		return epf("outofmem", "Out of memory");
	}

	// compute number of non-alpha components
	n = (comp & 1) ? comp : comp - 1;

	pixels = (size_t)x * (size_t)y;
	for (i = 0; i < pixels; ++i)
	{
		const stbi_uc *src = data + i * (size_t)comp;
		float *dst = output + i * (size_t)comp;

		for (k = 0; k < n; ++k)
			dst[k] = (float)pow(src[k] / 255.0f, l2h_gamma) * l2h_scale;
		// remaining component (if any) is alpha: linear, no gamma or scale
		if (k < comp) dst[k] = src[k] / 255.0f;
	}
	free(data);
	return output;
}
619 
620 #define float2int(x) ((int)(x))
hdr_to_ldr(float * data,int x,int y,int comp)621 static stbi_uc *hdr_to_ldr(float *data, int x, int y, int comp)
622 {
623 	int i, k, n;
624 	stbi_uc *output = (stbi_uc *)malloc(x * y * comp);
625 	if (output == NULL)
626 	{
627 		free(data);
628 		return epuc("outofmem", "Out of memory");
629 	}
630 	// compute number of non-alpha components
631 	if (comp & 1)
632 		n = comp;
633 	else
634 		n = comp - 1;
635 	for (i = 0; i < x * y; ++i)
636 	{
637 		for (k = 0; k < n; ++k)
638 		{
639 			float z = (float)pow(data[i * comp + k] * h2l_scale_i, h2l_gamma_i) * 255 + 0.5f;
640 			if (z < 0) z = 0;
641 			if (z > 255) z = 255;
642 			output[i * comp + k] = (uint8)float2int(z);
643 		}
644 		if (k < comp)
645 		{
646 			float z = data[i * comp + k] * 255 + 0.5f;
647 			if (z < 0) z = 0;
648 			if (z > 255) z = 255;
649 			output[i * comp + k] = (uint8)float2int(z);
650 		}
651 	}
652 	free(data);
653 	return output;
654 }
655 #endif
656 
657 //////////////////////////////////////////////////////////////////////////////
658 //
659 //  "baseline" JPEG/JFIF decoder (not actually fully baseline implementation)
660 //
661 //    simple implementation
662 //      - channel subsampling of at most 2 in each dimension
663 //      - doesn't support delayed output of y-dimension
664 //      - simple interface (only one output format: 8-bit interleaved RGB)
665 //      - doesn't try to recover corrupt jpegs
666 //      - doesn't allow partial loading, loading multiple at once
667 //      - still fast on x86 (copying globals into locals doesn't help x86)
668 //      - allocates lots of intermediate memory (full size of all components)
669 //        - non-interleaved case requires this anyway
670 //        - allows good upsampling (see next)
671 //    high-quality
672 //      - upsampled channels are bilinearly interpolated, even across blocks
673 //      - quality integer IDCT derived from IJG's 'slow'
674 //    performance
675 //      - fast huffman; reasonable integer IDCT
676 //      - uses a lot of intermediate memory, could cache poorly
677 //      - load http://nothings.org/remote/anemones.jpg 3 times on 2.8Ghz P4
678 //          stb_jpeg:   1.34 seconds (MSVC6, default release build)
679 //          stb_jpeg:   1.06 seconds (MSVC6, processor = Pentium Pro)
680 //          IJL11.dll:  1.08 seconds (compiled by intel)
681 //          IJG 1998:   0.98 seconds (MSVC6, makefile provided by IJG)
682 //          IJG 1998:   0.95 seconds (MSVC6, makefile + proc=PPro)
683 
684 // huffman decoding acceleration
685 #define FAST_BITS 9  // larger handles more cases; smaller stomps less cache
686 
// Decoding tables for one JPEG Huffman table; everything except `values`
// is filled in by build_huffman().
typedef struct
{
	// indexed by the next FAST_BITS bits of input: symbol index for codes no
	// longer than FAST_BITS, or 255 when the code is not accelerated
	uint8 fast[1 << FAST_BITS];
	// weirdly, repacking this into AoS is a 10% speed loss, instead of a win
	// code bits assigned to each symbol index
	uint16 code[256];
	// value returned by decode() for each symbol index (populated outside
	// build_huffman, not visible here)
	uint8 values[256];
	// code length of each symbol index, terminated by a 0 entry
	uint8 size[257];
	// per code length: largest code + 1, preshifted to 16 bits; the entry
	// after the last length is an all-ones sentinel
	unsigned int maxcode[18];
	int delta[17];  // old 'firstsymbol' - old 'firstcode'
} huffman;
697 
// State of one JPEG decode: the input context, the Huffman and
// dequantization tables, per-component bookkeeping and the entropy-coded
// bit buffer.
typedef struct
{
#ifdef STBI_SIMD
	unsigned short dequant2[4][64];
#endif
	// input context the compressed bytes are read from
	stbi *s;
	// DC and AC Huffman tables (four slots each)
	huffman huff_dc[4];
	huffman huff_ac[4];
	// dequantization tables (four slots of 64 entries)
	uint8 dequant[4][64];

	// sizes for components, interleaved MCUs
	int img_h_max, img_v_max;
	int img_mcu_x, img_mcu_y;
	int img_mcu_w, img_mcu_h;

	// definition of jpeg image component
	struct
	{
		int id;
		int h, v;
		int tq;
		int hd, ha;
		// running DC predictor: decode_block adds each decoded difference to it
		int dc_pred;

		int x, y, w2, h2;
		uint8 *data;
		void *raw_data;
		uint8 *linebuf;
	} img_comp[4];

	uint32 code_buffer;    // jpeg entropy-coded buffer
	int code_bits;         // number of valid bits
	unsigned char marker;  // marker seen while filling entropy buffer
	int nomore;            // flag if we saw a marker so must stop

	int scan_n, order[4];
	int restart_interval, todo;
} jpeg;
736 
// Build the decoding tables of `h` from the 16 per-length symbol counts in
// `count` (count[i] = number of codes of length i + 1).
//
// Fills h->size, h->code, h->delta, h->maxcode and the h->fast acceleration
// table. Returns 1 on success and 0 (with the failure reason recorded) when
// the counts describe more than 256 symbols or an impossible set of code
// lengths.
static int build_huffman(huffman *h, int *count)
{
	int i, j, k = 0, code;
	// build size list for each symbol (from JPEG spec)
	for (i = 0; i < 16; ++i)
		for (j = 0; j < count[i]; ++j)
		{
			// size[] holds 257 entries and the last one is needed for the
			// terminator, so a corrupt table with more than 256 symbols
			// must be rejected before it overruns the array
			if (k >= 256) return e("bad size list", "Corrupt JPEG");
			h->size[k++] = (uint8)(i + 1);
		}
	h->size[k] = 0;

	// compute actual symbols (from jpeg spec)
	code = 0;
	k = 0;
	for (j = 1; j <= 16; ++j)
	{
		// compute delta to add to code to compute symbol id
		h->delta[j] = k - code;
		if (h->size[k] == j)
		{
			while (h->size[k] == j)
				h->code[k++] = (uint16)(code++);
			if (code - 1 >= (1 << j)) return e("bad code lengths", "Corrupt JPEG");
		}
		// compute largest code + 1 for this size, preshifted as needed later
		h->maxcode[j] = code << (16 - j);
		code <<= 1;
	}
	// sentinel (j == 17 here) so the length search in decode() always stops
	h->maxcode[j] = 0xffffffff;

	// build non-spec acceleration table; 255 is flag for not-accelerated
	memset(h->fast, 255, 1 << FAST_BITS);
	for (i = 0; i < k; ++i)
	{
		int s = h->size[i];
		if (s <= FAST_BITS)
		{
			// every FAST_BITS-wide bit pattern that starts with this code
			// maps to symbol i
			int c = h->code[i] << (FAST_BITS - s);
			int m = 1 << (FAST_BITS - s);
			for (j = 0; j < m; ++j)
			{
				h->fast[c + j] = (uint8)i;
			}
		}
	}
	return 1;
}
782 
// Top up the entropy-coded bit buffer a byte at a time until it holds more
// than 24 valid bits.
//
// A 0xFF byte followed by 0x00 is a stuffed data byte. A 0xFF followed by
// anything else is a marker: it is remembered in j->marker, j->nomore is
// set, and from then on zero bits are fed instead of input. Any run of
// extra 0xFF fill bytes in front of the marker code is skipped first.
//
// The incoming byte is held as unsigned so that shifting it into the top of
// the 32-bit buffer never overflows a signed int.
static void grow_buffer_unsafe(jpeg *j)
{
	do
	{
		unsigned int b = j->nomore ? 0 : (unsigned int)get8(j->s);
		if (b == 0xff)
		{
			int c = get8(j->s);
			while (c == 0xff) c = get8(j->s);  // consume fill bytes
			if (c != 0)
			{
				j->marker = (unsigned char)c;
				j->nomore = 1;
				return;
			}
		}
		j->code_buffer |= b << (24 - j->code_bits);
		j->code_bits += 8;
	} while (j->code_bits <= 24);
}
802 
803 // (1 << n) - 1
804 static uint32 bmask[17] = {0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767, 65535};
805 
806 // decode a jpeg huffman value from the bitstream
decode(jpeg * j,huffman * h)807 stbi_inline static int decode(jpeg *j, huffman *h)
808 {
809 	unsigned int temp;
810 	int c, k;
811 
812 	if (j->code_bits < 16) grow_buffer_unsafe(j);
813 
814 	// look at the top FAST_BITS and determine what symbol ID it is,
815 	// if the code is <= FAST_BITS
816 	c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
817 	k = h->fast[c];
818 	if (k < 255)
819 	{
820 		int s = h->size[k];
821 		if (s > j->code_bits)
822 			return -1;
823 		j->code_buffer <<= s;
824 		j->code_bits -= s;
825 		return h->values[k];
826 	}
827 
828 	// naive test is to shift the code_buffer down so k bits are
829 	// valid, then test against maxcode. To speed this up, we've
830 	// preshifted maxcode left so that it has (16-k) 0s at the
831 	// end; in other words, regardless of the number of bits, it
832 	// wants to be compared against something shifted to have 16;
833 	// that way we don't need to shift inside the loop.
834 	temp = j->code_buffer >> 16;
835 	for (k = FAST_BITS + 1;; ++k)
836 		if (temp < h->maxcode[k])
837 			break;
838 	if (k == 17)
839 	{
840 		// error! code not found
841 		j->code_bits -= 16;
842 		return -1;
843 	}
844 
845 	if (k > j->code_bits)
846 		return -1;
847 
848 	// convert the huffman code to the symbol id
849 	c = ((j->code_buffer >> (32 - k)) & bmask[k]) + h->delta[k];
850 	assert((((j->code_buffer) >> (32 - h->size[c])) & bmask[h->size[c]]) == h->code[c]);
851 
852 	// convert the id to a symbol
853 	j->code_bits -= k;
854 	j->code_buffer <<= k;
855 	return h->values[c];
856 }
857 
858 // combined JPEG 'receive' and JPEG 'extend', since baseline
859 // always extends everything it receives.
extend_receive(jpeg * j,int n)860 stbi_inline static int extend_receive(jpeg *j, int n)
861 {
862 	unsigned int m = 1 << (n - 1);
863 	unsigned int k;
864 	if (j->code_bits < n) grow_buffer_unsafe(j);
865 
866 #if 1
867 	k = stbi_lrot(j->code_buffer, n);
868 	j->code_buffer = k & ~bmask[n];
869 	k &= bmask[n];
870 	j->code_bits -= n;
871 #else
872 	k = (j->code_buffer >> (32 - n)) & bmask[n];
873 	j->code_bits -= n;
874 	j->code_buffer <<= n;
875 #endif
876 	// the following test is probably a random branch that won't
877 	// predict well. I tried to table accelerate it but failed.
878 	// maybe it's compiling as a conditional move?
879 	if (k < m)
880 		return (-1 << n) + k + 1;
881 	else
882 		return k;
883 }
884 
885 // given a value that's at position X in the zigzag stream,
886 // where does it appear in the 8x8 matrix coded as row-major?
887 static uint8 dezigzag[64 + 15] =
888 	{
889 		0, 1, 8, 16, 9, 2, 3, 10,
890 		17, 24, 32, 25, 18, 11, 4, 5,
891 		12, 19, 26, 33, 40, 48, 41, 34,
892 		27, 20, 13, 6, 7, 14, 21, 28,
893 		35, 42, 49, 56, 57, 50, 43, 36,
894 		29, 22, 15, 23, 30, 37, 44, 51,
895 		58, 59, 52, 45, 38, 31, 39, 46,
896 		53, 60, 61, 54, 47, 55, 62, 63,
897 		// let corrupt input sample past end
898 		63, 63, 63, 63, 63, 63, 63, 63,
899 		63, 63, 63, 63, 63, 63, 63};
900 
901 // decode one 64-entry block--
decode_block(jpeg * j,short data[64],huffman * hdc,huffman * hac,int b)902 static int decode_block(jpeg *j, short data[64], huffman *hdc, huffman *hac, int b)
903 {
904 	int diff, dc, k;
905 	int t = decode(j, hdc);
906 	if (t < 0) return e("bad huffman code", "Corrupt JPEG");
907 
908 	// 0 all the ac values now so we can do it 32-bits at a time
909 	memset(data, 0, 64 * sizeof(data[0]));
910 
911 	diff = t ? extend_receive(j, t) : 0;
912 	dc = j->img_comp[b].dc_pred + diff;
913 	j->img_comp[b].dc_pred = dc;
914 	data[0] = (short)dc;
915 
916 	// decode AC components, see JPEG spec
917 	k = 1;
918 	do
919 	{
920 		int r, s;
921 		int rs = decode(j, hac);
922 		if (rs < 0) return e("bad huffman code", "Corrupt JPEG");
923 		s = rs & 15;
924 		r = rs >> 4;
925 		if (s == 0)
926 		{
927 			if (rs != 0xf0) break;  // end block
928 			k += 16;
929 		}
930 		else
931 		{
932 			k += r;
933 			// decode into unzigzag'd location
934 			data[dezigzag[k++]] = (short)extend_receive(j, s);
935 		}
936 	} while (k < 64);
937 	return 1;
938 }
939 
940 // take a -128..127 value and clamp it and convert to 0..255
clamp(int x)941 stbi_inline static uint8 clamp(int x)
942 {
943 	// trick to use a single test to catch both cases
944 	if ((unsigned int)x > 255)
945 	{
946 		if (x < 0) return 0;
947 		if (x > 255) return 255;
948 	}
949 	return (uint8)x;
950 }
951 
952 #define f2f(x) (int)(((x)*4096 + 0.5))
953 #define fsh(x) ((x) << 12)
954 
955 // derived from jidctint -- DCT_ISLOW
956 #define IDCT_1D(s0, s1, s2, s3, s4, s5, s6, s7)             \
957 	int t0, t1, t2, t3, p1, p2, p3, p4, p5, x0, x1, x2, x3; \
958 	p2 = s2;                                                \
959 	p3 = s6;                                                \
960 	p1 = (p2 + p3) * f2f(0.5411961f);                       \
961 	t2 = p1 + p3 * f2f(-1.847759065f);                      \
962 	t3 = p1 + p2 * f2f(0.765366865f);                       \
963 	p2 = s0;                                                \
964 	p3 = s4;                                                \
965 	t0 = fsh(p2 + p3);                                      \
966 	t1 = fsh(p2 - p3);                                      \
967 	x0 = t0 + t3;                                           \
968 	x3 = t0 - t3;                                           \
969 	x1 = t1 + t2;                                           \
970 	x2 = t1 - t2;                                           \
971 	t0 = s7;                                                \
972 	t1 = s5;                                                \
973 	t2 = s3;                                                \
974 	t3 = s1;                                                \
975 	p3 = t0 + t2;                                           \
976 	p4 = t1 + t3;                                           \
977 	p1 = t0 + t3;                                           \
978 	p2 = t1 + t2;                                           \
979 	p5 = (p3 + p4) * f2f(1.175875602f);                     \
980 	t0 = t0 * f2f(0.298631336f);                            \
981 	t1 = t1 * f2f(2.053119869f);                            \
982 	t2 = t2 * f2f(3.072711026f);                            \
983 	t3 = t3 * f2f(1.501321110f);                            \
984 	p1 = p5 + p1 * f2f(-0.899976223f);                      \
985 	p2 = p5 + p2 * f2f(-2.562915447f);                      \
986 	p3 = p3 * f2f(-1.961570560f);                           \
987 	p4 = p4 * f2f(-0.390180644f);                           \
988 	t3 += p1 + p4;                                          \
989 	t2 += p2 + p3;                                          \
990 	t1 += p2 + p4;                                          \
991 	t0 += p1 + p3;
992 
993 #ifdef STBI_SIMD
994 typedef unsigned short stbi_dequantize_t;
995 #else
996 typedef uint8 stbi_dequantize_t;
997 #endif
998 
999 // .344 seconds on 3*anemones.jpg
idct_block(uint8 * out,int out_stride,short data[64],stbi_dequantize_t * dequantize)1000 static void idct_block(uint8 *out, int out_stride, short data[64], stbi_dequantize_t *dequantize)
1001 {
1002 	int i, val[64], *v = val;
1003 	stbi_dequantize_t *dq = dequantize;
1004 	uint8 *o;
1005 	short *d = data;
1006 
1007 	// columns
1008 	for (i = 0; i < 8; ++i, ++d, ++dq, ++v)
1009 	{
1010 		// if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
1011 		if (d[8] == 0 && d[16] == 0 && d[24] == 0 && d[32] == 0 && d[40] == 0 && d[48] == 0 && d[56] == 0)
1012 		{
1013 			//    no shortcut                 0     seconds
1014 			//    (1|2|3|4|5|6|7)==0          0     seconds
1015 			//    all separate               -0.047 seconds
1016 			//    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
1017 			int dcterm = d[0] * dq[0] << 2;
1018 			v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
1019 		}
1020 		else
1021 		{
1022 			IDCT_1D(d[0] * dq[0], d[8] * dq[8], d[16] * dq[16], d[24] * dq[24],
1023 					d[32] * dq[32], d[40] * dq[40], d[48] * dq[48], d[56] * dq[56])
1024 			// constants scaled things up by 1<<12; let's bring them back
1025 			// down, but keep 2 extra bits of precision
1026 			x0 += 512;
1027 			x1 += 512;
1028 			x2 += 512;
1029 			x3 += 512;
1030 			v[0] = (x0 + t3) >> 10;
1031 			v[56] = (x0 - t3) >> 10;
1032 			v[8] = (x1 + t2) >> 10;
1033 			v[48] = (x1 - t2) >> 10;
1034 			v[16] = (x2 + t1) >> 10;
1035 			v[40] = (x2 - t1) >> 10;
1036 			v[24] = (x3 + t0) >> 10;
1037 			v[32] = (x3 - t0) >> 10;
1038 		}
1039 	}
1040 
1041 	for (i = 0, v = val, o = out; i < 8; ++i, v += 8, o += out_stride)
1042 	{
1043 		// no fast case since the first 1D IDCT spread components out
1044 		IDCT_1D(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7])
1045 		// constants scaled things up by 1<<12, plus we had 1<<2 from first
1046 		// loop, plus horizontal and vertical each scale by sqrt(8) so together
1047 		// we've got an extra 1<<3, so 1<<17 total we need to remove.
1048 		// so we want to round that, which means adding 0.5 * 1<<17,
1049 		// aka 65536. Also, we'll end up with -128 to 127 that we want
1050 		// to encode as 0..255 by adding 128, so we'll add that before the shift
1051 		x0 += 65536 + (128 << 17);
1052 		x1 += 65536 + (128 << 17);
1053 		x2 += 65536 + (128 << 17);
1054 		x3 += 65536 + (128 << 17);
1055 		// tried computing the shifts into temps, or'ing the temps to see
1056 		// if any were out of range, but that was slower
1057 		o[0] = clamp((x0 + t3) >> 17);
1058 		o[7] = clamp((x0 - t3) >> 17);
1059 		o[1] = clamp((x1 + t2) >> 17);
1060 		o[6] = clamp((x1 - t2) >> 17);
1061 		o[2] = clamp((x2 + t1) >> 17);
1062 		o[5] = clamp((x2 - t1) >> 17);
1063 		o[3] = clamp((x3 + t0) >> 17);
1064 		o[4] = clamp((x3 - t0) >> 17);
1065 	}
1066 }
1067 
#ifdef STBI_SIMD
// IDCT routine the JPEG decoder calls in STBI_SIMD builds; starts out as
// the plain C idct_block
static stbi_idct_8x8 stbi_idct_installed = idct_block;

// replace the IDCT routine used for all subsequently decoded blocks
void stbi_install_idct(stbi_idct_8x8 func)
{
	stbi_idct_installed = func;
}
#endif
1076 
1077 #define MARKER_none 0xff
1078 // if there's a pending marker from the entropy stream, return that
1079 // otherwise, fetch from the stream and get a marker. if there's no
1080 // marker, return 0xff, which is never a valid marker value
get_marker(jpeg * j)1081 static uint8 get_marker(jpeg *j)
1082 {
1083 	uint8 x;
1084 	if (j->marker != MARKER_none)
1085 	{
1086 		x = j->marker;
1087 		j->marker = MARKER_none;
1088 		return x;
1089 	}
1090 	x = get8u(j->s);
1091 	if (x != 0xff) return MARKER_none;
1092 	while (x == 0xff)
1093 		x = get8u(j->s);
1094 	return x;
1095 }
1096 
1097 // in each scan, we'll have scan_n components, and the order
1098 // of the components is specified by order[]
1099 #define RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7)
1100 
1101 // after a restart interval, reset the entropy decoder and
1102 // the dc prediction
reset(jpeg * j)1103 static void reset(jpeg *j)
1104 {
1105 	j->code_bits = 0;
1106 	j->code_buffer = 0;
1107 	j->nomore = 0;
1108 	j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = 0;
1109 	j->marker = MARKER_none;
1110 	j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
1111 	// no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
1112 	// since we don't even allow 1<<30 pixels
1113 }
1114 
// Decode the entropy-coded data of one scan into the per-component sample
// buffers (img_comp[n].data). The decoder state is reset first; then every
// 8x8 block is read with decode_block() and run through the IDCT directly
// into its place in the component buffer.
// Returns 0 as soon as decode_block() fails. Returns 1 otherwise, including
// the case where a restart interval runs out and no restart marker is
// pending, in which case decoding stops early with whatever was produced.
static int parse_entropy_coded_data(jpeg *z)
{
	reset(z);
	if (z->scan_n == 1)
	{
		int i, j;
#ifdef STBI_SIMD
		__declspec(align(16))
#endif
			short data[64];
		int n = z->order[0];
		// non-interleaved data, we just need to process one block at a time,
		// in trivial scanline order
		// number of blocks to do just depends on how many actual "pixels" this
		// component has, independent of interleaved MCU blocking and such
		int w = (z->img_comp[n].x + 7) >> 3;
		int h = (z->img_comp[n].y + 7) >> 3;
		for (j = 0; j < h; ++j)
		{
			for (i = 0; i < w; ++i)
			{
				if (!decode_block(z, data, z->huff_dc + z->img_comp[n].hd, z->huff_ac + z->img_comp[n].ha, n)) return 0;
				// block (i,j) lands at row j*8, column i*8 of the component buffer
#ifdef STBI_SIMD
				stbi_idct_installed(z->img_comp[n].data + z->img_comp[n].w2 * j * 8 + i * 8, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]);
#else
				idct_block(z->img_comp[n].data + z->img_comp[n].w2 * j * 8 + i * 8, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]);
#endif
				// every data block is an MCU, so countdown the restart interval
				if (--z->todo <= 0)
				{
					if (z->code_bits < 24) grow_buffer_unsafe(z);
					// if it's NOT a restart, then just bail, so we get corrupt data
					// rather than no data
					if (!RESTART(z->marker)) return 1;
					reset(z);
				}
			}
		}
	}
	else
	{  // interleaved!
		int i, j, k, x, y;
		short data[64];
		for (j = 0; j < z->img_mcu_y; ++j)
		{
			for (i = 0; i < z->img_mcu_x; ++i)
			{
				// scan an interleaved mcu... process scan_n components in order
				for (k = 0; k < z->scan_n; ++k)
				{
					int n = z->order[k];
					// scan out an mcu's worth of this component; that's just determined
					// by the basic H and V specified for the component
					for (y = 0; y < z->img_comp[n].v; ++y)
					{
						for (x = 0; x < z->img_comp[n].h; ++x)
						{
							// top-left sample position of this block within the component
							int x2 = (i * z->img_comp[n].h + x) * 8;
							int y2 = (j * z->img_comp[n].v + y) * 8;
							if (!decode_block(z, data, z->huff_dc + z->img_comp[n].hd, z->huff_ac + z->img_comp[n].ha, n)) return 0;
#ifdef STBI_SIMD
							stbi_idct_installed(z->img_comp[n].data + z->img_comp[n].w2 * y2 + x2, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]);
#else
							idct_block(z->img_comp[n].data + z->img_comp[n].w2 * y2 + x2, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]);
#endif
						}
					}
				}
				// after all interleaved components, that's an interleaved MCU,
				// so now count down the restart interval
				if (--z->todo <= 0)
				{
					if (z->code_bits < 24) grow_buffer_unsafe(z);
					// if it's NOT a restart, then just bail, so we get corrupt data
					// rather than no data
					if (!RESTART(z->marker)) return 1;
					reset(z);
				}
			}
		}
	}
	return 1;
}
1198 
// Handle one marker segment other than SOF/SOS/EOI.
//   z - decoder state; tables and the restart interval are stored into it
//   m - marker code as returned by get_marker()
// Understood segments: DRI (restart interval), DQT (quantization tables),
// DHT (huffman tables); APPn and COM segments are skipped. Progressive SOF,
// a missing marker, malformed segments and unknown markers are failures.
// Returns nonzero on success and 0 on failure.
static int process_marker(jpeg *z, int m)
{
	int L;
	switch (m)
	{
		case MARKER_none:  // no marker found
			return e("expected marker", "Corrupt JPEG");

		case 0xC2:  // SOF - progressive
			return e("progressive jpeg", "JPEG format not supported (progressive)");

		case 0xDD:  // DRI - specify restart interval
			if (get16(z->s) != 4) return e("bad DRI len", "Corrupt JPEG");
			z->restart_interval = get16(z->s);
			return 1;

		case 0xDB:  // DQT - define quantization table
			L = get16(z->s) - 2;
			while (L > 0)
			{
				int q = get8(z->s);
				int p = q >> 4;  // precision: only 8-bit entries are handled
				int t = q & 15, i;
				if (p != 0) return e("bad DQT type", "Corrupt JPEG");
				if (t > 3) return e("bad DQT table", "Corrupt JPEG");
				// entries arrive in zigzag order; store them in natural order
				for (i = 0; i < 64; ++i)
					z->dequant[t][dezigzag[i]] = get8u(z->s);
#ifdef STBI_SIMD
				for (i = 0; i < 64; ++i)
					z->dequant2[t][i] = z->dequant[t][i];
#endif
				L -= 65;
			}
			// the tables must account for the segment length exactly
			return L == 0;

		case 0xC4:  // DHT - define huffman table
			L = get16(z->s) - 2;
			while (L > 0)
			{
				uint8 *v;
				int sizes[16], i, total = 0;
				int q = get8(z->s);
				int tc = q >> 4;  // table class: 0 = DC, 1 = AC
				int th = q & 15;  // table slot
				if (tc > 1 || th > 3) return e("bad DHT header", "Corrupt JPEG");
				for (i = 0; i < 16; ++i)
				{
					sizes[i] = get8(z->s);
					total += sizes[i];
				}
				// symbols are 8 bits wide, so a valid table holds at most 256
				// codes; the 16 count bytes can claim far more. Reject such
				// tables before they are built or filled.
				// NOTE(review): presumes the huffman tables hold 256 entries
				// -- confirm against the huffman struct.
				if (total > 256) return e("bad DHT header", "Corrupt JPEG");
				L -= 17;
				if (tc == 0)
				{
					if (!build_huffman(z->huff_dc + th, sizes)) return 0;
					v = z->huff_dc[th].values;
				}
				else
				{
					if (!build_huffman(z->huff_ac + th, sizes)) return 0;
					v = z->huff_ac[th].values;
				}
				for (i = 0; i < total; ++i)
					v[i] = get8u(z->s);
				L -= total;
			}
			return L == 0;
	}
	// check for comment block or APP blocks
	if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE)
	{
		// the length field counts its own two bytes; anything smaller would
		// turn into a negative skip
		L = get16(z->s);
		if (L < 2)
		{
			if (m == 0xFE) return e("bad COM len", "Corrupt JPEG");
			return e("bad APP len", "Corrupt JPEG");
		}
		skip(z->s, L - 2);
		return 1;
	}
	return e("unknown marker", "Corrupt JPEG");
}
1274 
1275 // after we see SOS
process_scan_header(jpeg * z)1276 static int process_scan_header(jpeg *z)
1277 {
1278 	int i;
1279 	int Ls = get16(z->s);
1280 	z->scan_n = get8(z->s);
1281 	if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int)z->s->img_n) return e("bad SOS component count", "Corrupt JPEG");
1282 	if (Ls != 6 + 2 * z->scan_n) return e("bad SOS len", "Corrupt JPEG");
1283 	for (i = 0; i < z->scan_n; ++i)
1284 	{
1285 		int id = get8(z->s), which;
1286 		int q = get8(z->s);
1287 		for (which = 0; which < z->s->img_n; ++which)
1288 			if (z->img_comp[which].id == id)
1289 				break;
1290 		if (which == z->s->img_n) return 0;
1291 		z->img_comp[which].hd = q >> 4;
1292 		if (z->img_comp[which].hd > 3) return e("bad DC huff", "Corrupt JPEG");
1293 		z->img_comp[which].ha = q & 15;
1294 		if (z->img_comp[which].ha > 3) return e("bad AC huff", "Corrupt JPEG");
1295 		z->order[i] = which;
1296 	}
1297 	if (get8(z->s) != 0) return e("bad SOS", "Corrupt JPEG");
1298 	get8(z->s);  // should be 63, but might be 0
1299 	if (get8(z->s) != 0) return e("bad SOS", "Corrupt JPEG");
1300 
1301 	return 1;
1302 }
1303 
// Parse the frame header that follows an SOF marker: sample precision, image
// size, component count and per-component id / sampling factors /
// quantization table. When scan is SCAN_load it also derives the MCU layout
// and allocates one sample buffer per component; for any other scan value it
// stops after validating the header. Returns 1 on success; every failure
// except the unsupported cases is reported through e().
static int process_frame_header(jpeg *z, int scan)
{
	stbi *s = z->s;
	int seg_len, precision, ncomp, i;
	int h_max = 1, v_max = 1;

	seg_len = get16(s);
	if (seg_len < 11) return e("bad SOF len", "Corrupt JPEG");  // JPEG
	precision = get8(s);
	if (precision != 8) return e("only 8-bit", "JPEG format not supported: 8-bit only");  // JPEG baseline
	s->img_y = get16(s);
	if (s->img_y == 0) return e("no header height", "JPEG format not supported: delayed height");  // Legal, but we don't handle it--but neither does IJG
	s->img_x = get16(s);
	if (s->img_x == 0) return e("0 width", "Corrupt JPEG");  // JPEG requires
	ncomp = get8(s);
	if (ncomp != 3 && ncomp != 1) return e("bad component count", "Corrupt JPEG");  // JFIF requires
	s->img_n = ncomp;

	// make sure cleanup never sees stale buffer pointers
	for (i = 0; i < ncomp; ++i)
	{
		z->img_comp[i].data = NULL;
		z->img_comp[i].linebuf = NULL;
	}

	if (seg_len != 8 + 3 * s->img_n) return e("bad SOF len", "Corrupt JPEG");

	for (i = 0; i < s->img_n; ++i)
	{
		int sampling;

		z->img_comp[i].id = get8(s);
		// JFIF requires ids 1..n, but some version of jpegtran outputs
		// non-JFIF-compliant files numbered 0..n-1
		if (z->img_comp[i].id != i + 1 && z->img_comp[i].id != i)
			return e("bad component ID", "Corrupt JPEG");

		// high nibble is the horizontal factor, low nibble the vertical one
		sampling = get8(s);
		z->img_comp[i].h = (sampling >> 4);
		if (!z->img_comp[i].h || z->img_comp[i].h > 4) return e("bad H", "Corrupt JPEG");
		z->img_comp[i].v = sampling & 15;
		if (!z->img_comp[i].v || z->img_comp[i].v > 4) return e("bad V", "Corrupt JPEG");

		z->img_comp[i].tq = get8(s);
		if (z->img_comp[i].tq > 3) return e("bad TQ", "Corrupt JPEG");
	}

	// header-only callers are done here
	if (scan != SCAN_load) return 1;

	if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode");

	for (i = 0; i < s->img_n; ++i)
	{
		if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
		if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
	}

	// compute interleaved mcu info
	z->img_h_max = h_max;
	z->img_v_max = v_max;
	z->img_mcu_w = h_max * 8;
	z->img_mcu_h = v_max * 8;
	z->img_mcu_x = (s->img_x + z->img_mcu_w - 1) / z->img_mcu_w;
	z->img_mcu_y = (s->img_y + z->img_mcu_h - 1) / z->img_mcu_h;

	for (i = 0; i < s->img_n; ++i)
	{
		// number of effective pixels (e.g. for non-interleaved MCU)
		z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max - 1) / h_max;
		z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max - 1) / v_max;
		// to simplify generation, we'll allocate enough memory to decode
		// the bogus oversized data from using interleaved MCUs and their
		// big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
		// discard the extra data until colorspace conversion
		z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
		z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
		// 15 spare bytes leave room to round the pointer up to 16
		z->img_comp[i].raw_data = malloc(z->img_comp[i].w2 * z->img_comp[i].h2 + 15);
		if (z->img_comp[i].raw_data == NULL)
		{
			// release the buffers of the components allocated so far
			while (--i >= 0)
			{
				free(z->img_comp[i].raw_data);
				z->img_comp[i].data = NULL;
			}
			return e("outofmem", "Out of memory");
		}
		// align blocks for installable-idct using mmx/sse
		z->img_comp[i].data = (uint8 *)(((size_t)z->img_comp[i].raw_data + 15) & ~15);
		z->img_comp[i].linebuf = NULL;
	}

	return 1;
}
1388 
1389 // use comparisons since in some cases we handle more than one case (e.g. SOF)
1390 #define DNL(x) ((x) == 0xdc)
1391 #define SOI(x) ((x) == 0xd8)
1392 #define EOI(x) ((x) == 0xd9)
1393 #define SOF(x) ((x) == 0xc0 || (x) == 0xc1)
1394 #define SOS(x) ((x) == 0xda)
1395 
decode_jpeg_header(jpeg * z,int scan)1396 static int decode_jpeg_header(jpeg *z, int scan)
1397 {
1398 	int m;
1399 	z->marker = MARKER_none;  // initialize cached marker to empty
1400 	m = get_marker(z);
1401 	if (!SOI(m)) return e("no SOI", "Corrupt JPEG");
1402 	if (scan == SCAN_type) return 1;
1403 	m = get_marker(z);
1404 	while (!SOF(m))
1405 	{
1406 		if (!process_marker(z, m)) return 0;
1407 		m = get_marker(z);
1408 		while (m == MARKER_none)
1409 		{
1410 			// some files have extra padding after their blocks, so ok, we'll scan
1411 			if (at_eof(z->s)) return e("no SOF", "Corrupt JPEG");
1412 			m = get_marker(z);
1413 		}
1414 	}
1415 	if (!process_frame_header(z, scan)) return 0;
1416 	return 1;
1417 }
1418 
// Decode a complete JPEG: the header first, then every segment up to the EOI
// marker. SOS segments are decoded as scans; everything else goes through
// process_marker(). Returns 1 on success, 0 on failure.
static int decode_jpeg_image(jpeg *j)
{
	int m;

	j->restart_interval = 0;
	if (!decode_jpeg_header(j, SCAN_load)) return 0;

	for (m = get_marker(j); !EOI(m); m = get_marker(j))
	{
		if (!SOS(m))
		{
			if (!process_marker(j, m)) return 0;
			continue;
		}

		if (!process_scan_header(j)) return 0;
		if (!parse_entropy_coded_data(j)) return 0;
		if (j->marker != MARKER_none) continue;

		// handle 0s at the end of image data from IP Kamera 9060: skip zero
		// bytes until a 0xff introduces the next marker; any other byte is
		// an error
		while (!at_eof(j->s))
		{
			int byte = get8(j->s);
			if (byte == 255)
			{
				j->marker = get8u(j->s);
				break;
			}
			if (byte != 0) return 0;
		}
		// if we reach eof without hitting a marker, the get_marker() in the
		// loop header will fail and we'll eventually return 0
	}
	return 1;
}
1458 
// static jfif-centered resampling (across block boundaries)

// Signature shared by all row resamplers: given the nearer and farther
// source rows (in0, in1), the source width w and the horizontal expansion
// factor hs, return a pointer to the resampled row -- either `out` after
// filling it, or one of the inputs when no work is needed.
typedef uint8 *(*resample_row_func)(uint8 *out, uint8 *in0, uint8 *in1,
									int w, int hs);

// divide by 4 (shift) and narrow to a byte
#define div4(x) ((uint8)((x) >> 2))
1465 
resample_row_1(uint8 * out,uint8 * in_near,uint8 * in_far,int w,int hs)1466 static uint8 *resample_row_1(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1467 {
1468 	STBI_NOTUSED(out);
1469 	STBI_NOTUSED(in_far);
1470 	STBI_NOTUSED(w);
1471 	STBI_NOTUSED(hs);
1472 	return in_near;
1473 }
1474 
resample_row_v_2(uint8 * out,uint8 * in_near,uint8 * in_far,int w,int hs)1475 static uint8 *resample_row_v_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1476 {
1477 	// need to generate two samples vertically for every one in input
1478 	int i;
1479 	STBI_NOTUSED(hs);
1480 	for (i = 0; i < w; ++i)
1481 		out[i] = div4(3 * in_near[i] + in_far[i] + 2);
1482 	return out;
1483 }
1484 
resample_row_h_2(uint8 * out,uint8 * in_near,uint8 * in_far,int w,int hs)1485 static uint8 *resample_row_h_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1486 {
1487 	// need to generate two samples horizontally for every one in input
1488 	int i;
1489 	uint8 *input = in_near;
1490 
1491 	if (w == 1)
1492 	{
1493 		// if only one sample, can't do any interpolation
1494 		out[0] = out[1] = input[0];
1495 		return out;
1496 	}
1497 
1498 	out[0] = input[0];
1499 	out[1] = div4(input[0] * 3 + input[1] + 2);
1500 	for (i = 1; i < w - 1; ++i)
1501 	{
1502 		int n = 3 * input[i] + 2;
1503 		out[i * 2 + 0] = div4(n + input[i - 1]);
1504 		out[i * 2 + 1] = div4(n + input[i + 1]);
1505 	}
1506 	out[i * 2 + 0] = div4(input[w - 2] * 3 + input[w - 1] + 2);
1507 	out[i * 2 + 1] = input[w - 1];
1508 
1509 	STBI_NOTUSED(in_far);
1510 	STBI_NOTUSED(hs);
1511 
1512 	return out;
1513 }
1514 
1515 #define div16(x) ((uint8)((x) >> 4))
1516 
resample_row_hv_2(uint8 * out,uint8 * in_near,uint8 * in_far,int w,int hs)1517 static uint8 *resample_row_hv_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1518 {
1519 	// need to generate 2x2 samples for every one in input
1520 	int i, t0, t1;
1521 	if (w == 1)
1522 	{
1523 		out[0] = out[1] = div4(3 * in_near[0] + in_far[0] + 2);
1524 		return out;
1525 	}
1526 
1527 	t1 = 3 * in_near[0] + in_far[0];
1528 	out[0] = div4(t1 + 2);
1529 	for (i = 1; i < w; ++i)
1530 	{
1531 		t0 = t1;
1532 		t1 = 3 * in_near[i] + in_far[i];
1533 		out[i * 2 - 1] = div16(3 * t0 + t1 + 8);
1534 		out[i * 2] = div16(3 * t1 + t0 + 8);
1535 	}
1536 	out[w * 2 - 1] = div4(t1 + 2);
1537 
1538 	STBI_NOTUSED(hs);
1539 
1540 	return out;
1541 }
1542 
resample_row_generic(uint8 * out,uint8 * in_near,uint8 * in_far,int w,int hs)1543 static uint8 *resample_row_generic(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs)
1544 {
1545 	// resample with nearest-neighbor
1546 	int i, j;
1547 	for (i = 0; i < w; ++i)
1548 		for (j = 0; j < hs; ++j)
1549 			out[i * hs + j] = in_near[i];
1550 	return out;
1551 }
1552 
1553 #define float2fixed(x) ((int)((x)*65536 + 0.5))
1554 
1555 // 0.38 seconds on 3*anemones.jpg   (0.25 with processor = Pro)
1556 // VC6 without processor=Pro is generating multiple LEAs per multiply!
YCbCr_to_RGB_row(uint8 * out,const uint8 * y,const uint8 * pcb,const uint8 * pcr,int count,int step)1557 static void YCbCr_to_RGB_row(uint8 *out, const uint8 *y, const uint8 *pcb, const uint8 *pcr, int count, int step)
1558 {
1559 	int i;
1560 	for (i = 0; i < count; ++i)
1561 	{
1562 		int y_fixed = (y[i] << 16) + 32768;  // rounding
1563 		int r, g, b;
1564 		int cr = pcr[i] - 128;
1565 		int cb = pcb[i] - 128;
1566 		r = y_fixed + cr * float2fixed(1.40200f);
1567 		g = y_fixed - cr * float2fixed(0.71414f) - cb * float2fixed(0.34414f);
1568 		b = y_fixed + cb * float2fixed(1.77200f);
1569 		r >>= 16;
1570 		g >>= 16;
1571 		b >>= 16;
1572 		if ((unsigned)r > 255)
1573 		{
1574 			if (r < 0)
1575 				r = 0;
1576 			else
1577 				r = 255;
1578 		}
1579 		if ((unsigned)g > 255)
1580 		{
1581 			if (g < 0)
1582 				g = 0;
1583 			else
1584 				g = 255;
1585 		}
1586 		if ((unsigned)b > 255)
1587 		{
1588 			if (b < 0)
1589 				b = 0;
1590 			else
1591 				b = 255;
1592 		}
1593 		out[0] = (uint8)r;
1594 		out[1] = (uint8)g;
1595 		out[2] = (uint8)b;
1596 		out[3] = 255;
1597 		out += step;
1598 	}
1599 }
1600 
#ifdef STBI_SIMD
// colour-conversion routine the JPEG loader calls in STBI_SIMD builds;
// starts out as the plain C YCbCr_to_RGB_row
static stbi_YCbCr_to_RGB_run stbi_YCbCr_installed = YCbCr_to_RGB_row;

// replace the colour-conversion routine used for subsequently decoded rows
void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func)
{
	stbi_YCbCr_installed = func;
}
#endif
1609 
1610 // clean up the temporary component buffers
cleanup_jpeg(jpeg * j)1611 static void cleanup_jpeg(jpeg *j)
1612 {
1613 	int i;
1614 	for (i = 0; i < j->s->img_n; ++i)
1615 	{
1616 		if (j->img_comp[i].data)
1617 		{
1618 			free(j->img_comp[i].raw_data);
1619 			j->img_comp[i].data = NULL;
1620 		}
1621 		if (j->img_comp[i].linebuf)
1622 		{
1623 			free(j->img_comp[i].linebuf);
1624 			j->img_comp[i].linebuf = NULL;
1625 		}
1626 	}
1627 }
1628 
// per-component resampling state used while producing output rows
typedef struct
{
	resample_row_func resample;  // row resampler chosen from hs/vs
	uint8 *line0, *line1;        // the two source rows currently blended
	int hs, vs;   // expansion factor in each axis
	int w_lores;  // horizontal pixels pre-expansion
	int ystep;    // how far through vertical expansion we are
	int ypos;     // which pre-expansion row we're on
} stbi_resample;
1638 
load_jpeg_image(jpeg * z,int * out_x,int * out_y,int * comp,int req_comp)1639 static uint8 *load_jpeg_image(jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
1640 {
1641 	int n, decode_n;
1642 	// validate req_comp
1643 	if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error");
1644 	z->s->img_n = 0;
1645 
1646 	// load a jpeg image from whichever source
1647 	if (!decode_jpeg_image(z))
1648 	{
1649 		cleanup_jpeg(z);
1650 		return NULL;
1651 	}
1652 
1653 	// determine actual number of components to generate
1654 	n = req_comp ? req_comp : z->s->img_n;
1655 
1656 	if (z->s->img_n == 3 && n < 3)
1657 		decode_n = 1;
1658 	else
1659 		decode_n = z->s->img_n;
1660 
1661 	// resample and color-convert
1662 	{
1663 		int k;
1664 		uint i, j;
1665 		uint8 *output;
1666 		uint8 *coutput[4];
1667 
1668 		stbi_resample res_comp[4];
1669 
1670 		for (k = 0; k < decode_n; ++k)
1671 		{
1672 			stbi_resample *r = &res_comp[k];
1673 
1674 			// allocate line buffer big enough for upsampling off the edges
1675 			// with upsample factor of 4
1676 			z->img_comp[k].linebuf = (uint8 *)malloc(z->s->img_x + 3);
1677 			if (!z->img_comp[k].linebuf)
1678 			{
1679 				cleanup_jpeg(z);
1680 				return epuc("outofmem", "Out of memory");
1681 			}
1682 
1683 			r->hs = z->img_h_max / z->img_comp[k].h;
1684 			r->vs = z->img_v_max / z->img_comp[k].v;
1685 			r->ystep = r->vs >> 1;
1686 			r->w_lores = (z->s->img_x + r->hs - 1) / r->hs;
1687 			r->ypos = 0;
1688 			r->line0 = r->line1 = z->img_comp[k].data;
1689 
1690 			if (r->hs == 1 && r->vs == 1)
1691 				r->resample = resample_row_1;
1692 			else if (r->hs == 1 && r->vs == 2)
1693 				r->resample = resample_row_v_2;
1694 			else if (r->hs == 2 && r->vs == 1)
1695 				r->resample = resample_row_h_2;
1696 			else if (r->hs == 2 && r->vs == 2)
1697 				r->resample = resample_row_hv_2;
1698 			else
1699 				r->resample = resample_row_generic;
1700 		}
1701 
1702 		// can't error after this so, this is safe
1703 		output = (uint8 *)malloc(n * z->s->img_x * z->s->img_y + 1);
1704 		if (!output)
1705 		{
1706 			cleanup_jpeg(z);
1707 			return epuc("outofmem", "Out of memory");
1708 		}
1709 
1710 		// now go ahead and resample
1711 		for (j = 0; j < z->s->img_y; ++j)
1712 		{
1713 			uint8 *out = output + n * z->s->img_x * j;
1714 			for (k = 0; k < decode_n; ++k)
1715 			{
1716 				stbi_resample *r = &res_comp[k];
1717 				int y_bot = r->ystep >= (r->vs >> 1);
1718 				coutput[k] = r->resample(z->img_comp[k].linebuf,
1719 										 y_bot ? r->line1 : r->line0,
1720 										 y_bot ? r->line0 : r->line1,
1721 										 r->w_lores, r->hs);
1722 				if (++r->ystep >= r->vs)
1723 				{
1724 					r->ystep = 0;
1725 					r->line0 = r->line1;
1726 					if (++r->ypos < z->img_comp[k].y)
1727 						r->line1 += z->img_comp[k].w2;
1728 				}
1729 			}
1730 			if (n >= 3)
1731 			{
1732 				uint8 *y = coutput[0];
1733 				if (z->s->img_n == 3)
1734 				{
1735 #ifdef STBI_SIMD
1736 					stbi_YCbCr_installed(out, y, coutput[1], coutput[2], z->s.img_x, n);
1737 #else
1738 					YCbCr_to_RGB_row(out, y, coutput[1], coutput[2], z->s->img_x, n);
1739 #endif
1740 				}
1741 				else
1742 					for (i = 0; i < z->s->img_x; ++i)
1743 					{
1744 						out[0] = out[1] = out[2] = y[i];
1745 						out[3] = 255;  // not used if n==3
1746 						out += n;
1747 					}
1748 			}
1749 			else
1750 			{
1751 				uint8 *y = coutput[0];
1752 				if (n == 1)
1753 					for (i = 0; i < z->s->img_x; ++i) out[i] = y[i];
1754 				else
1755 					for (i = 0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255;
1756 			}
1757 		}
1758 		cleanup_jpeg(z);
1759 		*out_x = z->s->img_x;
1760 		*out_y = z->s->img_y;
1761 		if (comp) *comp = z->s->img_n;  // report original components, not output
1762 		return output;
1763 	}
1764 }
1765 
// Entry point for loading a JPEG from stream s: sets up a decoder on the
// stack and hands everything else to load_jpeg_image().
static unsigned char *stbi_jpeg_load(stbi *s, int *x, int *y, int *comp, int req_comp)
{
	jpeg decoder;
	decoder.s = s;
	return load_jpeg_image(&decoder, x, y, comp, req_comp);
}
1772 
// Report whether stream s looks like a JPEG, i.e. whether the header check
// in SCAN_type mode succeeds. The stream is rewound before returning.
static int stbi_jpeg_test(stbi *s)
{
	jpeg probe;
	int is_jpeg;

	probe.s = s;
	is_jpeg = decode_jpeg_header(&probe, SCAN_type);
	stbi_rewind(s);
	return is_jpeg;
}
1782 
// Parse just the header of the JPEG behind j->s and report width, height and
// component count through whichever of x, y and comp are non-NULL.
// Returns 1 on success; on failure the stream is rewound and 0 is returned.
static int stbi_jpeg_info_raw(jpeg *j, int *x, int *y, int *comp)
{
	if (decode_jpeg_header(j, SCAN_header))
	{
		if (x) *x = j->s->img_x;
		if (y) *y = j->s->img_y;
		if (comp) *comp = j->s->img_n;
		return 1;
	}
	stbi_rewind(j->s);
	return 0;
}
1795 
// Query size and component count of the JPEG in stream s; a thin wrapper
// that sets up a decoder on the stack for stbi_jpeg_info_raw().
static int stbi_jpeg_info(stbi *s, int *x, int *y, int *comp)
{
	jpeg decoder;
	decoder.s = s;
	return stbi_jpeg_info_raw(&decoder, x, y, comp);
}
1802 
1803 // public domain zlib decode    v0.2  Sean Barrett 2006-11-18
1804 //    simple implementation
1805 //      - all input must be provided in an upfront buffer
1806 //      - all output is written to a single output buffer (can malloc/realloc)
1807 //    performance
1808 //      - fast huffman
1809 
// fast-way is faster to check than jpeg huffman, but slow way is slower
#define ZFAST_BITS 9  // accelerate all cases in default tables
#define ZFAST_MASK ((1 << ZFAST_BITS) - 1)

// zlib-style huffman encoding
// (jpegs packs from left, zlib from right, so can't share code)
typedef struct
{
	uint16 fast[1 << ZFAST_BITS];  // indexed by the next ZFAST_BITS input bits: slot in size[]/value[], or 0xffff if the code is longer
	uint16 firstcode[16];          // first code of each code length
	int maxcode[17];               // one past the last code of each length, preshifted to 16 bits; [16] is a sentinel
	uint16 firstsymbol[16];        // slot in size[]/value[] of the first code of each length
	uint8 size[288];               // code length of each slot
	uint16 value[288];             // symbol decoded for each slot
} zhuffman;
1825 
bitreverse16(int n)1826 stbi_inline static int bitreverse16(int n)
1827 {
1828 	n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1);
1829 	n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2);
1830 	n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4);
1831 	n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);
1832 	return n;
1833 }
1834 
bit_reverse(int v,int bits)1835 stbi_inline static int bit_reverse(int v, int bits)
1836 {
1837 	assert(bits <= 16);
1838 	// to bit reverse n bits, reverse 16 and shift
1839 	// e.g. 11 bits, bit reverse and shift away 5
1840 	return bitreverse16(v) >> (16 - bits);
1841 }
1842 
// Build the decoding tables of a zlib huffman code.
//   z        - table set to fill in
//   sizelist - code length of each symbol, 0 meaning "symbol unused"
//   num      - number of entries in sizelist
// Returns 1 on success. A length distribution that cannot form a prefix
// code is reported through e() instead of tripping an assertion, since the
// lengths come straight from the compressed stream.
static int zbuild_huffman(zhuffman *z, uint8 *sizelist, int num)
{
	int i, k = 0;
	int code, next_code[16], sizes[17];

	// DEFLATE spec for generating codes
	memset(sizes, 0, sizeof(sizes));
	memset(z->fast, 255, sizeof(z->fast));  // every fast slot starts as 0xffff = "not resolved"
	for (i = 0; i < num; ++i)
		++sizes[sizelist[i]];
	sizes[0] = 0;
	for (i = 1; i < 16; ++i)
		if (sizes[i] > (1 << i)) return e("bad sizes", "Corrupt PNG");
	code = 0;
	for (i = 1; i < 16; ++i)
	{
		next_code[i] = code;
		z->firstcode[i] = (uint16)code;
		z->firstsymbol[i] = (uint16)k;
		code = (code + sizes[i]);
		if (sizes[i])
			if (code - 1 >= (1 << i)) return e("bad codelengths", "Corrupt PNG");
		z->maxcode[i] = code << (16 - i);  // preshift for inner loop
		code <<= 1;
		k += sizes[i];
	}
	z->maxcode[16] = 0x10000;  // sentinel
	for (i = 0; i < num; ++i)
	{
		int s = sizelist[i];
		if (s)
		{
			// slot of this symbol within size[]/value[]
			int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
			z->size[c] = (uint8)s;
			z->value[c] = (uint16)i;
			if (s <= ZFAST_BITS)
			{
				// short codes go into every fast-table entry whose low s
				// bits equal the bit-reversed code
				int slot = bit_reverse(next_code[s], s);
				while (slot < (1 << ZFAST_BITS))
				{
					z->fast[slot] = (uint16)c;
					slot += (1 << s);
				}
			}
			++next_code[s];
		}
	}
	return 1;
}
1892 
1893 // zlib-from-memory implementation for PNG reading
1894 //    because PNG allows splitting the zlib stream arbitrarily,
1895 //    and it's annoying structurally to have PNG call ZLIB call PNG,
1896 //    we require PNG read all the IDATs and combine them into a single
1897 //    memory buffer
1898 
// state of one zlib decode
typedef struct
{
	uint8 *zbuffer, *zbuffer_end;  // read cursor and end of the compressed input
	int num_bits;                  // number of valid bits in code_buffer
	uint32 code_buffer;            // bit buffer; the next input bit is the lowest one

	char *zout;        // write cursor in the output buffer
	char *zout_start;  // start of the output buffer
	char *zout_end;    // end of the output buffer
	int z_expandable;  // nonzero if expand() may realloc the output buffer

	zhuffman z_length, z_distance;  // codes for literal/length and distance symbols
} zbuf;
1912 
zget8(zbuf * z)1913 stbi_inline static int zget8(zbuf *z)
1914 {
1915 	if (z->zbuffer >= z->zbuffer_end) return 0;
1916 	return *z->zbuffer++;
1917 }
1918 
// Top up the bit buffer one byte at a time until it holds more than 24
// bits. New bytes are placed above the bits already present.
static void fill_bits(zbuf *z)
{
	do
	{
		assert(z->code_buffer < (1U << z->num_bits));
		// shift as unsigned: with num_bits == 24 a byte >= 0x80 would
		// otherwise be shifted into the sign bit of an int
		z->code_buffer |= (uint32)zget8(z) << z->num_bits;
		z->num_bits += 8;
	} while (z->num_bits <= 24);
}
1928 
zreceive(zbuf * z,int n)1929 stbi_inline static unsigned int zreceive(zbuf *z, int n)
1930 {
1931 	unsigned int k;
1932 	if (z->num_bits < n) fill_bits(z);
1933 	k = z->code_buffer & ((1 << n) - 1);
1934 	z->code_buffer >>= n;
1935 	z->num_bits -= n;
1936 	return k;
1937 }
1938 
zhuffman_decode(zbuf * a,zhuffman * z)1939 stbi_inline static int zhuffman_decode(zbuf *a, zhuffman *z)
1940 {
1941 	int b, s, k;
1942 	if (a->num_bits < 16) fill_bits(a);
1943 	b = z->fast[a->code_buffer & ZFAST_MASK];
1944 	if (b < 0xffff)
1945 	{
1946 		s = z->size[b];
1947 		a->code_buffer >>= s;
1948 		a->num_bits -= s;
1949 		return z->value[b];
1950 	}
1951 
1952 	// not resolved by fast table, so compute it the slow way
1953 	// use jpeg approach, which requires MSbits at top
1954 	k = bit_reverse(a->code_buffer, 16);
1955 	for (s = ZFAST_BITS + 1;; ++s)
1956 		if (k < z->maxcode[s])
1957 			break;
1958 	if (s == 16) return -1;  // invalid code!
1959 	// code size is s, so:
1960 	b = (k >> (16 - s)) - z->firstcode[s] + z->firstsymbol[s];
1961 	assert(z->size[b] == s);
1962 	a->code_buffer >>= s;
1963 	a->num_bits -= s;
1964 	return z->value[b];
1965 }
1966 
// Grow the output buffer so that at least n more bytes fit after the write
// cursor. The capacity is doubled until it suffices and the buffer is
// realloc'ed; zout, zout_start and zout_end are updated to the new block.
// Returns 1 on success. Fails through e() when the buffer is not expandable,
// when the required size does not fit in an int, or when realloc fails (the
// old buffer then stays valid and untouched).
static int expand(zbuf *z, int n)  // need to make room for n bytes
{
	char *q;
	int cur, limit;
	if (!z->z_expandable) return e("output buffer limit", "Corrupt PNG");
	cur = (int)(z->zout - z->zout_start);
	limit = (int)(z->zout_end - z->zout_start);
	// cur + n must not overflow (0x7fffffff is the largest 32-bit int)
	if (n > 0x7fffffff - cur) return e("outofmem", "Out of memory");
	// doubling a zero capacity would never terminate
	if (limit <= 0) limit = 1;
	while (cur + n > limit)
	{
		if (limit > 0x7fffffff / 2) return e("outofmem", "Out of memory");
		limit *= 2;
	}
	q = (char *)realloc(z->zout_start, limit);
	if (q == NULL) return e("outofmem", "Out of memory");
	z->zout_start = q;
	z->zout = q + cur;
	z->zout_end = q + limit;
	return 1;
}
1983 
// Lookup tables for decoding match lengths and distances. The length tables
// are indexed by (length symbol - 257), the distance tables by the distance
// symbol. *_base is the starting value and *_extra the number of additional
// bits read from the stream and added to it. Trailing entries are 0.
static int length_base[31] = {
	3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
	15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
	67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};

static int length_extra[31] =
	{0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0};

static int dist_base[32] = {1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
							257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0, 0};

// only 30 initializers: the last two entries are implicitly 0
static int dist_extra[32] =
	{0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13};
1997 
// Decode one huffman-compressed block using a->z_length / a->z_distance.
// Literals (< 256) are appended to the output, 256 ends the block, and
// larger symbols introduce a (length, distance) back-reference that copies
// already-produced output. Returns 1 at end of block, 0 on error.
static int parse_huffman_block(zbuf *a)
{
	for (;;)
	{
		int z = zhuffman_decode(a, &a->z_length);
		if (z < 256)
		{
			if (z < 0) return e("bad huffman code", "Corrupt PNG");  // error in huffman codes
			if (a->zout >= a->zout_end)
				if (!expand(a, 1)) return 0;
			*a->zout++ = (char)z;
		}
		else
		{
			uint8 *p;
			int len, dist;
			if (z == 256) return 1;
			z -= 257;
			// symbols 286 and 287 take part in code construction but must
			// never appear in the data; their table entries are 0
			if (z >= 29) return e("bad huffman code", "Corrupt PNG");
			len = length_base[z];
			if (length_extra[z]) len += zreceive(a, length_extra[z]);
			z = zhuffman_decode(a, &a->z_distance);
			// distance symbols 30 and 31 are likewise reserved; accepting
			// them would give dist == 0 and copy from unwritten output
			if (z < 0 || z >= 30) return e("bad huffman code", "Corrupt PNG");
			dist = dist_base[z];
			if (dist_extra[z]) dist += zreceive(a, dist_extra[z]);
			if (a->zout - a->zout_start < dist) return e("bad dist", "Corrupt PNG");
			// compare sizes instead of forming a pointer past the buffer end
			if (len > a->zout_end - a->zout)
				if (!expand(a, len)) return 0;
			// expand() may move the buffer, so compute the source afterwards
			p = (uint8 *)(a->zout - dist);
			// byte-by-byte on purpose: source and destination may overlap
			while (len--)
				*a->zout++ = *p++;
		}
	}
}
2031 
// Read the header of a dynamic-huffman block: the code-length code, then the
// run-length-encoded code lengths for the literal/length and distance
// alphabets, and build a->z_length and a->z_distance from them.
// Returns 1 on success, 0 on corrupt input.
static int compute_huffman_codes(zbuf *a)
{
	static uint8 length_dezigzag[19] = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
	zhuffman z_codelength;
	uint8 lencodes[286 + 32 + 137];  //padding for maximum single op
	uint8 codelength_sizes[19];
	int i, n, ntot;

	int hlit = zreceive(a, 5) + 257;
	int hdist = zreceive(a, 5) + 1;
	int hclen = zreceive(a, 4) + 4;

	// code-length code sizes arrive in the permuted order of length_dezigzag
	memset(codelength_sizes, 0, sizeof(codelength_sizes));
	for (i = 0; i < hclen; ++i)
	{
		int s = zreceive(a, 3);
		codelength_sizes[length_dezigzag[i]] = (uint8)s;
	}
	if (!zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;

	n = 0;
	ntot = hlit + hdist;
	while (n < ntot)
	{
		int c = zhuffman_decode(a, &z_codelength);
		// a failed decode returns -1; treat it as corrupt data instead of
		// depending on an assert that is compiled out under NDEBUG
		if (c < 0 || c >= 19) return e("bad codelengths", "Corrupt PNG");
		if (c < 16)
			lencodes[n++] = (uint8)c;
		else
		{
			uint8 fill = 0;
			if (c == 16)
			{
				// repeat the previous length 3..6 times
				c = zreceive(a, 2) + 3;
				// nothing to repeat yet: would read lencodes[-1]
				if (n == 0) return e("bad codelengths", "Corrupt PNG");
				fill = lencodes[n - 1];
			}
			else if (c == 17)
				c = zreceive(a, 3) + 3;  // 3..10 zeros
			else
				c = zreceive(a, 7) + 11;  // c == 18: 11..138 zeros
			// a run must not extend past the declared number of lengths
			if (ntot - n < c) return e("bad codelengths", "Corrupt PNG");
			memset(lencodes + n, fill, c);
			n += c;
		}
	}
	if (n != ntot) return e("bad codelengths", "Corrupt PNG");
	if (!zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
	if (!zbuild_huffman(&a->z_distance, lencodes + hlit, hdist)) return 0;
	return 1;
}
2084 
// Copy one stored (uncompressed) block to the output. Discards the bits up
// to the next byte boundary, reads the 4-byte LEN/NLEN header (partly from
// the bit accumulator, partly from the input), validates it, then copies
// LEN bytes straight from the input. Returns 1 on success, 0 on error.
static int parse_uncompressed_block(zbuf *a)
{
	uint8 header[4];
	int len, nlen, k;
	if (a->num_bits & 7)
		zreceive(a, a->num_bits & 7);  // discard
	// drain the bit-packed data into header
	k = 0;
	while (a->num_bits > 0)
	{
		header[k++] = (uint8)(a->code_buffer & 255);  // wtf this warns?
		a->code_buffer >>= 8;
		a->num_bits -= 8;
	}
	assert(a->num_bits == 0);
	// now fill header the normal way
	while (k < 4)
		header[k++] = (uint8)zget8(a);
	len = header[1] * 256 + header[0];
	nlen = header[3] * 256 + header[2];
	// NLEN must be the one's complement of LEN
	if (nlen != (len ^ 0xffff)) return e("zlib corrupt", "Corrupt PNG");
	// compare sizes rather than computing pointers beyond the buffers,
	// which is undefined even if the pointer is never dereferenced
	if (len > a->zbuffer_end - a->zbuffer) return e("read past buffer", "Corrupt PNG");
	if (len > a->zout_end - a->zout)
		if (!expand(a, len)) return 0;
	memcpy(a->zout, a->zbuffer, len);
	a->zbuffer += len;
	a->zout += len;
	return 1;
}
2114 
// Read and validate the two-byte zlib stream header (CMF, FLG).
// Returns 1 if acceptable, 0 (with the failure reason set) otherwise.
static int parse_zlib_header(zbuf *a)
{
	int cmf, flg, method;

	cmf = zget8(a);
	method = cmf & 15;  // low nibble: compression method
	/* int cinfo = cmf >> 4; */
	flg = zget8(a);

	// zlib spec: the 16-bit header must be a multiple of 31
	if ((cmf * 256 + flg) % 31 != 0)
		return e("bad zlib header", "Corrupt PNG");
	// preset dictionary not allowed in png
	if (flg & 32)
		return e("no preset dict", "Corrupt PNG");
	// DEFLATE required for png
	if (method != 8)
		return e("bad compression", "Corrupt PNG");
	// window = 1 << (8 + cinfo)... but who cares, we fully buffer output
	return 1;
}
2127 
2128 // @TODO: should statically initialize these for optimal thread safety
2129 static uint8 default_length[288], default_distance[32];
init_defaults(void)2130 static void init_defaults(void)
2131 {
2132 	int i;  // use <= to match clearly with spec
2133 	for (i = 0; i <= 143; ++i) default_length[i] = 8;
2134 	for (; i <= 255; ++i) default_length[i] = 9;
2135 	for (; i <= 279; ++i) default_length[i] = 7;
2136 	for (; i <= 287; ++i) default_length[i] = 8;
2137 
2138 	for (i = 0; i <= 31; ++i) default_distance[i] = 5;
2139 }
2140 
int stbi_png_partial;  // a quick hack to only allow decoding some of a PNG... I should implement real streaming support instead

// Inflate a complete stream into the output buffer already set up in a.
// If parse_header is nonzero the two-byte zlib header is validated first.
// Blocks are processed until one carries the "final" flag, or, when
// stbi_png_partial is set, until more than 65536 bytes have been produced.
// Returns 1 on success, 0 on error.
static int parse_zlib(zbuf *a, int parse_header)
{
	int final, type;
	if (parse_header)
		if (!parse_zlib_header(a)) return 0;
	a->num_bits = 0;
	a->code_buffer = 0;
	do
	{
		final = zreceive(a, 1);
		type = zreceive(a, 2);
		if (type == 0)
		{
			if (!parse_uncompressed_block(a)) return 0;
		}
		else if (type == 3)
		{
			// reserved block type: record a failure reason like every other
			// error path, so callers don't report a stale message
			return e("bad block type", "Corrupt PNG");
		}
		else
		{
			if (type == 1)
			{
				// use fixed code lengths
				// default_distance[31] is nonzero once the tables are filled
				if (!default_distance[31]) init_defaults();
				if (!zbuild_huffman(&a->z_length, default_length, 288)) return 0;
				if (!zbuild_huffman(&a->z_distance, default_distance, 32)) return 0;
			}
			else
			{
				if (!compute_huffman_codes(a)) return 0;
			}
			if (!parse_huffman_block(a)) return 0;
		}
		if (stbi_png_partial && a->zout - a->zout_start > 65536)
			break;
	} while (!final);
	return 1;
}
2181 
// Point the decoder at an output buffer of olen bytes (growable when exp is
// nonzero) and run the inflate. Returns parse_zlib()'s result.
static int do_zlib(zbuf *a, char *obuf, int olen, int exp, int parse_header)
{
	a->z_expandable = exp;
	a->zout_start = obuf;
	a->zout_end = obuf + olen;
	a->zout = obuf;  // write cursor starts at the beginning

	return parse_zlib(a, parse_header);
}
2191 
stbi_zlib_decode_malloc_guesssize(const char * buffer,int len,int initial_size,int * outlen)2192 char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
2193 {
2194 	zbuf a;
2195 	char *p = (char *)malloc(initial_size);
2196 	if (p == NULL) return NULL;
2197 	a.zbuffer = (uint8 *)buffer;
2198 	a.zbuffer_end = (uint8 *)buffer + len;
2199 	if (do_zlib(&a, p, initial_size, 1, 1))
2200 	{
2201 		if (outlen) *outlen = (int)(a.zout - a.zout_start);
2202 		return a.zout_start;
2203 	}
2204 	else
2205 	{
2206 		free(a.zout_start);
2207 		return NULL;
2208 	}
2209 }
2210 
// Same as stbi_zlib_decode_malloc_guesssize with an initial output size of
// 16384 bytes.
char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
{
	char *decoded = stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
	return decoded;
}
2215 
stbi_zlib_decode_malloc_guesssize_headerflag(const char * buffer,int len,int initial_size,int * outlen,int parse_header)2216 char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
2217 {
2218 	zbuf a;
2219 	char *p = (char *)malloc(initial_size);
2220 	if (p == NULL) return NULL;
2221 	a.zbuffer = (uint8 *)buffer;
2222 	a.zbuffer_end = (uint8 *)buffer + len;
2223 	if (do_zlib(&a, p, initial_size, 1, parse_header))
2224 	{
2225 		if (outlen) *outlen = (int)(a.zout - a.zout_start);
2226 		return a.zout_start;
2227 	}
2228 	else
2229 	{
2230 		free(a.zout_start);
2231 		return NULL;
2232 	}
2233 }
2234 
stbi_zlib_decode_buffer(char * obuffer,int olen,char const * ibuffer,int ilen)2235 int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
2236 {
2237 	zbuf a;
2238 	a.zbuffer = (uint8 *)ibuffer;
2239 	a.zbuffer_end = (uint8 *)ibuffer + ilen;
2240 	if (do_zlib(&a, obuffer, olen, 0, 1))
2241 		return (int)(a.zout - a.zout_start);
2242 	else
2243 		return -1;
2244 }
2245 
stbi_zlib_decode_noheader_malloc(char const * buffer,int len,int * outlen)2246 char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
2247 {
2248 	zbuf a;
2249 	char *p = (char *)malloc(16384);
2250 	if (p == NULL) return NULL;
2251 	a.zbuffer = (uint8 *)buffer;
2252 	a.zbuffer_end = (uint8 *)buffer + len;
2253 	if (do_zlib(&a, p, 16384, 1, 0))
2254 	{
2255 		if (outlen) *outlen = (int)(a.zout - a.zout_start);
2256 		return a.zout_start;
2257 	}
2258 	else
2259 	{
2260 		free(a.zout_start);
2261 		return NULL;
2262 	}
2263 }
2264 
stbi_zlib_decode_noheader_buffer(char * obuffer,int olen,const char * ibuffer,int ilen)2265 int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
2266 {
2267 	zbuf a;
2268 	a.zbuffer = (uint8 *)ibuffer;
2269 	a.zbuffer_end = (uint8 *)ibuffer + ilen;
2270 	if (do_zlib(&a, obuffer, olen, 0, 0))
2271 		return (int)(a.zout - a.zout_start);
2272 	else
2273 		return -1;
2274 }
2275 
2276 // public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
2277 //    simple implementation
2278 //      - only 8-bit samples
2279 //      - no CRC checking
2280 //      - allocates lots of intermediate memory
2281 //        - avoids problem of streaming data between subsystems
2282 //        - avoids explicit window management
2283 //    performance
2284 //      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
2285 
// Header of one PNG chunk, as read by get_chunk_header.
typedef struct
{
	uint32 length;  // first 32-bit value of the chunk header
	uint32 type;    // second 32-bit value; compared against PNG_TYPE(...)
} chunk;

// Pack four characters into one 32-bit value, first character in the most
// significant byte, for comparison with chunk.type.
#define PNG_TYPE(a, b, c, d) (((a) << 24) + ((b) << 16) + ((c) << 8) + (d))
2293 
get_chunk_header(stbi * s)2294 static chunk get_chunk_header(stbi *s)
2295 {
2296 	chunk c;
2297 	c.length = get32(s);
2298 	c.type = get32(s);
2299 	return c;
2300 }
2301 
// Consume the 8-byte PNG signature from the stream. Returns 1 if it matches;
// on the first mismatching byte, stops reading and returns 0 with the
// failure reason set.
static int check_png_header(stbi *s)
{
	static uint8 png_sig[8] = {137, 80, 78, 71, 13, 10, 26, 10};
	uint8 *expected = png_sig;
	uint8 *stop = png_sig + 8;
	while (expected != stop)
	{
		if (get8u(s) != *expected)
			return e("bad png sig", "Not a PNG");
		++expected;
	}
	return 1;
}
2310 
// Working state of one PNG decode.
typedef struct
{
	stbi *s;  // underlying image/IO context
	// idata: accumulated IDAT bytes; expanded: inflated IDAT data;
	// out: decoded pixels. parse_png_file resets all three to NULL on entry.
	uint8 *idata, *expanded, *out;
} png;
2316 
// Row filter identifiers. Values 0..4 are the filter bytes found in the
// data stream (anything above 4 is rejected by create_png_image_raw).
// F_avg_first and F_paeth_first are internal variants used on the first row,
// where no previous row is available.
enum
{
	F_none = 0,
	F_sub = 1,
	F_up = 2,
	F_avg = 3,
	F_paeth = 4,
	F_avg_first,
	F_paeth_first
};

// Maps a stream filter byte (0..4) to the filter to use on the first row.
static uint8 first_row_filter[5] =
	{
		F_none, F_sub, F_none, F_avg_first, F_paeth_first};
2331 
// Paeth predictor: of a, b and c, return the one closest to a + b - c.
// Ties are resolved in the order a, then b, then c.
static int paeth(int a, int b, int c)
{
	int estimate = a + b - c;
	int dist_a = abs(estimate - a);
	int dist_b = abs(estimate - b);
	int dist_c = abs(estimate - c);

	if (dist_a > dist_b || dist_a > dist_c)
		return (dist_b <= dist_c) ? b : c;
	return a;
}
2342 
// create the png data from post-deflated data
//
// Undoes the per-row filtering of `raw` and writes x*y pixels of out_n
// components each into a freshly malloc'd a->out. Each input row is one
// filter byte followed by x pixels of s->img_n components. out_n must equal
// img_n or img_n + 1; in the latter case the extra component of every pixel
// is set to 255. When stbi_png_partial is set only one row is produced and
// the raw_len check is skipped.
// Returns 1 on success, 0 (failure reason set) on error; a->out remains
// allocated on the error paths that follow the malloc.
static int create_png_image_raw(png *a, uint8 *raw, uint32 raw_len, int out_n, uint32 x, uint32 y)
{
	stbi *s = a->s;
	uint32 i, j, stride = x * out_n;
	int k;
	int img_n = s->img_n;  // copy it into a local for later
	assert(out_n == s->img_n || out_n == s->img_n + 1);
	if (stbi_png_partial) y = 1;
	a->out = (uint8 *)malloc(x * y * out_n);
	if (!a->out) return e("outofmem", "Out of memory");
	if (!stbi_png_partial)
	{
		// dimensions equal to the full image: raw must be exactly the
		// expected size; otherwise (a sub-image) it must be at least that
		if (s->img_x == x && s->img_y == y)
		{
			if (raw_len != (img_n * x + 1) * y) return e("not enough pixels", "Corrupt PNG");
		}
		else
		{  // interlaced:
			if (raw_len < (img_n * x + 1) * y) return e("not enough pixels", "Corrupt PNG");
		}
	}
	for (j = 0; j < y; ++j)
	{
		uint8 *cur = a->out + stride * j;
		// previous output row; for j == 0 it is never read because the
		// first-row filters below do not reference it
		uint8 *prior = cur - stride;
		int filter = *raw++;
		if (filter > 4) return e("invalid filter", "Corrupt PNG");
		// if first row, use special filter that doesn't sample previous row
		if (j == 0) filter = first_row_filter[filter];
		// handle first pixel explicitly
		// (there is no pixel to the left, so only the row above contributes)
		for (k = 0; k < img_n; ++k)
		{
			switch (filter)
			{
				case F_none:
					cur[k] = raw[k];
					break;
				case F_sub:
					cur[k] = raw[k];
					break;
				case F_up:
					cur[k] = raw[k] + prior[k];
					break;
				case F_avg:
					cur[k] = raw[k] + (prior[k] >> 1);
					break;
				case F_paeth:
					cur[k] = (uint8)(raw[k] + paeth(0, prior[k], 0));
					break;
				case F_avg_first:
					cur[k] = raw[k];
					break;
				case F_paeth_first:
					cur[k] = raw[k];
					break;
			}
		}
		if (img_n != out_n) cur[img_n] = 255;
		raw += img_n;
		cur += out_n;
		prior += out_n;
		// this is a little gross, so that we don't switch per-pixel or per-component
		// CASE(f) expands to "case f:" followed by the loop over the
		// remaining x-1 pixels and their components; the statement after
		// each CASE(...) is the body of the innermost loop.
		if (img_n == out_n)
		{
#define CASE(f)                                                                  \
	case f:                                                                      \
		for (i = x - 1; i >= 1; --i, raw += img_n, cur += img_n, prior += img_n) \
			for (k = 0; k < img_n; ++k)
			switch (filter)
			{
				CASE(F_none)
				cur[k] = raw[k];
				break;
				CASE(F_sub)
				cur[k] = raw[k] + cur[k - img_n];
				break;
				CASE(F_up)
				cur[k] = raw[k] + prior[k];
				break;
				CASE(F_avg)
				cur[k] = raw[k] + ((prior[k] + cur[k - img_n]) >> 1);
				break;
				CASE(F_paeth)
				cur[k] = (uint8)(raw[k] + paeth(cur[k - img_n], prior[k], prior[k - img_n]));
				break;
				CASE(F_avg_first)
				cur[k] = raw[k] + (cur[k - img_n] >> 1);
				break;
				CASE(F_paeth_first)
				cur[k] = (uint8)(raw[k] + paeth(cur[k - img_n], 0, 0));
				break;
			}
#undef CASE
		}
		else
		{
			// same as above, but output pixels are out_n wide and the extra
			// component is set to 255 after each pixel
			assert(img_n + 1 == out_n);
#define CASE(f)                                                                                    \
	case f:                                                                                        \
		for (i = x - 1; i >= 1; --i, cur[img_n] = 255, raw += img_n, cur += out_n, prior += out_n) \
			for (k = 0; k < img_n; ++k)
			switch (filter)
			{
				CASE(F_none)
				cur[k] = raw[k];
				break;
				CASE(F_sub)
				cur[k] = raw[k] + cur[k - out_n];
				break;
				CASE(F_up)
				cur[k] = raw[k] + prior[k];
				break;
				CASE(F_avg)
				cur[k] = raw[k] + ((prior[k] + cur[k - out_n]) >> 1);
				break;
				CASE(F_paeth)
				cur[k] = (uint8)(raw[k] + paeth(cur[k - out_n], prior[k], prior[k - out_n]));
				break;
				CASE(F_avg_first)
				cur[k] = raw[k] + (cur[k - out_n] >> 1);
				break;
				CASE(F_paeth_first)
				cur[k] = (uint8)(raw[k] + paeth(cur[k - out_n], 0, 0));
				break;
			}
#undef CASE
		}
	}
	return 1;
}
2474 
// Build the final image in a->out from inflated data. Non-interlaced images
// are handed straight to create_png_image_raw. Interlaced images are decoded
// as seven reduced passes, each scattered into its positions in a full-size
// buffer. stbi_png_partial is forced to 0 while de-interlacing and restored
// on every exit. Returns 1 on success, 0 on error.
static int create_png_image(png *a, uint8 *raw, uint32 raw_len, int out_n, int interlaced)
{
	uint8 *final;
	int p;
	int save;
	if (!interlaced)
		return create_png_image_raw(a, raw, raw_len, out_n, a->s->img_x, a->s->img_y);
	save = stbi_png_partial;
	stbi_png_partial = 0;

	// de-interlacing
	final = (uint8 *)malloc(a->s->img_x * a->s->img_y * out_n);
	if (final == NULL)
	{
		stbi_png_partial = save;
		return e("outofmem", "Out of memory");
	}
	for (p = 0; p < 7; ++p)
	{
		// per-pass origin and spacing of the pixels within the full image
		int xorig[] = {0, 4, 0, 2, 0, 1, 0};
		int yorig[] = {0, 0, 4, 0, 2, 0, 1};
		int xspc[] = {8, 8, 4, 4, 2, 2, 1};
		int yspc[] = {8, 8, 8, 4, 4, 2, 2};
		int i, j, x, y;
		uint32 pass_len;
		// pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
		x = (a->s->img_x - xorig[p] + xspc[p] - 1) / xspc[p];
		y = (a->s->img_y - yorig[p] + yspc[p] - 1) / yspc[p];
		if (x && y)
		{
			if (!create_png_image_raw(a, raw, raw_len, out_n, x, y))
			{
				free(final);
				stbi_png_partial = save;
				return 0;
			}
			for (j = 0; j < y; ++j)
				for (i = 0; i < x; ++i)
					memcpy(final + (j * yspc[p] + yorig[p]) * a->s->img_x * out_n + (i * xspc[p] + xorig[p]) * out_n,
						   a->out + (j * x + i) * out_n, out_n);
			free(a->out);
			a->out = NULL;
			// The input holds img_n components per pixel (plus one filter
			// byte per row), regardless of how many components are output;
			// advancing by out_n would misalign every pass after the first
			// whenever an extra component is being added.
			pass_len = (uint32)((x * a->s->img_n + 1) * y);
			raw += pass_len;
			raw_len -= pass_len;
		}
	}
	a->out = final;

	stbi_png_partial = save;
	return 1;
}
2518 
// Apply color-key transparency to z->out in place. With 2 components the
// second is set to 0 where the first equals tc[0] and to 255 elsewhere; with
// 4 components the fourth is set to 0 where the first three equal tc[0..2]
// and is otherwise left as it was. Always returns 1.
static int compute_transparency(png *z, uint8 tc[3], int out_n)
{
	stbi *s = z->s;
	uint8 *px = z->out;
	uint32 remaining = s->img_x * s->img_y;

	// compute color-based transparency, assuming we've
	// already got 255 as the alpha value in the output
	assert(out_n == 2 || out_n == 4);

	if (out_n == 2)
	{
		for (; remaining != 0; --remaining, px += 2)
		{
			if (px[0] == tc[0])
				px[1] = 0;
			else
				px[1] = 255;
		}
		return 1;
	}

	for (; remaining != 0; --remaining, px += 4)
	{
		if (px[0] != tc[0]) continue;
		if (px[1] != tc[1]) continue;
		if (px[2] != tc[2]) continue;
		px[3] = 0;
	}
	return 1;
}
2548 
// Replace the one-byte-per-pixel index image in a->out with a newly
// allocated image of palette colors. Palette entries are 4 bytes apart;
// 3 bytes per pixel are copied when pal_img_n is 3, otherwise 4.
// len is unused. Returns 1 on success, 0 if allocation fails.
static int expand_palette(png *a, uint8 *palette, int len, int pal_img_n)
{
	uint32 pixel_count = a->s->img_x * a->s->img_y;
	uint8 *indices = a->out;
	uint8 *expanded, *dst;
	uint32 px;
	int width, c;

	expanded = (uint8 *)malloc(pixel_count * pal_img_n);
	if (expanded == NULL) return e("outofmem", "Out of memory");

	// between here and the free() below, exiting would leak
	width = (pal_img_n == 3) ? 3 : 4;
	dst = expanded;
	for (px = 0; px < pixel_count; ++px)
	{
		uint8 *entry = palette + indices[px] * 4;
		for (c = 0; c < width; ++c)
			dst[c] = entry[c];
		dst += width;
	}

	free(a->out);
	a->out = expanded;

	STBI_NOTUSED(len);

	return 1;
}
2590 
// Global decode options, both off by default.
// stbi_unpremultiply_on_load: read by stbi_de_iphone to decide whether
// 4-component pixels are divided by their alpha while being reordered.
// stbi_de_iphone_flag: copied by parse_png_file when a CgBI chunk is seen.
static int stbi_unpremultiply_on_load = 0;
static int stbi_de_iphone_flag = 0;

// Store the caller's flag in stbi_unpremultiply_on_load.
void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
{
	stbi_unpremultiply_on_load = flag_true_if_should_unpremultiply;
}
// Store the caller's flag in stbi_de_iphone_flag.
void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
{
	stbi_de_iphone_flag = flag_true_if_should_convert;
}
2602 
// Swap the first and third component of every pixel in z->out (bgr -> rgb).
// For 4-component images with stbi_unpremultiply_on_load set, the three
// color components are additionally scaled by 255 / alpha when alpha is
// nonzero.
static void stbi_de_iphone(png *z)
{
	stbi *s = z->s;
	uint32 remaining = s->img_x * s->img_y;
	uint8 *px = z->out;

	if (s->img_out_n == 3)
	{
		// convert bgr to rgb
		for (; remaining != 0; --remaining, px += 3)
		{
			uint8 first = px[0];
			px[0] = px[2];
			px[2] = first;
		}
		return;
	}

	assert(s->img_out_n == 4);
	if (!stbi_unpremultiply_on_load)
	{
		// convert bgr to rgb
		for (; remaining != 0; --remaining, px += 4)
		{
			uint8 first = px[0];
			px[0] = px[2];
			px[2] = first;
		}
		return;
	}

	// convert bgr to rgb and unpremultiply
	for (; remaining != 0; --remaining, px += 4)
	{
		uint8 alpha = px[3];
		uint8 first = px[0];
		if (alpha)
		{
			px[0] = px[2] * 255 / alpha;
			px[1] = px[1] * 255 / alpha;
			px[2] = first * 255 / alpha;
		}
		else
		{
			px[0] = px[2];
			px[2] = first;
		}
	}
}
2656 
// Walk the PNG chunk stream of z->s.
//   scan == SCAN_type   : verify the signature only.
//   scan == SCAN_header : stop as soon as the dimensions and component count
//                         (s->img_x, s->img_y, s->img_n) are known.
//   scan == SCAN_load   : collect all IDAT data and, at IEND, inflate and
//                         unfilter it into z->out.
// req_comp influences how many components z->out is produced with.
// Returns 1 on success, 0 on failure (failure reason set). z->idata,
// z->expanded and z->out may be left allocated on failure.
static int parse_png_file(png *z, int scan, int req_comp)
{
	uint8 palette[1024], pal_img_n = 0;
	uint8 has_trans = 0, tc[3];
	uint32 ioff = 0, idata_limit = 0, i, pal_len = 0;
	int first = 1, k, interlace = 0, iphone = 0;
	stbi *s = z->s;

	z->expanded = NULL;
	z->idata = NULL;
	z->out = NULL;

	if (!check_png_header(s)) return 0;

	if (scan == SCAN_type) return 1;

	for (;;)
	{
		chunk c = get_chunk_header(s);
		switch (c.type)
		{
			case PNG_TYPE('C', 'g', 'B', 'I'):
				iphone = stbi_de_iphone_flag;
				skip(s, c.length);
				break;
			case PNG_TYPE('I', 'H', 'D', 'R'):
			{
				int depth, color, comp, filter;
				if (!first) return e("multiple IHDR", "Corrupt PNG");
				first = 0;
				if (c.length != 13) return e("bad IHDR len", "Corrupt PNG");
				s->img_x = get32(s);
				if (s->img_x > (1 << 24)) return e("too large", "Very large image (corrupt?)");
				s->img_y = get32(s);
				if (s->img_y > (1 << 24)) return e("too large", "Very large image (corrupt?)");
				depth = get8(s);
				if (depth != 8) return e("8bit only", "PNG not supported: 8-bit only");
				color = get8(s);
				if (color > 6) return e("bad ctype", "Corrupt PNG");
				if (color == 3)
					pal_img_n = 3;
				else if (color & 1)
					return e("bad ctype", "Corrupt PNG");
				comp = get8(s);
				if (comp) return e("bad comp method", "Corrupt PNG");
				filter = get8(s);
				if (filter) return e("bad filter method", "Corrupt PNG");
				interlace = get8(s);
				if (interlace > 1) return e("bad interlace method", "Corrupt PNG");
				if (!s->img_x || !s->img_y) return e("0-pixel image", "Corrupt PNG");
				if (!pal_img_n)
				{
					s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
					if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode");
					if (scan == SCAN_header) return 1;
				}
				else
				{
					// if paletted, then pal_n is our final components, and
					// img_n is # components to decompress/filter.
					s->img_n = 1;
					if ((1 << 30) / s->img_x / 4 < s->img_y) return e("too large", "Corrupt PNG");
					// if SCAN_header, have to scan to see if we have a tRNS
				}
				break;
			}

			case PNG_TYPE('P', 'L', 'T', 'E'):
			{
				if (first) return e("first not IHDR", "Corrupt PNG");
				if (c.length > 256 * 3) return e("invalid PLTE", "Corrupt PNG");
				pal_len = c.length / 3;
				if (pal_len * 3 != c.length) return e("invalid PLTE", "Corrupt PNG");
				// stored as 4 bytes per entry, alpha defaulting to opaque
				for (i = 0; i < pal_len; ++i)
				{
					palette[i * 4 + 0] = get8u(s);
					palette[i * 4 + 1] = get8u(s);
					palette[i * 4 + 2] = get8u(s);
					palette[i * 4 + 3] = 255;
				}
				break;
			}

			case PNG_TYPE('t', 'R', 'N', 'S'):
			{
				if (first) return e("first not IHDR", "Corrupt PNG");
				if (z->idata) return e("tRNS after IDAT", "Corrupt PNG");
				if (pal_img_n)
				{
					if (scan == SCAN_header)
					{
						s->img_n = 4;
						return 1;
					}
					if (pal_len == 0) return e("tRNS before PLTE", "Corrupt PNG");
					if (c.length > pal_len) return e("bad tRNS len", "Corrupt PNG");
					pal_img_n = 4;
					// per-entry alpha values for the palette
					for (i = 0; i < c.length; ++i)
						palette[i * 4 + 3] = get8u(s);
				}
				else
				{
					if (!(s->img_n & 1)) return e("tRNS with alpha", "Corrupt PNG");
					if (c.length != (uint32)s->img_n * 2) return e("bad tRNS len", "Corrupt PNG");
					has_trans = 1;
					for (k = 0; k < s->img_n; ++k)
						tc[k] = (uint8)get16(s);  // non 8-bit images will be larger
				}
				break;
			}

			case PNG_TYPE('I', 'D', 'A', 'T'):
			{
				if (first) return e("first not IHDR", "Corrupt PNG");
				if (pal_img_n && !pal_len) return e("no PLTE", "Corrupt PNG");
				if (scan == SCAN_header)
				{
					s->img_n = pal_img_n;
					return 1;
				}
				// Cap the accumulated IDAT size at 2^30 bytes. Without this,
				// ioff + c.length can wrap around 32 bits (skipping the
				// buffer growth below while still reading c.length bytes),
				// and doubling idata_limit can wrap to 0 and never finish.
				if (c.length > (1u << 30) || ioff > (1u << 30) - c.length)
					return e("IDAT size limit", "IDAT section larger than 2^30 bytes");
				if (ioff + c.length > idata_limit)
				{
					uint8 *p;
					if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
					while (ioff + c.length > idata_limit)
						idata_limit *= 2;
					p = (uint8 *)realloc(z->idata, idata_limit);
					if (p == NULL) return e("outofmem", "Out of memory");
					z->idata = p;
				}
				if (!getn(s, z->idata + ioff, c.length)) return e("outofdata", "Corrupt PNG");
				ioff += c.length;
				break;
			}

			case PNG_TYPE('I', 'E', 'N', 'D'):
			{
				uint32 raw_len;
				if (first) return e("first not IHDR", "Corrupt PNG");
				if (scan != SCAN_load) return 1;
				if (z->idata == NULL) return e("no IDAT", "Corrupt PNG");
				// the zlib header is skipped when the CgBI (iphone) path is active
				z->expanded = (uint8 *)stbi_zlib_decode_malloc_guesssize_headerflag((char *)z->idata, ioff, 16384, (int *)&raw_len, !iphone);
				if (z->expanded == NULL) return 0;  // zlib should set error
				free(z->idata);
				z->idata = NULL;
				// decode with one extra component when the caller asked for
				// exactly that, or when color-key transparency needs it
				if ((req_comp == s->img_n + 1 && req_comp != 3 && !pal_img_n) || has_trans)
					s->img_out_n = s->img_n + 1;
				else
					s->img_out_n = s->img_n;
				if (!create_png_image(z, z->expanded, raw_len, s->img_out_n, interlace)) return 0;
				if (has_trans)
					if (!compute_transparency(z, tc, s->img_out_n)) return 0;
				if (iphone && s->img_out_n > 2)
					stbi_de_iphone(z);
				if (pal_img_n)
				{
					// pal_img_n == 3 or 4
					s->img_n = pal_img_n;  // record the actual colors we had
					s->img_out_n = pal_img_n;
					if (req_comp >= 3) s->img_out_n = req_comp;
					if (!expand_palette(z, palette, pal_len, s->img_out_n))
						return 0;
				}
				free(z->expanded);
				z->expanded = NULL;
				return 1;
			}

			default:
				// if critical, fail
				if (first) return e("first not IHDR", "Corrupt PNG");
				// bit 29 of the type clear => chunk is treated as critical
				if ((c.type & (1 << 29)) == 0)
				{
#ifndef STBI_NO_FAILURE_STRINGS
					// not threadsafe
					static char invalid_chunk[] = "XXXX chunk not known";
					invalid_chunk[0] = (uint8)(c.type >> 24);
					invalid_chunk[1] = (uint8)(c.type >> 16);
					invalid_chunk[2] = (uint8)(c.type >> 8);
					invalid_chunk[3] = (uint8)(c.type >> 0);
#endif
					return e(invalid_chunk, "PNG not supported: unknown chunk type");
				}
				skip(s, c.length);
				break;
		}
		// end of chunk, read and skip CRC
		get32(s);
	}
}
2847 
// Decode the PNG attached to p and return its pixels, converted to req_comp
// components when req_comp is nonzero and differs from what was decoded.
// Stores the dimensions in *x and *y and, if n is non-NULL, the component
// count recorded in s->img_n. Returns NULL on failure.
static unsigned char *do_png(png *p, int *x, int *y, int *n, int req_comp)
{
	unsigned char *pixels = NULL;
	if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error");

	if (!parse_png_file(p, SCAN_load, req_comp))
		goto cleanup;

	// take ownership of the decoded image
	pixels = p->out;
	p->out = NULL;
	if (req_comp && req_comp != p->s->img_out_n)
	{
		pixels = convert_format(pixels, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
		p->s->img_out_n = req_comp;
		if (pixels == NULL) return pixels;
	}
	*x = p->s->img_x;
	*y = p->s->img_y;
	if (n) *n = p->s->img_n;

cleanup:
	// release whatever intermediate buffers are still attached
	free(p->out);
	p->out = NULL;
	free(p->expanded);
	p->expanded = NULL;
	free(p->idata);
	p->idata = NULL;

	return pixels;
}
2875 
// Load a PNG from context s; thin wrapper that sets up a png state and
// forwards to do_png.
static unsigned char *stbi_png_load(stbi *s, int *x, int *y, int *comp, int req_comp)
{
	png decoder;
	decoder.s = s;
	return do_png(&decoder, x, y, comp, req_comp);
}
2882 
// Report whether the stream starts with the PNG signature, then rewind it.
static int stbi_png_test(stbi *s)
{
	int is_png = check_png_header(s);
	stbi_rewind(s);
	return is_png;
}
2890 
// Scan just far enough to learn the image dimensions and component count.
// Each of x, y, comp is written only if non-NULL. On failure the stream is
// rewound and 0 is returned; on success 1 is returned.
static int stbi_png_info_raw(png *p, int *x, int *y, int *comp)
{
	if (parse_png_file(p, SCAN_header, 0))
	{
		if (x) *x = p->s->img_x;
		if (y) *y = p->s->img_y;
		if (comp) *comp = p->s->img_n;
		return 1;
	}
	stbi_rewind(p->s);
	return 0;
}
2903 
// Query PNG dimensions/components from context s; thin wrapper that sets up
// a png state and forwards to stbi_png_info_raw.
static int stbi_png_info(stbi *s, int *x, int *y, int *comp)
{
	png probe;
	probe.s = s;
	return stbi_png_info_raw(&probe, x, y, comp);
}
2910 
2911 // Microsoft/Windows BMP image
2912 
// Check for a BMP: the stream must start with 'B' 'M' and, after the
// remaining file-header fields, carry a header size of 12, 40, 56 or 108.
// Does not rewind the stream.
static int bmp_test(stbi *s)
{
	int header_size;
	if (get8(s) != 'B') return 0;
	if (get8(s) != 'M') return 0;
	get32le(s);  // discard filesize
	get16le(s);  // discard reserved
	get16le(s);  // discard reserved
	get32le(s);  // discard data offset
	header_size = get32le(s);
	switch (header_size)
	{
		case 12:
		case 40:
		case 56:
		case 108:
			return 1;
		default:
			return 0;
	}
}
2926 
// Run bmp_test on the stream and rewind it afterwards.
static int stbi_bmp_test(stbi *s)
{
	int is_bmp;
	is_bmp = bmp_test(s);
	stbi_rewind(s);
	return is_bmp;
}
2933 
// returns 0..31 for the highest set bit, or -1 if no bit is set
static int high_bit(unsigned int z)
{
	int index = -1;
	// each shift that leaves something behind moves the answer up by one
	while (z != 0)
	{
		z >>= 1;
		++index;
	}
	return index;
}
2946 
// Number of set bits in a.
static int bitcount(unsigned int a)
{
	int count = 0;
	// a & (a - 1) clears the lowest set bit; count how often that is possible
	while (a != 0)
	{
		a &= a - 1;
		++count;
	}
	return count;
}
2956 
// Shift v right by `shift` (or left by -shift when shift is negative), then
// add copies of the shifted value moved right by bits, 2*bits, ... for every
// multiple of `bits` below 8, and return the sum.
static int shiftsigned(int v, int shift, int bits)
{
	int shifted, total, offset;

	shifted = (shift < 0) ? (v << -shift) : (v >> shift);

	total = shifted;
	for (offset = bits; offset < 8; offset += bits)
		total += shifted >> offset;
	return total;
}
2976 
bmp_load(stbi * s,int * x,int * y,int * comp,int req_comp)2977 static stbi_uc *bmp_load(stbi *s, int *x, int *y, int *comp, int req_comp)
2978 {
2979 	uint8 *out;
2980 	unsigned int mr = 0, mg = 0, mb = 0, ma = 0;  //, fake_a=0;
2981 	stbi_uc pal[256][4];
2982 	int psize = 0, i, j, compress = 0, width;
2983 	int bpp, flip_vertically, pad, target, offset, hsz;
2984 	if (get8(s) != 'B' || get8(s) != 'M') return epuc("not BMP", "Corrupt BMP");
2985 	get32le(s);  // discard filesize
2986 	get16le(s);  // discard reserved
2987 	get16le(s);  // discard reserved
2988 	offset = get32le(s);
2989 	hsz = get32le(s);
2990 	if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108) return epuc("unknown BMP", "BMP type not supported: unknown");
2991 	if (hsz == 12)
2992 	{
2993 		s->img_x = get16le(s);
2994 		s->img_y = get16le(s);
2995 	}
2996 	else
2997 	{
2998 		s->img_x = get32le(s);
2999 		s->img_y = get32le(s);
3000 	}
3001 	if (get16le(s) != 1) return epuc("bad BMP", "bad BMP");
3002 	bpp = get16le(s);
3003 	if (bpp == 1) return epuc("monochrome", "BMP type not supported: 1-bit");
3004 	flip_vertically = ((int)s->img_y) > 0;
3005 	s->img_y = abs((int)s->img_y);
3006 	if (hsz == 12)
3007 	{
3008 		if (bpp < 24)
3009 			psize = (offset - 14 - 24) / 3;
3010 	}
3011 	else
3012 	{
3013 		compress = get32le(s);
3014 		if (compress == 1 || compress == 2) return epuc("BMP RLE", "BMP type not supported: RLE");
3015 		get32le(s);  // discard sizeof
3016 		get32le(s);  // discard hres
3017 		get32le(s);  // discard vres
3018 		get32le(s);  // discard colorsused
3019 		get32le(s);  // discard max important
3020 		if (hsz == 40 || hsz == 56)
3021 		{
3022 			if (hsz == 56)
3023 			{
3024 				get32le(s);
3025 				get32le(s);
3026 				get32le(s);
3027 				get32le(s);
3028 			}
3029 			if (bpp == 16 || bpp == 32)
3030 			{
3031 				mr = mg = mb = 0;
3032 				if (compress == 0)
3033 				{
3034 					if (bpp == 32)
3035 					{
3036 						mr = 0xffu << 16;
3037 						mg = 0xffu << 8;
3038 						mb = 0xffu << 0;
3039 						ma = 0xffu << 24;
3040 						//fake_a = 1; // @TODO: check for cases like alpha value is all 0 and switch it to 255
3041 					}
3042 					else
3043 					{
3044 						mr = 31u << 10;
3045 						mg = 31u << 5;
3046 						mb = 31u << 0;
3047 					}
3048 				}
3049 				else if (compress == 3)
3050 				{
3051 					mr = get32le(s);
3052 					mg = get32le(s);
3053 					mb = get32le(s);
3054 					// not documented, but generated by photoshop and handled by mspaint
3055 					if (mr == mg && mg == mb)
3056 					{
3057 						// ?!?!?
3058 						return epuc("bad BMP", "bad BMP");
3059 					}
3060 				}
3061 				else
3062 					return epuc("bad BMP", "bad BMP");
3063 			}
3064 		}
3065 		else
3066 		{
3067 			assert(hsz == 108);
3068 			mr = get32le(s);
3069 			mg = get32le(s);
3070 			mb = get32le(s);
3071 			ma = get32le(s);
3072 			get32le(s);  // discard color space
3073 			for (i = 0; i < 12; ++i)
3074 				get32le(s);  // discard color space parameters
3075 		}
3076 		if (bpp < 16)
3077 			psize = (offset - 14 - hsz) >> 2;
3078 	}
3079 	s->img_n = ma ? 4 : 3;
3080 	if (req_comp && req_comp >= 3)  // we can directly decode 3 or 4
3081 		target = req_comp;
3082 	else
3083 		target = s->img_n;  // if they want monochrome, we'll post-convert
3084 	out = (stbi_uc *)malloc(target * s->img_x * s->img_y);
3085 	if (!out) return epuc("outofmem", "Out of memory");
3086 	if (bpp < 16)
3087 	{
3088 		int z = 0;
3089 		if (psize == 0 || psize > 256)
3090 		{
3091 			free(out);
3092 			return epuc("invalid", "Corrupt BMP");
3093 		}
3094 		for (i = 0; i < psize; ++i)
3095 		{
3096 			pal[i][2] = get8u(s);
3097 			pal[i][1] = get8u(s);
3098 			pal[i][0] = get8u(s);
3099 			if (hsz != 12) get8(s);
3100 			pal[i][3] = 255;
3101 		}
3102 		skip(s, offset - 14 - hsz - psize * (hsz == 12 ? 3 : 4));
3103 		if (bpp == 4)
3104 			width = (s->img_x + 1) >> 1;
3105 		else if (bpp == 8)
3106 			width = s->img_x;
3107 		else
3108 		{
3109 			free(out);
3110 			return epuc("bad bpp", "Corrupt BMP");
3111 		}
3112 		pad = (-width) & 3;
3113 		for (j = 0; j < (int)s->img_y; ++j)
3114 		{
3115 			for (i = 0; i < (int)s->img_x; i += 2)
3116 			{
3117 				int v = get8(s), v2 = 0;
3118 				if (bpp == 4)
3119 				{
3120 					v2 = v & 15;
3121 					v >>= 4;
3122 				}
3123 				out[z++] = pal[v][0];
3124 				out[z++] = pal[v][1];
3125 				out[z++] = pal[v][2];
3126 				if (target == 4) out[z++] = 255;
3127 				if (i + 1 == (int)s->img_x) break;
3128 				v = (bpp == 8) ? get8(s) : v2;
3129 				out[z++] = pal[v][0];
3130 				out[z++] = pal[v][1];
3131 				out[z++] = pal[v][2];
3132 				if (target == 4) out[z++] = 255;
3133 			}
3134 			skip(s, pad);
3135 		}
3136 	}
3137 	else
3138 	{
3139 		int rshift = 0, gshift = 0, bshift = 0, ashift = 0, rcount = 0, gcount = 0, bcount = 0, acount = 0;
3140 		int z = 0;
3141 		int easy = 0;
3142 		skip(s, offset - 14 - hsz);
3143 		if (bpp == 24)
3144 			width = 3 * s->img_x;
3145 		else if (bpp == 16)
3146 			width = 2 * s->img_x;
3147 		else /* bpp = 32 and pad = 0 */
3148 			width = 0;
3149 		pad = (-width) & 3;
3150 		if (bpp == 24)
3151 		{
3152 			easy = 1;
3153 		}
3154 		else if (bpp == 32)
3155 		{
3156 			if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
3157 				easy = 2;
3158 		}
3159 		if (!easy)
3160 		{
3161 			if (!mr || !mg || !mb)
3162 			{
3163 				free(out);
3164 				return epuc("bad masks", "Corrupt BMP");
3165 			}
3166 			// right shift amt to put high bit in position #7
3167 			rshift = high_bit(mr) - 7;
3168 			rcount = bitcount(mr);
3169 			gshift = high_bit(mg) - 7;
3170 			gcount = bitcount(mr);
3171 			bshift = high_bit(mb) - 7;
3172 			bcount = bitcount(mr);
3173 			ashift = high_bit(ma) - 7;
3174 			acount = bitcount(mr);
3175 		}
3176 		for (j = 0; j < (int)s->img_y; ++j)
3177 		{
3178 			if (easy)
3179 			{
3180 				for (i = 0; i < (int)s->img_x; ++i)
3181 				{
3182 					int a;
3183 					out[z + 2] = get8u(s);
3184 					out[z + 1] = get8u(s);
3185 					out[z + 0] = get8u(s);
3186 					z += 3;
3187 					a = (easy == 2 ? get8(s) : 255);
3188 					if (target == 4) out[z++] = (uint8)a;
3189 				}
3190 			}
3191 			else
3192 			{
3193 				for (i = 0; i < (int)s->img_x; ++i)
3194 				{
3195 					uint32 v = (bpp == 16 ? get16le(s) : get32le(s));
3196 					int a;
3197 					out[z++] = (uint8)shiftsigned(v & mr, rshift, rcount);
3198 					out[z++] = (uint8)shiftsigned(v & mg, gshift, gcount);
3199 					out[z++] = (uint8)shiftsigned(v & mb, bshift, bcount);
3200 					a = (ma ? shiftsigned(v & ma, ashift, acount) : 255);
3201 					if (target == 4) out[z++] = (uint8)a;
3202 				}
3203 			}
3204 			skip(s, pad);
3205 		}
3206 	}
3207 	if (flip_vertically)
3208 	{
3209 		stbi_uc t;
3210 		for (j = 0; j<(int)s->img_y>> 1; ++j)
3211 		{
3212 			stbi_uc *p1 = out + j * s->img_x * target;
3213 			stbi_uc *p2 = out + (s->img_y - 1 - j) * s->img_x * target;
3214 			for (i = 0; i < (int)s->img_x * target; ++i)
3215 			{
3216 				t = p1[i], p1[i] = p2[i], p2[i] = t;
3217 			}
3218 		}
3219 	}
3220 
3221 	if (req_comp && req_comp != target)
3222 	{
3223 		out = convert_format(out, target, req_comp, s->img_x, s->img_y);
3224 		if (out == NULL) return out;  // convert_format frees input on failure
3225 	}
3226 
3227 	*x = s->img_x;
3228 	*y = s->img_y;
3229 	if (comp) *comp = s->img_n;
3230 	return out;
3231 }
3232 
stbi_bmp_load(stbi * s,int * x,int * y,int * comp,int req_comp)3233 static stbi_uc *stbi_bmp_load(stbi *s, int *x, int *y, int *comp, int req_comp)
3234 {
3235 	return bmp_load(s, x, y, comp, req_comp);
3236 }
3237 
3238 // Targa Truevision - TGA
3239 // by Jonathan Dummer
3240 
// Read just enough of a TGA header to report the image size.
//
//   x, y, comp - each, if non-NULL, receives the width, the height and
//                bits-per-pixel / 8 respectively
//
// Returns 1 if the header looks like a supported TGA, 0 otherwise. Every
// rejection rewinds the stream first so another format probe can run.
static int tga_info(stbi *s, int *x, int *y, int *comp)
{
	int tga_w, tga_h, tga_comp;
	int sz;
	get8u(s);       // discard Offset
	sz = get8u(s);  // color type
	if (sz > 1)
	{
		stbi_rewind(s);
		return 0;  // only RGB or indexed allowed
	}
	sz = get8u(s);  // image type
	// only RGB or grey allowed, +/- RLE
	if ((sz != 1) && (sz != 2) && (sz != 3) && (sz != 9) && (sz != 10) && (sz != 11))
	{
		stbi_rewind(s);  // rewind here too, like every other rejection below
		return 0;
	}
	skip(s, 9);  // palette start/length/bits and x/y origin
	tga_w = get16le(s);
	if (tga_w < 1)
	{
		stbi_rewind(s);
		return 0;  // test width
	}
	tga_h = get16le(s);
	if (tga_h < 1)
	{
		stbi_rewind(s);
		return 0;  // test height
	}
	sz = get8(s);  // bits per pixel
	// only RGB or RGBA or grey allowed
	if ((sz != 8) && (sz != 16) && (sz != 24) && (sz != 32))
	{
		stbi_rewind(s);
		return 0;
	}
	tga_comp = sz;
	if (x) *x = tga_w;
	if (y) *y = tga_h;
	if (comp) *comp = tga_comp / 8;
	return 1;  // seems to have passed everything
}
3281 
// Externally visible wrapper: forwards to tga_info() and returns its result.
int stbi_tga_info(stbi *s, int *x, int *y, int *comp)
{
	const int ok = tga_info(s, x, y, comp);

	return ok;
}
3286 
// Header sanity probe for TGA. Reads fields in file order and returns 0 at
// the first one that rules the file out, 1 if all checks pass. The stream is
// left wherever reading stopped.
static int tga_test(stbi *s)
{
	int field;

	get8u(s);  // discard Offset

	field = get8u(s);  // color type: only RGB or indexed allowed
	if (field > 1) return 0;

	field = get8u(s);  // image type: only RGB or grey allowed, +/- RLE
	switch (field)
	{
		case 1:
		case 2:
		case 3:
		case 9:
		case 10:
		case 11:
			break;
		default:
			return 0;
	}

	get16(s);  // discard palette start
	get16(s);  // discard palette length
	get8(s);   // discard bits per palette color entry
	get16(s);  // discard x origin
	get16(s);  // discard y origin

	if (get16(s) < 1) return 0;  // width must be non-zero
	if (get16(s) < 1) return 0;  // height must be non-zero

	field = get8(s);  // bits per pixel: only RGB or RGBA or grey allowed
	switch (field)
	{
		case 8:
		case 16:
		case 24:
		case 32:
			return 1;  // seems to have passed everything
		default:
			return 0;
	}
}
3306 
// Runs the TGA header probe (tga_test) and then rewinds the stream with
// stbi_rewind() regardless of the outcome. Returns tga_test's result.
static int stbi_tga_test(stbi *s)
{
	const int looks_like_tga = tga_test(s);

	stbi_rewind(s);
	return looks_like_tga;
}
3313 
tga_load(stbi * s,int * x,int * y,int * comp,int req_comp)3314 static stbi_uc *tga_load(stbi *s, int *x, int *y, int *comp, int req_comp)
3315 {
3316 	//   read in the TGA header stuff
3317 	int tga_offset = get8u(s);
3318 	int tga_indexed = get8u(s);
3319 	int tga_image_type = get8u(s);
3320 	int tga_is_RLE = 0;
3321 	int tga_palette_start = get16le(s);
3322 	int tga_palette_len = get16le(s);
3323 	int tga_palette_bits = get8u(s);
3324 	int tga_x_origin = get16le(s);
3325 	int tga_y_origin = get16le(s);
3326 	int tga_width = get16le(s);
3327 	int tga_height = get16le(s);
3328 	int tga_bits_per_pixel = get8u(s);
3329 	int tga_inverted = get8u(s);
3330 	//   image data
3331 	unsigned char *tga_data;
3332 	unsigned char *tga_palette = NULL;
3333 	int i, j;
3334 	unsigned char raw_data[4];
3335 	unsigned char trans_data[4];
3336 	int RLE_count = 0;
3337 	int RLE_repeating = 0;
3338 	int read_next_pixel = 1;
3339 
3340 	//   do a tiny bit of precessing
3341 	if (tga_image_type >= 8)
3342 	{
3343 		tga_image_type -= 8;
3344 		tga_is_RLE = 1;
3345 	}
3346 	/* int tga_alpha_bits = tga_inverted & 15; */
3347 	tga_inverted = 1 - ((tga_inverted >> 5) & 1);
3348 
3349 	//   error check
3350 	if (  //(tga_indexed) ||
3351 		(tga_width < 1) || (tga_height < 1) ||
3352 		(tga_image_type < 1) || (tga_image_type > 3) ||
3353 		((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16) &&
3354 		 (tga_bits_per_pixel != 24) && (tga_bits_per_pixel != 32)))
3355 	{
3356 		return NULL;  // we don't report this as a bad TGA because we don't even know if it's TGA
3357 	}
3358 
3359 	//   If I'm paletted, then I'll use the number of bits from the palette
3360 	if (tga_indexed)
3361 	{
3362 		tga_bits_per_pixel = tga_palette_bits;
3363 	}
3364 
3365 	//   tga info
3366 	*x = tga_width;
3367 	*y = tga_height;
3368 	if ((req_comp < 1) || (req_comp > 4))
3369 	{
3370 		//   just use whatever the file was
3371 		req_comp = tga_bits_per_pixel / 8;
3372 		*comp = req_comp;
3373 	}
3374 	else
3375 	{
3376 		//   force a new number of components
3377 		*comp = tga_bits_per_pixel / 8;
3378 	}
3379 	tga_data = (unsigned char *)malloc(tga_width * tga_height * req_comp);
3380 	if (!tga_data) return epuc("outofmem", "Out of memory");
3381 
3382 	//   skip to the data's starting position (offset usually = 0)
3383 	skip(s, tga_offset);
3384 	//   do I need to load a palette?
3385 	if (tga_indexed)
3386 	{
3387 		//   any data to skip? (offset usually = 0)
3388 		skip(s, tga_palette_start);
3389 		//   load the palette
3390 		tga_palette = (unsigned char *)malloc(tga_palette_len * tga_palette_bits / 8);
3391 		if (!tga_palette) return epuc("outofmem", "Out of memory");
3392 		if (!getn(s, tga_palette, tga_palette_len * tga_palette_bits / 8))
3393 		{
3394 			free(tga_data);
3395 			free(tga_palette);
3396 			return epuc("bad palette", "Corrupt TGA");
3397 		}
3398 	}
3399 	//   load the data
3400 	trans_data[0] = trans_data[1] = trans_data[2] = trans_data[3] = 0;
3401 	for (i = 0; i < tga_width * tga_height; ++i)
3402 	{
3403 		//   if I'm in RLE mode, do I need to get a RLE chunk?
3404 		if (tga_is_RLE)
3405 		{
3406 			if (RLE_count == 0)
3407 			{
3408 				//   yep, get the next byte as a RLE command
3409 				int RLE_cmd = get8u(s);
3410 				RLE_count = 1 + (RLE_cmd & 127);
3411 				RLE_repeating = RLE_cmd >> 7;
3412 				read_next_pixel = 1;
3413 			}
3414 			else if (!RLE_repeating)
3415 			{
3416 				read_next_pixel = 1;
3417 			}
3418 		}
3419 		else
3420 		{
3421 			read_next_pixel = 1;
3422 		}
3423 		//   OK, if I need to read a pixel, do it now
3424 		if (read_next_pixel)
3425 		{
3426 			//   load however much data we did have
3427 			if (tga_indexed)
3428 			{
3429 				//   read in 1 byte, then perform the lookup
3430 				int pal_idx = get8u(s);
3431 				if (pal_idx >= tga_palette_len)
3432 				{
3433 					//   invalid index
3434 					pal_idx = 0;
3435 				}
3436 				pal_idx *= tga_bits_per_pixel / 8;
3437 				for (j = 0; j * 8 < tga_bits_per_pixel; ++j)
3438 				{
3439 					raw_data[j] = tga_palette[pal_idx + j];
3440 				}
3441 			}
3442 			else
3443 			{
3444 				//   read in the data raw
3445 				for (j = 0; j * 8 < tga_bits_per_pixel; ++j)
3446 				{
3447 					raw_data[j] = get8u(s);
3448 				}
3449 			}
3450 			//   convert raw to the intermediate format
3451 			switch (tga_bits_per_pixel)
3452 			{
3453 				case 8:
3454 					//   Luminous => RGBA
3455 					trans_data[0] = raw_data[0];
3456 					trans_data[1] = raw_data[0];
3457 					trans_data[2] = raw_data[0];
3458 					trans_data[3] = 255;
3459 					break;
3460 				case 16:
3461 					//   Luminous,Alpha => RGBA
3462 					trans_data[0] = raw_data[0];
3463 					trans_data[1] = raw_data[0];
3464 					trans_data[2] = raw_data[0];
3465 					trans_data[3] = raw_data[1];
3466 					break;
3467 				case 24:
3468 					//   BGR => RGBA
3469 					trans_data[0] = raw_data[2];
3470 					trans_data[1] = raw_data[1];
3471 					trans_data[2] = raw_data[0];
3472 					trans_data[3] = 255;
3473 					break;
3474 				case 32:
3475 					//   BGRA => RGBA
3476 					trans_data[0] = raw_data[2];
3477 					trans_data[1] = raw_data[1];
3478 					trans_data[2] = raw_data[0];
3479 					trans_data[3] = raw_data[3];
3480 					break;
3481 			}
3482 			//   clear the reading flag for the next pixel
3483 			read_next_pixel = 0;
3484 		}  // end of reading a pixel
3485 		//   convert to final format
3486 		switch (req_comp)
3487 		{
3488 			case 1:
3489 				//   RGBA => Luminance
3490 				tga_data[i * req_comp + 0] = compute_y(trans_data[0], trans_data[1], trans_data[2]);
3491 				break;
3492 			case 2:
3493 				//   RGBA => Luminance,Alpha
3494 				tga_data[i * req_comp + 0] = compute_y(trans_data[0], trans_data[1], trans_data[2]);
3495 				tga_data[i * req_comp + 1] = trans_data[3];
3496 				break;
3497 			case 3:
3498 				//   RGBA => RGB
3499 				tga_data[i * req_comp + 0] = trans_data[0];
3500 				tga_data[i * req_comp + 1] = trans_data[1];
3501 				tga_data[i * req_comp + 2] = trans_data[2];
3502 				break;
3503 			case 4:
3504 				//   RGBA => RGBA
3505 				tga_data[i * req_comp + 0] = trans_data[0];
3506 				tga_data[i * req_comp + 1] = trans_data[1];
3507 				tga_data[i * req_comp + 2] = trans_data[2];
3508 				tga_data[i * req_comp + 3] = trans_data[3];
3509 				break;
3510 		}
3511 		//   in case we're in RLE mode, keep counting down
3512 		--RLE_count;
3513 	}
3514 	//   do I need to invert the image?
3515 	if (tga_inverted)
3516 	{
3517 		for (j = 0; j * 2 < tga_height; ++j)
3518 		{
3519 			int index1 = j * tga_width * req_comp;
3520 			int index2 = (tga_height - 1 - j) * tga_width * req_comp;
3521 			for (i = tga_width * req_comp; i > 0; --i)
3522 			{
3523 				unsigned char temp = tga_data[index1];
3524 				tga_data[index1] = tga_data[index2];
3525 				tga_data[index2] = temp;
3526 				++index1;
3527 				++index2;
3528 			}
3529 		}
3530 	}
3531 	//   clear my palette, if I had one
3532 	if (tga_palette != NULL)
3533 	{
3534 		free(tga_palette);
3535 	}
3536 	//   the things I do to get rid of an error message, and yet keep
3537 	//   Microsoft's C compilers happy... [8^(
3538 	tga_palette_start = tga_palette_len = tga_palette_bits =
3539 		tga_x_origin = tga_y_origin = 0;
3540 	//   OK, done
3541 	return tga_data;
3542 }
3543 
stbi_tga_load(stbi * s,int * x,int * y,int * comp,int req_comp)3544 static stbi_uc *stbi_tga_load(stbi *s, int *x, int *y, int *comp, int req_comp)
3545 {
3546 	return tga_load(s, x, y, comp, req_comp);
3547 }
3548 
3549 // *************************************************************************************************
3550 // Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
3551 
// Returns 1 when the next 32-bit value read with get32() is the PSD
// signature "8BPS" (0x38425053), 0 otherwise.
static int psd_test(stbi *s)
{
	return get32(s) == 0x38425053;
}
3559 
// Runs the PSD signature probe (psd_test) and then rewinds the stream with
// stbi_rewind() regardless of the outcome. Returns psd_test's result.
static int stbi_psd_test(stbi *s)
{
	const int is_psd = psd_test(s);

	stbi_rewind(s);
	return is_psd;
}
3566 
psd_load(stbi * s,int * x,int * y,int * comp,int req_comp)3567 static stbi_uc *psd_load(stbi *s, int *x, int *y, int *comp, int req_comp)
3568 {
3569 	int pixelCount;
3570 	int channelCount, compression;
3571 	int channel, i, count, len;
3572 	int w, h;
3573 	uint8 *out;
3574 
3575 	// Check identifier
3576 	if (get32(s) != 0x38425053)  // "8BPS"
3577 		return epuc("not PSD", "Corrupt PSD image");
3578 
3579 	// Check file type version.
3580 	if (get16(s) != 1)
3581 		return epuc("wrong version", "Unsupported version of PSD image");
3582 
3583 	// Skip 6 reserved bytes.
3584 	skip(s, 6);
3585 
3586 	// Read the number of channels (R, G, B, A, etc).
3587 	channelCount = get16(s);
3588 	if (channelCount < 0 || channelCount > 16)
3589 		return epuc("wrong channel count", "Unsupported number of channels in PSD image");
3590 
3591 	// Read the rows and columns of the image.
3592 	h = get32(s);
3593 	w = get32(s);
3594 
3595 	// Make sure the depth is 8 bits.
3596 	if (get16(s) != 8)
3597 		return epuc("unsupported bit depth", "PSD bit depth is not 8 bit");
3598 
3599 	// Make sure the color mode is RGB.
3600 	// Valid options are:
3601 	//   0: Bitmap
3602 	//   1: Grayscale
3603 	//   2: Indexed color
3604 	//   3: RGB color
3605 	//   4: CMYK color
3606 	//   7: Multichannel
3607 	//   8: Duotone
3608 	//   9: Lab color
3609 	if (get16(s) != 3)
3610 		return epuc("wrong color format", "PSD is not in RGB color format");
3611 
3612 	// Skip the Mode Data.  (It's the palette for indexed color; other info for other modes.)
3613 	skip(s, get32(s));
3614 
3615 	// Skip the image resources.  (resolution, pen tool paths, etc)
3616 	skip(s, get32(s));
3617 
3618 	// Skip the reserved data.
3619 	skip(s, get32(s));
3620 
3621 	// Find out if the data is compressed.
3622 	// Known values:
3623 	//   0: no compression
3624 	//   1: RLE compressed
3625 	compression = get16(s);
3626 	if (compression > 1)
3627 		return epuc("bad compression", "PSD has an unknown compression format");
3628 
3629 	// Create the destination image.
3630 	out = (stbi_uc *)malloc(4 * w * h);
3631 	if (!out) return epuc("outofmem", "Out of memory");
3632 	pixelCount = w * h;
3633 
3634 	// Initialize the data to zero.
3635 	//memset( out, 0, pixelCount * 4 );
3636 
3637 	// Finally, the image data.
3638 	if (compression)
3639 	{
3640 		// RLE as used by .PSD and .TIFF
3641 		// Loop until you get the number of unpacked bytes you are expecting:
3642 		//     Read the next source byte into n.
3643 		//     If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
3644 		//     Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
3645 		//     Else if n is 128, noop.
3646 		// Endloop
3647 
3648 		// The RLE-compressed data is preceeded by a 2-byte data count for each row in the data,
3649 		// which we're going to just skip.
3650 		skip(s, h * channelCount * 2);
3651 
3652 		// Read the RLE data by channel.
3653 		for (channel = 0; channel < 4; channel++)
3654 		{
3655 			uint8 *p;
3656 
3657 			p = out + channel;
3658 			if (channel >= channelCount)
3659 			{
3660 				// Fill this channel with default data.
3661 				for (i = 0; i < pixelCount; i++) *p = (channel == 3 ? 255 : 0), p += 4;
3662 			}
3663 			else
3664 			{
3665 				// Read the RLE data.
3666 				count = 0;
3667 				while (count < pixelCount)
3668 				{
3669 					len = get8(s);
3670 					if (len == 128)
3671 					{
3672 						// No-op.
3673 					}
3674 					else if (len < 128)
3675 					{
3676 						// Copy next len+1 bytes literally.
3677 						len++;
3678 						count += len;
3679 						while (len)
3680 						{
3681 							*p = get8u(s);
3682 							p += 4;
3683 							len--;
3684 						}
3685 					}
3686 					else if (len > 128)
3687 					{
3688 						uint8 val;
3689 						// Next -len+1 bytes in the dest are replicated from next source byte.
3690 						// (Interpret len as a negative 8-bit int.)
3691 						len ^= 0x0FF;
3692 						len += 2;
3693 						val = get8u(s);
3694 						count += len;
3695 						while (len)
3696 						{
3697 							*p = val;
3698 							p += 4;
3699 							len--;
3700 						}
3701 					}
3702 				}
3703 			}
3704 		}
3705 	}
3706 	else
3707 	{
3708 		// We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...)
3709 		// where each channel consists of an 8-bit value for each pixel in the image.
3710 
3711 		// Read the data by channel.
3712 		for (channel = 0; channel < 4; channel++)
3713 		{
3714 			uint8 *p;
3715 
3716 			p = out + channel;
3717 			if (channel > channelCount)
3718 			{
3719 				// Fill this channel with default data.
3720 				for (i = 0; i < pixelCount; i++) *p = channel == 3 ? 255 : 0, p += 4;
3721 			}
3722 			else
3723 			{
3724 				// Read the data.
3725 				for (i = 0; i < pixelCount; i++)
3726 					*p = get8u(s), p += 4;
3727 			}
3728 		}
3729 	}
3730 
3731 	if (req_comp && req_comp != 4)
3732 	{
3733 		out = convert_format(out, 4, req_comp, w, h);
3734 		if (out == NULL) return out;  // convert_format frees input on failure
3735 	}
3736 
3737 	if (comp) *comp = channelCount;
3738 	*y = h;
3739 	*x = w;
3740 
3741 	return out;
3742 }
3743 
stbi_psd_load(stbi * s,int * x,int * y,int * comp,int req_comp)3744 static stbi_uc *stbi_psd_load(stbi *s, int *x, int *y, int *comp, int req_comp)
3745 {
3746 	return psd_load(s, x, y, comp, req_comp);
3747 }
3748 
3749 // *************************************************************************************************
3750 // Softimage PIC loader
3751 // by Tom Seddon
3752 //
3753 // See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
3754 // See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
3755 
// Compares the next bytes of the stream against the 4 bytes at str.
// Returns 1 if all four match; returns 0 at the first mismatch without
// reading any further bytes.
static int pic_is4(stbi *s, const char *str)
{
	const char *cursor = str;
	const char *const end = str + 4;

	while (cursor != end)
	{
		if (get8(s) != (stbi_uc)*cursor)
			return 0;
		++cursor;
	}

	return 1;
}
3765 
// Softimage PIC probe: the stream must start with the magic bytes
// 53 80 F6 34 and, after 84 further bytes, contain the tag "PICT".
// Returns 1 on a match, 0 otherwise; the stream is not rewound here.
static int pic_test(stbi *s)
{
	int remaining;

	if (!pic_is4(s, "\x53\x80\xF6\x34"))
		return 0;

	// consume the 84 bytes between the magic number and the "PICT" tag
	for (remaining = 84; remaining > 0; --remaining)
		get8(s);

	return pic_is4(s, "PICT") ? 1 : 0;
}
3781 
3782 typedef struct
3783 {
3784 	stbi_uc size, type, channel;
3785 } pic_packet_t;
3786 
// Reads one byte from the stream for every channel selected in `channel`
// (mask bit 0x80 >> i selects dest[i], i = 0..3); unselected bytes of dest
// are left untouched. Returns dest, or the result of epuc() if the stream
// is at EOF before a selected byte is read.
static stbi_uc *pic_readval(stbi *s, int channel, stbi_uc *dest)
{
	int idx;

	for (idx = 0; idx < 4; ++idx)
	{
		if (!(channel & (0x80 >> idx)))
			continue;

		if (at_eof(s)) return epuc("bad file", "PIC file too short");
		dest[idx] = get8u(s);
	}

	return dest;
}
3802 
// Copies src[i] to dest[i] for every channel selected in `channel`
// (mask bit 0x80 >> i selects index i, i = 0..3); other bytes of dest are
// left untouched.
static void pic_copyval(int channel, stbi_uc *dest, const stbi_uc *src)
{
	int idx = 0;

	while (idx < 4)
	{
		if (channel & (0x80 >> idx))
			dest[idx] = src[idx];
		++idx;
	}
}
3811 
// Reads the PIC packet list and then the pixel data into `result`, an RGBA
// buffer of width * height pixels supplied by the caller.
//
//   comp   - receives 4 if any packet carries the alpha channel (mask 0x10),
//            otherwise 3
//
// Returns `result` on success, or 0 / the result of epuc() on failure.
static stbi_uc *pic_load2(stbi *s, int width, int height, int *comp, stbi_uc *result)
{
	pic_packet_t packets[10];
	int channels_seen = 0, packet_count = 0, row, more;

	// Packet headers come first; each one says whether another follows.
	// Data for the same channel may appear in several packets.
	for (;;)
	{
		pic_packet_t *pk;

		if (packet_count == sizeof(packets) / sizeof(packets[0]))
			return epuc("bad format", "too many packets");

		pk = &packets[packet_count];
		++packet_count;

		more = get8(s);
		pk->size = get8u(s);
		pk->type = get8u(s);
		pk->channel = get8u(s);

		channels_seen |= pk->channel;

		if (at_eof(s)) return epuc("bad file", "file too short (reading packets)");
		if (pk->size != 8) return epuc("bad format", "packet isn't 8bpp");

		if (!more) break;
	}

	*comp = (channels_seen & 0x10) ? 4 : 3;  // has alpha channel?

	// Every scanline stores the data of each packet in turn.
	for (row = 0; row < height; ++row)
	{
		int p;

		for (p = 0; p < packet_count; ++p)
		{
			const pic_packet_t *pk = &packets[p];
			stbi_uc *dest = result + row * width * 4;
			int remaining = width;

			if (pk->type == 0)
			{
				// uncompressed: one value per pixel
				int col;

				for (col = 0; col < width; ++col, dest += 4)
					if (!pic_readval(s, pk->channel, dest))
						return 0;
			}
			else if (pk->type == 1)
			{
				// pure RLE: (count, value) pairs; a count running past the
				// end of the scanline is clamped
				while (remaining > 0)
				{
					stbi_uc run, value[4];
					int k;

					run = get8u(s);
					if (at_eof(s)) return epuc("bad file", "file too short (pure read count)");

					if (run > remaining)
						run = (uint8)remaining;

					if (!pic_readval(s, pk->channel, value)) return 0;

					for (k = 0; k < run; ++k, dest += 4)
						pic_copyval(pk->channel, dest, value);
					remaining -= run;
				}
			}
			else if (pk->type == 2)
			{
				// mixed RLE: a count byte >= 128 starts a repeated run,
				// anything lower starts a run of raw values
				while (remaining > 0)
				{
					int run = get8(s), k;
					if (at_eof(s)) return epuc("bad file", "file too short (mixed read count)");

					if (run >= 128)
					{
						stbi_uc value[4];

						// 128 means the real count follows as a 16-bit value
						if (run == 128)
							run = get16(s);
						else
							run -= 127;
						if (run > remaining)
							return epuc("bad file", "scanline overrun");

						if (!pic_readval(s, pk->channel, value))
							return 0;

						for (k = 0; k < run; ++k, dest += 4)
							pic_copyval(pk->channel, dest, value);
					}
					else
					{
						++run;
						if (run > remaining) return epuc("bad file", "scanline overrun");

						for (k = 0; k < run; ++k, dest += 4)
							if (!pic_readval(s, pk->channel, dest))
								return 0;
					}
					remaining -= run;
				}
			}
			else
			{
				return epuc("bad format", "packet has bad compression type");
			}
		}
	}

	return result;
}
3933 
pic_load(stbi * s,int * px,int * py,int * comp,int req_comp)3934 static stbi_uc *pic_load(stbi *s, int *px, int *py, int *comp, int req_comp)
3935 {
3936 	stbi_uc *result;
3937 	int i, x, y;
3938 
3939 	for (i = 0; i < 92; ++i)
3940 		get8(s);
3941 
3942 	x = get16(s);
3943 	y = get16(s);
3944 	if (at_eof(s)) return epuc("bad file", "file too short (pic header)");
3945 	if ((1 << 28) / x < y) return epuc("too large", "Image too large to decode");
3946 
3947 	get32(s);  //skip `ratio'
3948 	get16(s);  //skip `fields'
3949 	get16(s);  //skip `pad'
3950 
3951 	// intermediate buffer is RGBA
3952 	result = (stbi_uc *)malloc(x * y * 4);
3953 	memset(result, 0xff, x * y * 4);
3954 
3955 	if (!pic_load2(s, x, y, comp, result))
3956 	{
3957 		free(result);
3958 		result = 0;
3959 	}
3960 	*px = x;
3961 	*py = y;
3962 	if (req_comp == 0) req_comp = *comp;
3963 	result = convert_format(result, 4, req_comp, x, y);
3964 
3965 	return result;
3966 }
3967 
// Runs the PIC signature probe (pic_test) and then rewinds the stream with
// stbi_rewind() regardless of the outcome. Returns pic_test's result.
static int stbi_pic_test(stbi *s)
{
	const int is_pic = pic_test(s);

	stbi_rewind(s);
	return is_pic;
}
3974 
stbi_pic_load(stbi * s,int * x,int * y,int * comp,int req_comp)3975 static stbi_uc *stbi_pic_load(stbi *s, int *x, int *y, int *comp, int req_comp)
3976 {
3977 	return pic_load(s, x, y, comp, req_comp);
3978 }
3979 
3980 // *************************************************************************************************
3981 // GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
// One entry of the GIF LZW code table (stbi_gif.codes).
typedef struct stbi_gif_lzw_struct
{
	int16 prefix;  // code that precedes this one; stbi_out_gif_code stops following the chain when it is negative
	uint8 first;   // presumably the first value of the string this code expands to -- confirm against the LZW decoder
	uint8 suffix;  // value emitted for this code; stbi_out_gif_code uses it to index the color table
} stbi_gif_lzw;
3988 
3989 typedef struct stbi_gif_struct
3990 {
3991 	int w, h;
3992 	stbi_uc *out;  // output buffer (always 4 components)
3993 	int flags, bgindex, ratio, transparent, eflags;
3994 	uint8 pal[256][4];
3995 	uint8 lpal[256][4];
3996 	stbi_gif_lzw codes[4096];
3997 	uint8 *color_table;
3998 	int parse, step;
3999 	int lflags;
4000 	int start_x, start_y;
4001 	int max_x, max_y;
4002 	int cur_x, cur_y;
4003 	int line_size;
4004 } stbi_gif;
4005 
// Checks for a GIF signature: "GIF8", then '7' or '9', then 'a'.
// Returns 1 on a match and 0 at the first byte that differs; the stream is
// not rewound here.
static int gif_test(stbi *s)
{
	int version;

	if (get8(s) != 'G') return 0;
	if (get8(s) != 'I') return 0;
	if (get8(s) != 'F') return 0;
	if (get8(s) != '8') return 0;

	version = get8(s);
	if (version != '9' && version != '7') return 0;

	return get8(s) == 'a';
}
4015 
// Runs the GIF signature probe (gif_test) and then rewinds the stream with
// stbi_rewind() regardless of the outcome. Returns gif_test's result.
static int stbi_gif_test(stbi *s)
{
	const int is_gif = gif_test(s);

	stbi_rewind(s);
	return is_gif;
}
4022 
// Reads num_entries 3-byte colors from the stream into pal.
//
//   pal         - destination table; the three bytes read for an entry are
//                 stored at indices 2, 1, 0 in that order, alpha at index 3
//   num_entries - number of entries to read (at most 256)
//   transp      - index of the transparent entry, or -1 for none
//
// Only the entry whose index equals transp gets alpha 0; all others are
// opaque (255). transp is an index, not a flag: the header code passes -1
// to mean "no transparent entry", and treating it as a boolean would make
// every color fully transparent.
static void stbi_gif_parse_colortable(stbi *s, uint8 pal[256][4], int num_entries, int transp)
{
	int i;
	for (i = 0; i < num_entries; ++i)
	{
		pal[i][2] = get8u(s);
		pal[i][1] = get8u(s);
		pal[i][0] = get8u(s);
		pal[i][3] = (transp == i) ? 0 : 255;
	}
}
4034 
// Parses the GIF signature and the header fields that follow it into g.
//
//   comp    - if non-NULL, set to 4
//   is_info - non-zero: stop after the fixed header fields
//             zero: also load the color table that follows when flags & 0x80
//
// Returns 1 on success, or the result of e() if the signature is wrong.
static int stbi_gif_header(stbi *s, stbi_gif *g, int *comp, int is_info)
{
	uint8 version;
	int k;

	// signature bytes are checked one at a time, stopping at the first mismatch
	for (k = 0; k < 4; ++k)
	{
		if (get8(s) != "GIF8"[k])
			return e("not GIF", "Corrupt GIF");
	}

	version = get8u(s);
	if (version != '7' && version != '9') return e("not GIF", "Corrupt GIF");
	if (get8(s) != 'a') return e("not GIF", "Corrupt GIF");

	failure_reason = "";

	g->w = get16le(s);
	g->h = get16le(s);
	g->flags = get8(s);
	g->bgindex = get8(s);
	g->ratio = get8(s);
	g->transparent = -1;

	// can't actually tell whether it's 3 or 4 until we parse the comments
	if (comp != 0) *comp = 4;

	if (!is_info && (g->flags & 0x80))
	{
		// table size is 2^(n+1) entries, n taken from the low three flag bits
		stbi_gif_parse_colortable(s, g->pal, 2 << (g->flags & 7), -1);
	}

	return 1;
}
4062 
// Reads only the GIF header and reports the canvas size.
// 'x' and 'y' may be null. On failure the stream is rewound and 0 is returned;
// on success 1 is returned and the stream is left after the header fields.
static int stbi_gif_info_raw(stbi *s, int *x, int *y, int *comp)
{
	stbi_gif header;

	if (stbi_gif_header(s, &header, comp, 1))
	{
		if (x != 0) *x = header.w;
		if (y != 0) *y = header.h;
		return 1;
	}

	stbi_rewind(s);
	return 0;
}
4075 
// Emits the pixel string for one LZW code at the current cursor position.
//
// The dictionary chain runs from the last byte back to the first, so the
// prefix is expanded first (recursively) and this entry's own byte is written
// afterwards. Pixels whose palette alpha is below 128 are skipped, leaving
// the existing canvas content in place.
static void stbi_out_gif_code(stbi_gif *g, uint16 code)
{
	const stbi_gif_lzw *entry = &g->codes[code];
	uint8 *dst, *color;

	if (entry->prefix >= 0)
		stbi_out_gif_code(g, entry->prefix);

	// cursor already past the image rectangle: drop the pixel
	if (g->cur_y >= g->max_y)
		return;

	dst = g->out + (g->cur_x + g->cur_y);
	color = g->color_table + entry->suffix * 4;

	if (color[3] >= 128)
	{
		dst[0] = color[2];
		dst[1] = color[1];
		dst[2] = color[0];
		dst[3] = color[3];
	}

	g->cur_x += 4;
	if (g->cur_x < g->max_x)
		return;

	// end of row: wrap to the left edge and move down by the current row step
	g->cur_x = g->start_x;
	g->cur_y += g->step;

	// ran off the bottom with interlace passes left: start the next pass
	while (g->cur_y >= g->max_y && g->parse > 0)
	{
		g->step = (1 << g->parse) * g->line_size;
		g->cur_y = g->start_y + (g->step >> 1);
		--g->parse;
	}
}
4112 
stbi_process_gif_raster(stbi * s,stbi_gif * g)4113 static uint8 *stbi_process_gif_raster(stbi *s, stbi_gif *g)
4114 {
4115 	uint8 lzw_cs;
4116 	int32 len, code;
4117 	uint32 first;
4118 	int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
4119 	stbi_gif_lzw *p;
4120 
4121 	lzw_cs = get8u(s);
4122 	clear = 1 << lzw_cs;
4123 	first = 1;
4124 	codesize = lzw_cs + 1;
4125 	codemask = (1 << codesize) - 1;
4126 	bits = 0;
4127 	valid_bits = 0;
4128 	for (code = 0; code < clear; code++)
4129 	{
4130 		g->codes[code].prefix = -1;
4131 		g->codes[code].first = (uint8)code;
4132 		g->codes[code].suffix = (uint8)code;
4133 	}
4134 
4135 	// support no starting clear code
4136 	avail = clear + 2;
4137 	oldcode = -1;
4138 
4139 	len = 0;
4140 	for (;;)
4141 	{
4142 		if (valid_bits < codesize)
4143 		{
4144 			if (len == 0)
4145 			{
4146 				len = get8(s);  // start new block
4147 				if (len == 0)
4148 					return g->out;
4149 			}
4150 			--len;
4151 			bits |= (int32)get8(s) << valid_bits;
4152 			valid_bits += 8;
4153 		}
4154 		else
4155 		{
4156 			int32 code = bits & codemask;
4157 			bits >>= codesize;
4158 			valid_bits -= codesize;
4159 			// @OPTIMIZE: is there some way we can accelerate the non-clear path?
4160 			if (code == clear)
4161 			{  // clear code
4162 				codesize = lzw_cs + 1;
4163 				codemask = (1 << codesize) - 1;
4164 				avail = clear + 2;
4165 				oldcode = -1;
4166 				first = 0;
4167 			}
4168 			else if (code == clear + 1)
4169 			{  // end of stream code
4170 				skip(s, len);
4171 				while ((len = get8(s)) > 0)
4172 					skip(s, len);
4173 				return g->out;
4174 			}
4175 			else if (code <= avail)
4176 			{
4177 				if (first) return epuc("no clear code", "Corrupt GIF");
4178 
4179 				if (oldcode >= 0)
4180 				{
4181 					p = &g->codes[avail++];
4182 					if (avail > 4096) return epuc("too many codes", "Corrupt GIF");
4183 					p->prefix = (int16)oldcode;
4184 					p->first = g->codes[oldcode].first;
4185 					p->suffix = (code == avail) ? p->first : g->codes[code].first;
4186 				}
4187 				else if (code == avail)
4188 					return epuc("illegal code in raster", "Corrupt GIF");
4189 
4190 				stbi_out_gif_code(g, (uint16)code);
4191 
4192 				if ((avail & codemask) == 0 && avail <= 0x0FFF)
4193 				{
4194 					codesize++;
4195 					codemask = (1 << codesize) - 1;
4196 				}
4197 
4198 				oldcode = code;
4199 			}
4200 			else
4201 			{
4202 				return epuc("illegal code in raster", "Corrupt GIF");
4203 			}
4204 		}
4205 	}
4206 }
4207 
// Paints every pixel of the canvas (g->w * g->h, 4 bytes each) with the
// global-palette entry selected by g->bgindex, including that entry's alpha.
static void stbi_fill_gif_background(stbi_gif *g)
{
	const uint8 *bg = g->pal[g->bgindex];
	const int total = g->w * g->h * 4;  // canvas size in bytes
	int off = 0;

	// @OPTIMIZE: write a dword at a time
	while (off < total)
	{
		uint8 *px = g->out + off;
		// palette bytes [0..2] are stored in the reverse of output order
		px[0] = bg[2];
		px[1] = bg[1];
		px[2] = bg[0];
		px[3] = bg[3];
		off += 4;
	}
}
4222 
4223 // this function is designed to support animated gifs, although stb_image doesn't support it
stbi_gif_load_next(stbi * s,stbi_gif * g,int * comp,int req_comp)4224 static uint8 *stbi_gif_load_next(stbi *s, stbi_gif *g, int *comp, int req_comp)
4225 {
4226 	int i;
4227 	uint8 *old_out = 0;
4228 
4229 	if (g->out == 0)
4230 	{
4231 		if (!stbi_gif_header(s, g, comp, 0)) return 0;  // failure_reason set by stbi_gif_header
4232 		g->out = (uint8 *)malloc(4 * g->w * g->h);
4233 		if (g->out == 0) return epuc("outofmem", "Out of memory");
4234 		stbi_fill_gif_background(g);
4235 	}
4236 	else
4237 	{
4238 		// animated-gif-only path
4239 		if (((g->eflags & 0x1C) >> 2) == 3)
4240 		{
4241 			old_out = g->out;
4242 			g->out = (uint8 *)malloc(4 * g->w * g->h);
4243 			if (g->out == 0) return epuc("outofmem", "Out of memory");
4244 			memcpy(g->out, old_out, g->w * g->h * 4);
4245 		}
4246 	}
4247 
4248 	for (;;)
4249 	{
4250 		switch (get8(s))
4251 		{
4252 			case 0x2C: /* Image Descriptor */
4253 			{
4254 				int32 x, y, w, h;
4255 				uint8 *o;
4256 
4257 				x = get16le(s);
4258 				y = get16le(s);
4259 				w = get16le(s);
4260 				h = get16le(s);
4261 				if (((x + w) > (g->w)) || ((y + h) > (g->h)))
4262 					return epuc("bad Image Descriptor", "Corrupt GIF");
4263 
4264 				g->line_size = g->w * 4;
4265 				g->start_x = x * 4;
4266 				g->start_y = y * g->line_size;
4267 				g->max_x = g->start_x + w * 4;
4268 				g->max_y = g->start_y + h * g->line_size;
4269 				g->cur_x = g->start_x;
4270 				g->cur_y = g->start_y;
4271 
4272 				g->lflags = get8(s);
4273 
4274 				if (g->lflags & 0x40)
4275 				{
4276 					g->step = 8 * g->line_size;  // first interlaced spacing
4277 					g->parse = 3;
4278 				}
4279 				else
4280 				{
4281 					g->step = g->line_size;
4282 					g->parse = 0;
4283 				}
4284 
4285 				if (g->lflags & 0x80)
4286 				{
4287 					stbi_gif_parse_colortable(s, g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
4288 					g->color_table = (uint8 *)g->lpal;
4289 				}
4290 				else if (g->flags & 0x80)
4291 				{
4292 					for (i = 0; i < 256; ++i)  // @OPTIMIZE: reset only the previous transparent
4293 						g->pal[i][3] = 255;
4294 					if (g->transparent >= 0 && (g->eflags & 0x01))
4295 						g->pal[g->transparent][3] = 0;
4296 					g->color_table = (uint8 *)g->pal;
4297 				}
4298 				else
4299 					return epuc("missing color table", "Corrupt GIF");
4300 
4301 				o = stbi_process_gif_raster(s, g);
4302 				if (o == NULL) return NULL;
4303 
4304 				if (req_comp && req_comp != 4)
4305 					o = convert_format(o, 4, req_comp, g->w, g->h);
4306 				return o;
4307 			}
4308 
4309 			case 0x21:  // Comment Extension.
4310 			{
4311 				int len;
4312 				if (get8(s) == 0xF9)
4313 				{  // Graphic Control Extension.
4314 					len = get8(s);
4315 					if (len == 4)
4316 					{
4317 						g->eflags = get8(s);
4318 						get16le(s);  // delay
4319 						g->transparent = get8(s);
4320 					}
4321 					else
4322 					{
4323 						skip(s, len);
4324 						break;
4325 					}
4326 				}
4327 				while ((len = get8(s)) != 0)
4328 					skip(s, len);
4329 				break;
4330 			}
4331 
4332 			case 0x3B:  // gif stream termination code
4333 				return (uint8 *)1;
4334 
4335 			default:
4336 				return epuc("unknown code", "Corrupt GIF");
4337 		}
4338 	}
4339 }
4340 
stbi_gif_load(stbi * s,int * x,int * y,int * comp,int req_comp)4341 static stbi_uc *stbi_gif_load(stbi *s, int *x, int *y, int *comp, int req_comp)
4342 {
4343 	uint8 *u = 0;
4344 	stbi_gif g = {0};
4345 
4346 	u = stbi_gif_load_next(s, &g, comp, req_comp);
4347 	if (u == (void *)1) u = 0;  // end of animated gif marker
4348 	if (u)
4349 	{
4350 		*x = g.w;
4351 		*y = g.h;
4352 	}
4353 
4354 	return u;
4355 }
4356 
// Info entry point for GIF: forwards to stbi_gif_info_raw and returns its result.
static int stbi_gif_info(stbi *s, int *x, int *y, int *comp)
{
	const int ok = stbi_gif_info_raw(s, x, y, comp);
	return ok;
}
4361 
4362 // *************************************************************************************************
4363 // Radiance RGBE HDR loader
4364 // originally by Nicolas Schulz
4365 #ifndef STBI_NO_HDR
// Consumes bytes from the stream while they match "#?RADIANCE\n".
// Returns 1 when the whole signature matched, 0 at the first mismatch.
// Does not rewind.
static int hdr_test(stbi *s)
{
	const char *expected;

	for (expected = "#?RADIANCE\n"; *expected; ++expected)
	{
		if (get8(s) != *expected)
			return 0;
	}
	return 1;
}
4375 
// Non-destructive HDR signature probe: runs hdr_test, then rewinds the stream
// regardless of the outcome. Returns hdr_test's result.
static int stbi_hdr_test(stbi *s)
{
	const int is_hdr = hdr_test(s);
	stbi_rewind(s);
	return is_hdr;
}
4382 
4383 #define HDR_BUFLEN 1024
// Reads one text line from the stream into 'buffer' (which must hold
// HDR_BUFLEN bytes) and returns 'buffer'.
//
// The line ends at '\n' or when at_eof reports end of input; the newline is
// not stored. At most HDR_BUFLEN - 1 characters are kept; the remainder of an
// over-long line is consumed and dropped. The result is always NUL-terminated.
static char *hdr_gettoken(stbi *z, char *buffer)
{
	int n = 0;
	char ch = (char)get8(z);

	for (;;)
	{
		if (at_eof(z) || ch == '\n')
			break;

		buffer[n++] = ch;

		if (n == HDR_BUFLEN - 1)
		{
			// flush to end of line
			for (;;)
			{
				if (at_eof(z)) break;
				if (get8(z) == '\n') break;
			}
			break;
		}

		ch = (char)get8(z);
	}

	buffer[n] = 0;
	return buffer;
}
4407 
// Converts one 4-byte RGBE pixel (three mantissas plus a shared exponent in
// input[3]) into 'req_comp' floats at 'output'.
//
//   req_comp 1: grey            2: grey, alpha
//   req_comp 3: r, g, b         4: r, g, b, alpha
//
// Alpha is always 1. An exponent byte of 0 yields zero colour components.
static void hdr_convert(float *output, stbi_uc *input, int req_comp)
{
	float scale;

	if (input[3] == 0)
	{
		// zero exponent: all colour channels are zero
		if (req_comp == 3 || req_comp == 4)
		{
			if (req_comp == 4) output[3] = 1;
			output[0] = output[1] = output[2] = 0;
		}
		else if (req_comp == 1 || req_comp == 2)
		{
			if (req_comp == 2) output[1] = 1;
			output[0] = 0;
		}
		return;
	}

	// Exponent
	scale = (float)ldexp(1.0f, input[3] - (int)(128 + 8));

	if (req_comp <= 2)
	{
		// grey: average of the three channels
		output[0] = (input[0] + input[1] + input[2]) * scale / 3;
	}
	else
	{
		output[0] = input[0] * scale;
		output[1] = input[1] * scale;
		output[2] = input[2] * scale;
	}

	if (req_comp == 2)
		output[1] = 1;
	else if (req_comp == 4)
		output[3] = 1;
}
4443 
// Loads a Radiance RGBE (.hdr) image as floats.
//
//   x, y      receive the image width and height
//   comp      receives 3 (the number of components stored in the file)
//   req_comp  number of floats per output pixel; 0 means 3
//
// Only "FORMAT=32-bit_rle_rgbe" files with a "-Y <h> +X <w>" resolution line
// are accepted. Returns a malloc'ed array of width * height * req_comp floats,
// or NULL via epf() on unsupported, corrupt or oversized input and on
// allocation failure.
static float *hdr_load(stbi *s, int *x, int *y, int *comp, int req_comp)
{
	char buffer[HDR_BUFLEN];
	char *token;
	int valid = 0;
	long parsed_w, parsed_h;
	int width, height;
	stbi_uc *scanline;
	float *hdr_data;
	int len;
	unsigned char count, value;
	int i, j, k, c1, c2, z;

	// Check identifier
	if (strcmp(hdr_gettoken(s, buffer), "#?RADIANCE") != 0)
		return epf("not HDR", "Corrupt HDR image");

	// Parse header: lines up to the first empty one
	for (;;)
	{
		token = hdr_gettoken(s, buffer);
		if (token[0] == 0) break;
		if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
	}

	if (!valid) return epf("unsupported format", "Unsupported HDR format");

	// Parse width and height
	// can't use sscanf() if we're not using stdio!
	token = hdr_gettoken(s, buffer);
	if (strncmp(token, "-Y ", 3)) return epf("unsupported data layout", "Unsupported HDR format");
	token += 3;
	parsed_h = strtol(token, &token, 10);
	while (*token == ' ') ++token;
	if (strncmp(token, "+X ", 3)) return epf("unsupported data layout", "Unsupported HDR format");
	token += 3;
	parsed_w = strtol(token, NULL, 10);

	// the dimensions come straight from the file: reject values that are
	// negative or do not fit an int before they are used in size arithmetic
	if (parsed_w < 0 || parsed_h < 0)
		return epf("bad size", "Corrupt HDR image");
	if (parsed_w > 0x7FFFFFFFL || parsed_h > 0x7FFFFFFFL)
		return epf("too large", "HDR image is too large");
	width = (int)parsed_w;
	height = (int)parsed_h;

	*x = width;
	*y = height;

	*comp = 3;
	if (req_comp == 0) req_comp = 3;
	if (req_comp < 1) return epf("bad req_comp", "Internal error");

	// make sure width * height * req_comp * sizeof(float) stays within int range,
	// so neither the allocation size nor the pixel offsets below can overflow
	if (width != 0 && height > 0x7FFFFFFF / (int)sizeof(float) / req_comp / width)
		return epf("too large", "HDR image is too large");

	// Read data
	hdr_data = (float *)malloc(height * width * req_comp * sizeof(float));
	if (hdr_data == NULL) return epf("outofmem", "Out of memory");

	// Load image data
	// image data is stored as some number of scanlines
	if (width < 8 || width >= 32768)
	{
		// Read flat data
		for (j = 0; j < height; ++j)
		{
			for (i = 0; i < width; ++i)
			{
				stbi_uc rgbe[4];
			main_decode_loop:
				getn(s, rgbe, 4);
				hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
			}
		}
	}
	else
	{
		// Read RLE-encoded data
		scanline = NULL;

		for (j = 0; j < height; ++j)
		{
			c1 = get8(s);
			c2 = get8(s);
			len = get8(s);
			if (c1 != 2 || c2 != 2 || (len & 0x80))
			{
				// not run-length encoded, so we have to actually use THIS data as a decoded
				// pixel (note this can't be a valid pixel--one of RGB must be >= 128)
				uint8 rgbe[4];
				rgbe[0] = (uint8)c1;
				rgbe[1] = (uint8)c2;
				rgbe[2] = (uint8)len;
				rgbe[3] = (uint8)get8u(s);
				hdr_convert(hdr_data, rgbe, req_comp);
				// continue in the flat reader above, starting at the second pixel of the first row
				i = 1;
				j = 0;
				free(scanline);
				goto main_decode_loop;  // yes, this makes no sense
			}
			len <<= 8;
			len |= get8(s);
			if (len != width)
			{
				free(hdr_data);
				free(scanline);
				return epf("invalid decoded scanline length", "corrupt HDR");
			}
			if (scanline == NULL)
			{
				scanline = (stbi_uc *)malloc(width * 4);
				if (scanline == NULL)
				{
					free(hdr_data);
					return epf("outofmem", "Out of memory");
				}
			}

			// the four channels of a scanline are stored one after another,
			// each run-length encoded on its own
			for (k = 0; k < 4; ++k)
			{
				int nleft;
				i = 0;
				while ((nleft = width - i) > 0)
				{
					count = get8u(s);
					if (count > 128)
					{
						// Run
						value = get8u(s);
						count -= 128;
						// a run longer than the rest of the row would write past 'scanline'
						if (count > nleft)
						{
							free(hdr_data);
							free(scanline);
							return epf("corrupt", "bad RLE data in HDR");
						}
						for (z = 0; z < count; ++z)
							scanline[i++ * 4 + k] = value;
					}
					else
					{
						// Dump
						// an empty dump makes no progress (and would spin forever at end
						// of input); an over-long one would write past 'scanline'
						if (count == 0 || count > nleft)
						{
							free(hdr_data);
							free(scanline);
							return epf("corrupt", "bad RLE data in HDR");
						}
						for (z = 0; z < count; ++z)
							scanline[i++ * 4 + k] = get8u(s);
					}
				}
			}
			for (i = 0; i < width; ++i)
				hdr_convert(hdr_data + (j * width + i) * req_comp, scanline + i * 4, req_comp);
		}
		free(scanline);
	}

	return hdr_data;
}
4571 
// Loader entry point for Radiance HDR: forwards every argument to hdr_load
// and hands its result back unchanged.
static float *stbi_hdr_load(stbi *s, int *x, int *y, int *comp, int req_comp)
{
	float *pixels = hdr_load(s, x, y, comp, req_comp);
	return pixels;
}
4576 
// Reads the header of a Radiance HDR stream and reports its dimensions.
//
// On success stores the height in *y, the width in *x, 3 in *comp and
// returns 1. On any mismatch the stream is rewound and 0 is returned.
static int stbi_hdr_info(stbi *s, int *x, int *y, int *comp)
{
	char buffer[HDR_BUFLEN];
	char *line;
	int has_format = 0;

	if (strcmp(hdr_gettoken(s, buffer), "#?RADIANCE") != 0)
		goto not_hdr;

	// header lines run up to the first empty line
	while ((line = hdr_gettoken(s, buffer))[0] != 0)
	{
		if (strcmp(line, "FORMAT=32-bit_rle_rgbe") == 0)
			has_format = 1;
	}
	if (!has_format)
		goto not_hdr;

	// resolution line: "-Y <height> +X <width>"
	line = hdr_gettoken(s, buffer);
	if (strncmp(line, "-Y ", 3) != 0)
		goto not_hdr;
	line += 3;
	*y = strtol(line, &line, 10);

	while (*line == ' ')
		++line;

	if (strncmp(line, "+X ", 3) != 0)
		goto not_hdr;
	line += 3;
	*x = strtol(line, NULL, 10);

	*comp = 3;
	return 1;

not_hdr:
	stbi_rewind(s);
	return 0;
}
4620 #endif  // STBI_NO_HDR
4621 
// Reads the header of a BMP stream and reports its dimensions.
//
// Accepts info-header sizes 12, 40, 56 and 108. On success stores the size in
// *x / *y, the bit count divided by 8 in *comp, and returns 1. On any
// mismatch the stream is rewound and 0 is returned.
static int stbi_bmp_info(stbi *s, int *x, int *y, int *comp)
{
	int header_size;

	if (!(get8(s) == 'B' && get8(s) == 'M'))
		goto not_bmp;

	skip(s, 12);
	header_size = get32le(s);

	switch (header_size)
	{
		case 12:
			// the smallest header stores 16-bit dimensions
			*x = get16le(s);
			*y = get16le(s);
			break;

		case 40:
		case 56:
		case 108:
			*x = get32le(s);
			*y = get32le(s);
			break;

		default:
			goto not_bmp;
	}

	// the next 16-bit field must be 1
	if (get16le(s) != 1)
		goto not_bmp;

	*comp = get16le(s) / 8;
	return 1;

not_bmp:
	stbi_rewind(s);
	return 0;
}
4655 
// Reads the header of a PSD stream and reports its dimensions.
//
// On success stores the height in *y, the width in *x, 4 in *comp and
// returns 1. On any mismatch the stream is rewound and 0 is returned.
static int stbi_psd_info(stbi *s, int *x, int *y, int *comp)
{
	int channels;

	// signature: the characters '8','B','P','S' as one 32-bit value
	if (get32(s) != 0x38425053)
		goto not_psd;

	// the following 16-bit field must be 1
	if (get16(s) != 1)
		goto not_psd;

	skip(s, 6);

	channels = get16(s);
	if (!(channels >= 0 && channels <= 16))
		goto not_psd;

	*y = get32(s);
	*x = get32(s);

	// two more 16-bit fields, which must be 8 and 3 respectively
	if (get16(s) != 8)
		goto not_psd;
	if (get16(s) != 3)
		goto not_psd;

	*comp = 4;
	return 1;

not_psd:
	stbi_rewind(s);
	return 0;
}
4691 
// Reads the header of a PIC stream and reports its dimensions.
//
// On success stores the size in *x / *y, stores 4 in *comp when any packet's
// channel mask has bit 0x10 set (3 otherwise), and returns 1.
//
// On every failure the stream is rewound before returning 0, so that the
// format probes which run after this one start from the beginning again.
static int stbi_pic_info(stbi *s, int *x, int *y, int *comp)
{
	int act_comp = 0, num_packets = 0, chained;
	pic_packet_t packets[10];

	skip(s, 92);

	*x = get16(s);
	*y = get16(s);
	if (at_eof(s))
	{
		stbi_rewind(s);
		return 0;
	}
	// reject sizes whose pixel count exceeds 1 << 28
	if ((*x) != 0 && (1 << 28) / (*x) < (*y))
	{
		stbi_rewind(s);
		return 0;
	}

	skip(s, 8);

	// packet descriptors: 4 bytes each, chained until a descriptor's first byte is 0
	do
	{
		pic_packet_t *packet;

		// more packets than the table can hold
		if (num_packets == sizeof(packets) / sizeof(packets[0]))
		{
			stbi_rewind(s);
			return 0;
		}

		packet = &packets[num_packets++];
		chained = get8(s);
		packet->size = get8u(s);
		packet->type = get8u(s);
		packet->channel = get8u(s);
		act_comp |= packet->channel;  // union of all channel masks

		if (at_eof(s))
		{
			stbi_rewind(s);
			return 0;
		}
		if (packet->size != 8)
		{
			stbi_rewind(s);
			return 0;
		}
	} while (chained);

	*comp = (act_comp & 0x10 ? 4 : 3);

	return 1;
}
4740 
// Tries each format's info probe in turn and stops at the first that succeeds.
// Returns 1 on success; otherwise returns the result of
// e("unknown image type", ...).
static int stbi_info_main(stbi *s, int *x, int *y, int *comp)
{
	int found = stbi_jpeg_info(s, x, y, comp) ||
	            stbi_png_info(s, x, y, comp) ||
	            stbi_gif_info(s, x, y, comp) ||
	            stbi_bmp_info(s, x, y, comp) ||
	            stbi_psd_info(s, x, y, comp) ||
	            stbi_pic_info(s, x, y, comp);

#ifndef STBI_NO_HDR
	if (!found && stbi_hdr_info(s, x, y, comp))
		found = 1;
#endif

	// test tga last because it's a crappy test!
	if (!found && stbi_tga_info(s, x, y, comp))
		found = 1;

	if (found)
		return 1;
	return e("unknown image type", "Image not of any known type, or corrupt");
}
4764 
4765 #ifndef STBI_NO_STDIO
// Opens 'filename' in binary mode, queries the image information through
// stbi_info_from_file and closes the file again.
// Returns that query's result, or the result of e("can't fopen", ...) when the
// file cannot be opened.
int stbi_info(char const *filename, int *x, int *y, int *comp)
{
	int ok;
	FILE *fp = fopen(filename, "rb");

	if (fp == NULL)
		return e("can't fopen", "Unable to open file");

	ok = stbi_info_from_file(fp, x, y, comp);
	fclose(fp);
	return ok;
}
4775 
stbi_info_from_file(FILE * f,int * x,int * y,int * comp)4776 int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
4777 {
4778 	int r;
4779 	stbi s;
4780 	long pos = ftell(f);
4781 	start_file(&s, f);
4782 	r = stbi_info_main(&s, x, y, comp);
4783 	fseek(f, pos, SEEK_SET);
4784 	return r;
4785 }
4786 #endif  // !STBI_NO_STDIO
4787 
// Queries image information from 'len' bytes at 'buffer'.
// Returns the result of stbi_info_main.
int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
{
	stbi ctx;
	int found;

	start_mem(&ctx, buffer, len);
	found = stbi_info_main(&ctx, x, y, comp);
	return found;
}
4794 
// Queries image information from a stream read through the I/O callbacks 'c',
// which are invoked with 'user' as their context.
// Returns the result of stbi_info_main.
int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
{
	stbi ctx;
	int found;

	start_callbacks(&ctx, (stbi_io_callbacks *)c, user);
	found = stbi_info_main(&ctx, x, y, comp);
	return found;
}
4801 
4802 #endif  // STBI_HEADER_FILE_ONLY
4803 
4804 /*
4805    revision history:
4806       1.33 (2011-07-14)
4807              make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
4808       1.32 (2011-07-13)
4809              support for "info" function for all supported filetypes (SpartanJ)
4810       1.31 (2011-06-20)
4811              a few more leak fixes, bug in PNG handling (SpartanJ)
4812       1.30 (2011-06-11)
             added ability to load files via callbacks to accommodate custom input streams (Ben Wenger)
4814              removed deprecated format-specific test/load functions
4815              removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
4816              error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
4817              fix inefficiency in decoding 32-bit BMP (David Woo)
4818       1.29 (2010-08-16)
4819              various warning fixes from Aurelien Pocheville
4820       1.28 (2010-08-01)
4821              fix bug in GIF palette transparency (SpartanJ)
4822       1.27 (2010-08-01)
4823              cast-to-uint8 to fix warnings
4824       1.26 (2010-07-24)
4825              fix bug in file buffering for PNG reported by SpartanJ
4826       1.25 (2010-07-17)
4827              refix trans_data warning (Won Chun)
4828       1.24 (2010-07-12)
4829              perf improvements reading from files on platforms with lock-heavy fgetc()
4830              minor perf improvements for jpeg
4831              deprecated type-specific functions so we'll get feedback if they're needed
4832              attempt to fix trans_data warning (Won Chun)
4833       1.23   fixed bug in iPhone support
4834       1.22 (2010-07-10)
4835              removed image *writing* support
4836              stbi_info support from Jetro Lauha
4837              GIF support from Jean-Marc Lienher
4838              iPhone PNG-extensions from James Brown
4839              warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez (U+017D)emva)
4840       1.21   fix use of 'uint8' in header (reported by jon blow)
4841       1.20   added support for Softimage PIC, by Tom Seddon
4842       1.19   bug in interlaced PNG corruption check (found by ryg)
4843       1.18 2008-08-02
4844              fix a threading bug (local mutable static)
4845       1.17   support interlaced PNG
4846       1.16   major bugfix - convert_format converted one too many pixels
4847       1.15   initialize some fields for thread safety
4848       1.14   fix threadsafe conversion bug
4849              header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
4850       1.13   threadsafe
4851       1.12   const qualifiers in the API
4852       1.11   Support installable IDCT, colorspace conversion routines
4853       1.10   Fixes for 64-bit (don't use "unsigned long")
4854              optimized upsampling by Fabian "ryg" Giesen
4855       1.09   Fix format-conversion for PSD code (bad global variables!)
4856       1.08   Thatcher Ulrich's PSD code integrated by Nicolas Schulz
4857       1.07   attempt to fix C++ warning/errors again
4858       1.06   attempt to fix C++ warning/errors again
4859       1.05   fix TGA loading to return correct *comp and use good luminance calc
4860       1.04   default float alpha is 1, not 255; use 'void *' for stbi_image_free
4861       1.03   bugfixes to STBI_NO_STDIO, STBI_NO_HDR
4862       1.02   support for (subset of) HDR files, float interface for preferred access to them
4863       1.01   fix bug: possible bug in handling right-side up bmps... not sure
4864              fix bug: the stbi_bmp_load() and stbi_tga_load() functions didn't work at all
4865       1.00   interface to zlib that skips zlib header
4866       0.99   correct handling of alpha in palette
4867       0.98   TGA loader by lonesock; dynamically add loaders (untested)
4868       0.97   jpeg errors on too large a file; also catch another malloc failure
4869       0.96   fix detection of invalid v value - particleman@mollyrocket forum
4870       0.95   during header scan, seek to markers in case of padding
4871       0.94   STBI_NO_STDIO to disable stdio usage; rename all #defines the same
4872       0.93   handle jpegtran output; verbose errors
4873       0.92   read 4,8,16,24,32-bit BMP files of several formats
4874       0.91   output 24-bit Windows 3.0 BMP files
4875       0.90   fix a few more warnings; bump version number to approach 1.0
4876       0.61   bugfixes due to Marc LeBlanc, Christopher Lloyd
4877       0.60   fix compiling as c++
4878       0.59   fix warnings: merge Dave Moore's -Wall fixes
4879       0.58   fix bug: zlib uncompressed mode len/nlen was wrong endian
4880       0.57   fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
4881       0.56   fix bug: zlib uncompressed mode len vs. nlen
4882       0.55   fix bug: restart_interval not initialized to 0
4883       0.54   allow NULL for 'int *comp'
4884       0.53   fix bug in png 3->4; speedup png decoding
4885       0.52   png handles req_comp=3,4 directly; minor cleanup; jpeg comments
4886       0.51   obey req_comp requests, 1-component jpegs return as 1-component,
4887              on 'test' only check type, not whether we support this variant
4888       0.50   first released version
4889 */
4890