1 /* stb_image - v2.19 - public domain image loader - http://nothings.org/stb
2                                   no warranty implied; use at your own risk
3 
4    Do this:
5       #define STB_IMAGE_IMPLEMENTATION
6    before you include this file in *one* C or C++ file to create the implementation.
7 
8    // i.e. it should look like this:
9    #include ...
10    #include ...
11    #include ...
12    #define STB_IMAGE_IMPLEMENTATION
13    #include "stb_image.h"
14 
15    You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
16    And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
17 
18 
19    QUICK NOTES:
20       Primarily of interest to game developers and other people who can
21           avoid problematic images and only need the trivial interface
22 
23       JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
24       PNG 1/2/4/8/16-bit-per-channel
25 
26       TGA (not sure what subset, if a subset)
27       BMP non-1bpp, non-RLE
28       PSD (composited view only, no extra channels, 8/16 bit-per-channel)
29 
30       GIF (*comp always reports as 4-channel)
31       HDR (radiance rgbE format)
32       PIC (Softimage PIC)
33       PNM (PPM and PGM binary only)
34 
35       Animated GIF still needs a proper API, but here's one way to do it:
36           http://gist.github.com/urraka/685d9a6340b26b830d49
37 
38       - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
39       - decode from arbitrary I/O callbacks
40       - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
41 
42    Full documentation under "DOCUMENTATION" below.
43 
44 
45 LICENSE
46 
47   See end of file for license information.
48 
49 RECENT REVISION HISTORY:
50 
51       2.19  (2018-02-11) fix warning
52       2.18  (2018-01-30) fix warnings
53       2.17  (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings
54       2.16  (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes
55       2.15  (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
56       2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
57       2.13  (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
58       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
59       2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
60                          RGB-format JPEG; remove white matting in PSD;
61                          allocate large structures on the stack;
62                          correct channel count for PNG & BMP
63       2.10  (2016-01-22) avoid warning introduced in 2.09
64       2.09  (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
65 
66    See end of file for full revision history.
67 
68 
69  ============================    Contributors    =========================
70 
71  Image formats                          Extensions, features
72     Sean Barrett (jpeg, png, bmp)          Jetro Lauha (stbi_info)
73     Nicolas Schulz (hdr, psd)              Martin "SpartanJ" Golini (stbi_info)
74     Jonathan Dummer (tga)                  James "moose2000" Brown (iPhone PNG)
75     Jean-Marc Lienher (gif)                Ben "Disch" Wenger (io callbacks)
76     Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG)
77     Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip)
78     Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD)
79     github:urraka (animated gif)           Junggon Kim (PNM comments)
80     Christopher Forseth (animated gif)     Daniel Gibson (16-bit TGA)
81                                            socks-the-fox (16-bit PNG)
82                                            Jeremy Sawicki (handle all ImageNet JPGs)
83  Optimizations & bugfixes                  Mikhail Morozov (1-bit BMP)
84     Fabian "ryg" Giesen                    Anael Seghezzi (is-16-bit query)
85     Arseny Kapoulkine
86     John-Mark Allen
87 
88  Bug & warning fixes
89     Marc LeBlanc            David Woo          Guillaume George   Martins Mozeiko
90     Christpher Lloyd        Jerry Jansson      Joseph Thomson     Phil Jordan
91     Dave Moore              Roy Eltham         Hayaki Saito       Nathan Reed
92     Won Chun                Luke Graham        Johan Duparc       Nick Verigakis
93     the Horde3D community   Thomas Ruf         Ronny Chevalier    github:rlyeh
94     Janez Zemva             John Bartholomew   Michal Cichon      github:romigrou
95     Jonathan Blow           Ken Hamada         Tero Hanninen      github:svdijk
96     Laurent Gomila          Cort Stratton      Sergio Gonzalez    github:snagar
97     Aruelien Pocheville     Thibault Reuille   Cass Everitt       github:Zelex
98     Ryamond Barbiero        Paul Du Bois       Engin Manap        github:grim210
99     Aldo Culquicondor       Philipp Wiesemann  Dale Weiler        github:sammyhw
100     Oriol Ferrer Mesia      Josh Tobin         Matthew Gregan     github:phprus
101     Julian Raschke          Gregory Mullen     Baldur Karlsson    github:poppolopoppo
102     Christian Floisand      Kevin Schmidt                         github:darealshinji
103     Blazej Dariusz Roszkowski                                     github:Michaelangel007
104 */
105 
106 #ifndef STBI_INCLUDE_STB_IMAGE_H
107 #define STBI_INCLUDE_STB_IMAGE_H
108 
109 // DOCUMENTATION
110 //
111 // Limitations:
112 //    - no 12-bit-per-channel JPEG
113 //    - no JPEGs with arithmetic coding
114 //    - GIF always returns *comp=4
115 //
116 // Basic usage (see HDR discussion below for HDR usage):
117 //    int x,y,n;
118 //    unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
119 //    // ... process data if not NULL ...
120 //    // ... x = width, y = height, n = # 8-bit components per pixel ...
121 //    // ... replace '0' with '1'..'4' to force that many components per pixel
122 //    // ... but 'n' will always be the number that it would have been if you said 0
123 //    stbi_image_free(data)
124 //
125 // Standard parameters:
126 //    int *x                 -- outputs image width in pixels
127 //    int *y                 -- outputs image height in pixels
128 //    int *channels_in_file  -- outputs # of image components in image file
129 //    int desired_channels   -- if non-zero, # of image components requested in result
130 //
131 // The return value from an image loader is an 'unsigned char *' which points
132 // to the pixel data, or NULL on an allocation failure or if the image is
133 // corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
134 // with each pixel consisting of N interleaved 8-bit components; the first
135 // pixel pointed to is top-left-most in the image. There is no padding between
136 // image scanlines or between pixels, regardless of format. The number of
137 // components N is 'desired_channels' if desired_channels is non-zero, or
138 // *channels_in_file otherwise. If desired_channels is non-zero,
139 // *channels_in_file has the number of components that _would_ have been
140 // output otherwise. E.g. if you set desired_channels to 4, you will always
141 // get RGBA output, but you can check *channels_in_file to see if it's trivially
142 // opaque because e.g. there were only 3 channels in the source image.
143 //
144 // An output image with N components has the following components interleaved
145 // in this order in each pixel:
146 //
147 //     N=#comp     components
148 //       1           grey
149 //       2           grey, alpha
150 //       3           red, green, blue
151 //       4           red, green, blue, alpha
152 //
153 // If image loading fails for any reason, the return value will be NULL,
154 // and *x, *y, *channels_in_file will be unchanged. The function
155 // stbi_failure_reason() can be queried for an extremely brief, end-user
156 // unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS
157 // to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
158 // more user-friendly ones.
159 //
160 // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
161 //
162 // ===========================================================================
163 //
164 // Philosophy
165 //
166 // stb libraries are designed with the following priorities:
167 //
168 //    1. easy to use
169 //    2. easy to maintain
170 //    3. good performance
171 //
172 // Sometimes I let "good performance" creep up in priority over "easy to maintain",
173 // and for best performance I may provide less-easy-to-use APIs that give higher
174 // performance, in addition to the easy to use ones. Nevertheless, it's important
175 // to keep in mind that from the standpoint of you, a client of this library,
176 // all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
177 //
178 // Some secondary priorities arise directly from the first two, some of which
179 // make more explicit reasons why performance can't be emphasized.
180 //
181 //    - Portable ("ease of use")
182 //    - Small source code footprint ("easy to maintain")
183 //    - No dependencies ("ease of use")
184 //
185 // ===========================================================================
186 //
187 // I/O callbacks
188 //
189 // I/O callbacks allow you to read from arbitrary sources, like packaged
190 // files or some other source. Data read from callbacks are processed
191 // through a small internal buffer (currently 128 bytes) to try to reduce
192 // overhead.
193 //
194 // The three functions you must define are "read" (reads some bytes of data),
195 // "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
196 //
197 // ===========================================================================
198 //
199 // SIMD support
200 //
201 // The JPEG decoder will try to automatically use SIMD kernels on x86 when
202 // supported by the compiler. For ARM Neon support, you must explicitly
203 // request it.
204 //
205 // (The old do-it-yourself SIMD API is no longer supported in the current
206 // code.)
207 //
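// On x86 with Visual C++, SSE2 is used when a run-time CPUID test reports it
// as available; otherwise the generic C versions are used as a fall-back. With
// GCC/Clang there is no run-time test: 64-bit builds always use SSE2, and
// 32-bit builds use SSE2 only when the compiler has it enabled (e.g. -msse2;
// 32-bit MinGW additionally requires STBI_MINGW_ENABLE_SSE2). On ARM targets,
// the typical path is to have separate builds for NEON and non-NEON devices
// (at least this is true for iOS and Android). Therefore, the NEON support is
// toggled by a build flag: define STBI_NEON to get NEON loops.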
213 //
214 // If for some reason you do not want to use any of SIMD code, or if
215 // you have issues compiling it, you can disable it entirely by
216 // defining STBI_NO_SIMD.
217 //
218 // ===========================================================================
219 //
220 // HDR image support   (disable by defining STBI_NO_HDR)
221 //
222 // stb_image now supports loading HDR images in general, and currently
223 // the Radiance .HDR file format, although the support is provided
224 // generically. You can still load any file through the existing interface;
225 // if you attempt to load an HDR file, it will be automatically remapped to
226 // LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
227 // both of these constants can be reconfigured through this interface:
228 //
229 //     stbi_hdr_to_ldr_gamma(2.2f);
230 //     stbi_hdr_to_ldr_scale(1.0f);
231 //
// (note, do not use _inverse_ constants; stb_image will invert them
// appropriately).
234 //
235 // Additionally, there is a new, parallel interface for loading files as
236 // (linear) floats to preserve the full dynamic range:
237 //
238 //    float *data = stbi_loadf(filename, &x, &y, &n, 0);
239 //
240 // If you load LDR images through this interface, those images will
241 // be promoted to floating point values, run through the inverse of
242 // constants corresponding to the above:
243 //
244 //     stbi_ldr_to_hdr_scale(1.0f);
245 //     stbi_ldr_to_hdr_gamma(2.2f);
246 //
247 // Finally, given a filename (or an open file or memory block--see header
248 // file for details) containing image data, you can query for the "most
249 // appropriate" interface to use (that is, whether the image is HDR or
250 // not), using:
251 //
252 //     stbi_is_hdr(char *filename);
253 //
254 // ===========================================================================
255 //
256 // iPhone PNG support:
257 //
258 // By default we convert iphone-formatted PNGs back to RGB, even though
259 // they are internally encoded differently. You can disable this conversion
// by calling stbi_convert_iphone_png_to_rgb(0), in which case
261 // you will always just get the native iphone "format" through (which
262 // is BGR stored in RGB).
263 //
264 // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
265 // pixel to remove any premultiplied alpha *only* if the image file explicitly
266 // says there's premultiplied data (currently only happens in iPhone images,
267 // and only if iPhone convert-to-rgb processing is on).
268 //
269 // ===========================================================================
270 //
271 // ADDITIONAL CONFIGURATION
272 //
273 //  - You can suppress implementation of any of the decoders to reduce
274 //    your code footprint by #defining one or more of the following
275 //    symbols before creating the implementation.
276 //
277 //        STBI_NO_JPEG
278 //        STBI_NO_PNG
279 //        STBI_NO_BMP
280 //        STBI_NO_PSD
281 //        STBI_NO_TGA
282 //        STBI_NO_GIF
283 //        STBI_NO_HDR
284 //        STBI_NO_PIC
285 //        STBI_NO_PNM   (.ppm and .pgm)
286 //
287 //  - You can request *only* certain decoders and suppress all other ones
288 //    (this will be more forward-compatible, as addition of new decoders
289 //    doesn't require you to disable them explicitly):
290 //
291 //        STBI_ONLY_JPEG
292 //        STBI_ONLY_PNG
293 //        STBI_ONLY_BMP
294 //        STBI_ONLY_PSD
295 //        STBI_ONLY_TGA
296 //        STBI_ONLY_GIF
297 //        STBI_ONLY_HDR
298 //        STBI_ONLY_PIC
299 //        STBI_ONLY_PNM   (.ppm and .pgm)
300 //
301 //   - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
302 //     want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
303 //
304 
305 
306 #ifndef STBI_NO_STDIO
307 #include <stdio.h>
308 #endif // STBI_NO_STDIO
309 
310 #define STBI_VERSION 1
311 
// Symbolic names for the 'desired_channels' argument of the loaders.
// Each non-default value equals the number of interleaved components per
// output pixel, matching the "N=#comp" table in the documentation above.
enum
{
   STBI_default = 0, // only used for desired_channels

   STBI_grey       = 1, // grey
   STBI_grey_alpha = 2, // grey, alpha
   STBI_rgb        = 3, // red, green, blue
   STBI_rgb_alpha  = 4  // red, green, blue, alpha
};
321 
322 typedef unsigned char stbi_uc;
323 typedef unsigned short stbi_us;
324 
325 #ifdef __cplusplus
326 extern "C" {
327 #endif
328 
329 #ifdef STB_IMAGE_STATIC
330 #define STBIDEF static
331 #else
332 #define STBIDEF extern
333 #endif
334 
335 //////////////////////////////////////////////////////////////////////////////
336 //
337 // PRIMARY API - works on images of any type
338 //
339 
340 //
341 // load image by filename, open file, or memory buffer
342 //
343 
// Table of I/O callbacks used by the *_from_callbacks entry points to pull
// image data from an arbitrary source. The 'user' argument of each callback
// is the 'void *user' pointer that was handed to the entry point together
// with this table; the library passes it through without interpreting it.
typedef struct
{
   int      (*read)  (void *user,char *data,int size);   // fill 'data' with 'size' bytes.  return number of bytes actually read
   void     (*skip)  (void *user,int n);                 // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
   int      (*eof)   (void *user);                       // returns nonzero if we are at end of file/data
} stbi_io_callbacks;
350 
351 ////////////////////////////////////
352 //
353 // 8-bits-per-channel interface
354 //
355 
356 STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *channels_in_file, int desired_channels);
357 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *channels_in_file, int desired_channels);
358 #ifndef STBI_NO_GIF
359 STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
360 #endif
361 
362 
363 #ifndef STBI_NO_STDIO
364 STBIDEF stbi_uc *stbi_load            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
365 STBIDEF stbi_uc *stbi_load_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
366 // for stbi_load_from_file, file pointer is left pointing immediately after image
367 #endif
368 
369 ////////////////////////////////////
370 //
371 // 16-bits-per-channel interface
372 //
373 
374 STBIDEF stbi_us *stbi_load_16_from_memory   (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
375 STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
376 
377 #ifndef STBI_NO_STDIO
378 STBIDEF stbi_us *stbi_load_16          (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
379 STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
380 #endif
381 
382 ////////////////////////////////////
383 //
384 // float-per-channel interface
385 //
386 #ifndef STBI_NO_LINEAR
387    STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
388    STBIDEF float *stbi_loadf_from_callbacks  (stbi_io_callbacks const *clbk, void *user, int *x, int *y,  int *channels_in_file, int desired_channels);
389 
390    #ifndef STBI_NO_STDIO
391    STBIDEF float *stbi_loadf            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
392    STBIDEF float *stbi_loadf_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
393    #endif
394 #endif
395 
396 #ifndef STBI_NO_HDR
397    STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma);
398    STBIDEF void   stbi_hdr_to_ldr_scale(float scale);
399 #endif // STBI_NO_HDR
400 
401 #ifndef STBI_NO_LINEAR
402    STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma);
403    STBIDEF void   stbi_ldr_to_hdr_scale(float scale);
404 #endif // STBI_NO_LINEAR
405 
406 // stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
407 STBIDEF int    stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
408 STBIDEF int    stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
409 #ifndef STBI_NO_STDIO
410 STBIDEF int      stbi_is_hdr          (char const *filename);
411 STBIDEF int      stbi_is_hdr_from_file(FILE *f);
412 #endif // STBI_NO_STDIO
413 
414 
415 // get a VERY brief reason for failure
416 // NOT THREADSAFE
417 STBIDEF const char *stbi_failure_reason  (void);
418 
419 // free the loaded image -- this is just free()
420 STBIDEF void     stbi_image_free      (void *retval_from_stbi_load);
421 
422 // get image dimensions & components without fully decoding
423 STBIDEF int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
424 STBIDEF int      stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);
425 STBIDEF int      stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len);
426 STBIDEF int      stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user);
427 
428 #ifndef STBI_NO_STDIO
429 STBIDEF int      stbi_info               (char const *filename,     int *x, int *y, int *comp);
430 STBIDEF int      stbi_info_from_file     (FILE *f,                  int *x, int *y, int *comp);
431 STBIDEF int      stbi_is_16_bit          (char const *filename);
432 STBIDEF int      stbi_is_16_bit_from_file(FILE *f);
433 #endif
434 
435 
436 
437 // for image formats that explicitly notate that they have premultiplied alpha,
438 // we just return the colors as stored in the file. set this flag to force
// unpremultiplication. results are undefined if the unpremultiply overflows.
440 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
441 
442 // indicate whether we should process iphone images back to canonical format,
443 // or just pass them through "as-is"
444 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
445 
446 // flip the image vertically, so the first pixel in the output array is the bottom left
447 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
448 
449 // ZLIB client - used by PNG, available for other purposes
450 
451 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
452 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
453 STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
454 STBIDEF int   stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
455 
456 STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
457 STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
458 
459 
460 #ifdef __cplusplus
461 }
462 #endif
463 
464 //
465 //
466 ////   end header file   /////////////////////////////////////////////////////
467 #endif // STBI_INCLUDE_STB_IMAGE_H
468 
469 #ifdef STB_IMAGE_IMPLEMENTATION
470 
// STBI_ONLY_* handling: as soon as any STBI_ONLY_<format> symbol is defined,
// every image decoder whose own STBI_ONLY_<format> symbol is NOT defined is
// switched off by defining the corresponding STBI_NO_<format>.
// STBI_ONLY_ZLIB takes part in the trigger condition but has no decoder
// entry of its own in the list below.
#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
  || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
  || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
  || defined(STBI_ONLY_ZLIB)
   #ifndef STBI_ONLY_JPEG
   #define STBI_NO_JPEG
   #endif
   #ifndef STBI_ONLY_PNG
   #define STBI_NO_PNG
   #endif
   #ifndef STBI_ONLY_BMP
   #define STBI_NO_BMP
   #endif
   #ifndef STBI_ONLY_PSD
   #define STBI_NO_PSD
   #endif
   #ifndef STBI_ONLY_TGA
   #define STBI_NO_TGA
   #endif
   #ifndef STBI_ONLY_GIF
   #define STBI_NO_GIF
   #endif
   #ifndef STBI_ONLY_HDR
   #define STBI_NO_HDR
   #endif
   #ifndef STBI_ONLY_PIC
   #define STBI_NO_PIC
   #endif
   #ifndef STBI_ONLY_PNM
   #define STBI_NO_PNM
   #endif
#endif
503 
// When PNG is disabled, STBI_NO_ZLIB is defined as well unless the client
// asked to keep the zlib decoder by defining STBI_SUPPORT_ZLIB.
// NOTE(review): defining only STBI_ONLY_ZLIB makes the STBI_ONLY_* handling
// define STBI_NO_PNG, so this test then defines STBI_NO_ZLIB too unless
// STBI_SUPPORT_ZLIB is also defined -- confirm that this is intended.
#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
#define STBI_NO_ZLIB
#endif
507 
508 
509 #include <stdarg.h>
510 #include <stddef.h> // ptrdiff_t on osx
511 #include <stdlib.h>
512 #include <string.h>
513 #include <limits.h>
514 
515 #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
516 #include <math.h>  // ldexp, pow
517 #endif
518 
519 #ifndef STBI_NO_STDIO
520 #include <stdio.h>
521 #endif
522 
523 #ifndef STBI_ASSERT
524 #include <assert.h>
525 #define STBI_ASSERT(x) assert(x)
526 #endif
527 
528 
// stbi_inline: function qualifier used to request inlining.
//   - non-MSVC, C++ : 'inline'
//   - non-MSVC, C   : expands to nothing
//   - MSVC          : '__forceinline'
#ifndef _MSC_VER
   #ifdef __cplusplus
   #define stbi_inline inline
   #else
   #define stbi_inline
   #endif
#else
   #define stbi_inline __forceinline
#endif
538 
539 
// Fixed-width integer aliases used throughout the implementation.
// The MSVC branch spells them with built-in types instead of including
// <stdint.h> (presumably because older MSVC versions lack that header --
// TODO confirm); every other compiler takes them from <stdint.h>.
#ifdef _MSC_VER
typedef unsigned short stbi__uint16;
typedef   signed short stbi__int16;
typedef unsigned int   stbi__uint32;
typedef   signed int   stbi__int32;
#else
#include <stdint.h>
typedef uint16_t stbi__uint16;
typedef int16_t  stbi__int16;
typedef uint32_t stbi__uint32;
typedef int32_t  stbi__int32;
#endif

// should produce compiler error if size is wrong
// (the array length becomes -1, which is ill-formed, unless stbi__uint32 is
// exactly 4 bytes)
typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
555 
// STBI_NOTUSED(v): mention 'v' in a no-op expression so that it counts as
// referenced. MSVC gets a plain cast to void; other compilers get a
// void-cast sizeof, which does not evaluate 'v'.
#ifdef _MSC_VER
#define STBI_NOTUSED(v)  (void)(v)
#else
#define STBI_NOTUSED(v)  (void)sizeof(v)
#endif
561 
#ifdef _MSC_VER
#define STBI_HAS_LROTL
#endif

// stbi_lrot(x,y): rotate the 32-bit unsigned value 'x' left by 'y' bits,
// for 'y' in 0..31.
//
// On MSVC this maps to the _lrotl intrinsic. The portable fallback masks the
// right-shift count with 31 instead of computing (32 - y): with y == 0 the
// latter would shift a 32-bit value by 32 bits, which is undefined behavior
// in C. For y in 1..31 the result is unchanged.
//
// Note that the fallback evaluates both arguments more than once, so do not
// pass expressions with side effects.
#ifdef STBI_HAS_LROTL
   #define stbi_lrot(x,y)  _lrotl(x,y)
#else
   #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (-(y) & 31)))
#endif
571 
// Allocator hooks.
// A client must define either the full set -- STBI_MALLOC, STBI_FREE and at
// least one of STBI_REALLOC / STBI_REALLOC_SIZED -- or none of them; any
// other combination is rejected at compile time by the #error below.
#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
// ok
#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
// ok
#else
#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
#endif

// Defaults: the C library allocator.
#ifndef STBI_MALLOC
#define STBI_MALLOC(sz)           malloc(sz)
#define STBI_REALLOC(p,newsz)     realloc(p,newsz)
#define STBI_FREE(p)              free(p)
#endif

// Default sized-realloc: forwards to STBI_REALLOC and ignores the old size.
#ifndef STBI_REALLOC_SIZED
#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
#endif
589 
// x86/x64 detection
// At most one of the two symbols is defined: STBI__X64_TARGET when the
// compiler reports an x86-64 target, otherwise STBI__X86_TARGET when it
// reports a 32-bit x86 target. Neither is defined on other architectures.
#if defined(__x86_64__) || defined(_M_X64)
#define STBI__X64_TARGET
#elif defined(__i386) || defined(_M_IX86)
#define STBI__X86_TARGET
#endif
596 
597 #if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
598 // gcc doesn't support sse2 intrinsics unless you compile with -msse2,
599 // which in turn means it gets to use SSE2 everywhere. This is unfortunate,
600 // but previous attempts to provide the SSE2 functions with runtime
601 // detection caused numerous issues. The way architecture extensions are
602 // exposed in GCC/Clang is, sadly, not really suited for one-file libs.
603 // New behavior: if compiled with -msse2, we use SSE2 without any
604 // detection; if not, we don't use it at all.
605 #define STBI_NO_SIMD
606 #endif
607 
608 #if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
609 // Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
610 //
611 // 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
612 // Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
613 // As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
614 // simultaneously enabling "-mstackrealign".
615 //
616 // See https://github.com/nothings/stb/issues/81 for more information.
617 //
618 // So default to no SSE2 on 32-bit MinGW. If you've read this far and added
619 // -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
620 #define STBI_NO_SIMD
621 #endif
622 
623 #if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
624 #define STBI_SSE2
625 #include <emmintrin.h>
626 
627 #ifdef _MSC_VER
628 
629 #if _MSC_VER >= 1400  // not VC6
630 #include <intrin.h> // __cpuid
// Query CPUID function 1 through the MSVC __cpuid intrinsic and return the
// fourth register it reports (EDX), the same value the inline-assembly
// variant of this function reads.
static int stbi__cpuid3(void)
{
   int regs[4];   // filled by __cpuid as EAX, EBX, ECX, EDX

   __cpuid(regs, 1);
   return regs[3];
}
637 #else
// Variant used when _MSC_VER < 1400, where the __cpuid intrinsic path is not
// taken: executes CPUID with EAX = 1 via MSVC inline assembly and returns
// the resulting EDX register.
static int stbi__cpuid3(void)
{
   int res;
   __asm {
      mov  eax,1
      cpuid
      mov  res,edx
   }
   return res;
}
648 #endif
649 
650 #define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
651 
// Run-time SSE2 check for MSVC builds.
// Returns 1 when bit 26 of the value reported by stbi__cpuid3() is set
// (the SSE2 feature flag of CPUID function 1), 0 otherwise.
static int stbi__sse2_available(void)
{
   return (stbi__cpuid3() >> 26) & 1;
}
657 #else // assume GCC-style if not VC++
658 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
659 
// SSE2 check for GCC-style compilers: always reports 1.
static int stbi__sse2_available(void)
{
   // No run-time probe here. This code is only compiled on GCC/Clang when
   // SSE2 code generation is enabled (-msse2), so the compiler itself may
   // already emit SSE2 instructions anywhere; using them explicitly adds
   // no new requirement.
   const int sse2_enabled_at_build_time = 1;
   return sse2_enabled_at_build_time;
}
667 #endif
668 #endif
669 
670 // ARM NEON
671 #if defined(STBI_NO_SIMD) && defined(STBI_NEON)
672 #undef STBI_NEON
673 #endif
674 
675 #ifdef STBI_NEON
676 #include <arm_neon.h>
677 // assume GCC or Clang on ARM targets
678 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
679 #endif
680 
681 #ifndef STBI_SIMD_ALIGN
682 #define STBI_SIMD_ALIGN(type, name) type name
683 #endif
684 
685 ///////////////////////////////////////////////
686 //
687 //  stbi__context struct and start_xxx functions
688 
689 // stbi__context structure is our basic context used by all images, so it
690 // contains all the IO context, plus some basic image information
691 typedef struct
692 {
693    stbi__uint32 img_x, img_y;
694    int img_n, img_out_n;
695 
696    stbi_io_callbacks io;
697    void *io_user_data;
698 
699    int read_from_callbacks;
700    int buflen;
701    stbi_uc buffer_start[128];
702 
703    stbi_uc *img_buffer, *img_buffer_end;
704    stbi_uc *img_buffer_original, *img_buffer_original_end;
705 } stbi__context;
706 
707 
708 static void stbi__refill_buffer(stbi__context *s);
709 
710 // initialize a memory-decode context
stbi__start_mem(stbi__context * s,stbi_uc const * buffer,int len)711 static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
712 {
713    s->io.read = NULL;
714    s->read_from_callbacks = 0;
715    s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
716    s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
717 }
718 
719 // initialize a callback-based context
stbi__start_callbacks(stbi__context * s,stbi_io_callbacks * c,void * user)720 static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
721 {
722    s->io = *c;
723    s->io_user_data = user;
724    s->buflen = sizeof(s->buffer_start);
725    s->read_from_callbacks = 1;
726    s->img_buffer_original = s->buffer_start;
727    stbi__refill_buffer(s);
728    s->img_buffer_original_end = s->img_buffer_end;
729 }
730 
731 #ifndef STBI_NO_STDIO
732 
// 'read' callback for stdio: 'user' is the FILE* to read from. Attempts to
// read 'size' bytes into 'data' and returns how many bytes fread delivered.
static int stbi__stdio_read(void *user, char *data, int size)
{
   FILE *stream = (FILE *) user;
   size_t got = fread(data, 1, size, stream);

   return (int) got;
}
737 
// 'skip' callback for stdio: 'user' is the FILE*. Moves the file position by
// 'n' bytes relative to the current position ('n' may be negative).
//
// A successful fseek clears the stream's end-of-file indicator, so after
// seeking past the end of the file feof() would still report "not at end".
// To keep the end-of-file state accurate, one byte is read after the seek
// and, if a byte was actually there, pushed back so that no data is consumed.
static void stbi__stdio_skip(void *user, int n)
{
   FILE *f = (FILE *) user;
   int ch;

   fseek(f, n, SEEK_CUR);
   ch = fgetc(f);      // sets the EOF indicator if we are at/after the end
   if (ch != EOF) {
      ungetc(ch, f);   // valid byte: put it back for the next read
   }
}
742 
// 'eof' callback for stdio: 'user' is the FILE*. Returns nonzero when no more
// data can be expected from the stream.
//
// Besides the end-of-file indicator this also checks the error indicator:
// after a read error feof() stays 0, so a caller that polls only for
// end-of-file would never learn that the stream is finished.
static int stbi__stdio_eof(void *user)
{
   FILE *f = (FILE *) user;

   return feof(f) || ferror(f);
}
747 
// Callback table that adapts a stdio FILE* to the stbi_io_callbacks
// interface; stbi__start_file passes it together with the FILE* as 'user'.
// Initializer order follows the member order of stbi_io_callbacks.
static stbi_io_callbacks stbi__stdio_callbacks =
{
   stbi__stdio_read,   // read
   stbi__stdio_skip,   // skip
   stbi__stdio_eof,    // eof
};
754 
// Set up 's' to decode from the open stdio stream 'f': a callback-based
// context whose callbacks are the stdio adapters and whose user pointer is
// the FILE* itself.
static void stbi__start_file(stbi__context *s, FILE *f)
{
   void *stream_as_user = (void *) f;

   stbi__start_callbacks(s, &stbi__stdio_callbacks, stream_as_user);
}
759 
760 //static void stop_file(stbi__context *s) { }
761 
762 #endif // !STBI_NO_STDIO
763 
// Reset the current data window of 's' to the window recorded when the
// context was started.
static void stbi__rewind(stbi__context *s)
{
   // Conceptually a rewind SHOULD go back to the beginning of the stream.
   // We only go back to the beginning of the initial buffer, which is
   // sufficient because rewind is only used after a 'test', and a test
   // never looks at more than 92 bytes.
   s->img_buffer_end = s->img_buffer_original_end;
   s->img_buffer     = s->img_buffer_original;
}
772 
// Channel-order tags (presumably the values stored in
// stbi__result_info.channel_order -- confirm against the decoders).
enum
{
   STBI_ORDER_RGB,   // == 0
   STBI_ORDER_BGR    // == 1
};
778 
// Side information about a decoded image. A pointer to this struct is the
// 'ri' parameter of every format-specific *_load function declared below.
typedef struct
{
   int bits_per_channel;   // presumably 8 or 16 -- confirm against the decoders
   int num_channels;
   int channel_order;      // presumably one of STBI_ORDER_RGB / STBI_ORDER_BGR
} stbi__result_info;
785 
// forward declarations of the per-format entry points. each group is only
// declared when the matching STBI_NO_<format> macro is not defined; the
// definitions are elsewhere in the file.
//    *_test  - used by stbi__load_main to decide whether this loader applies
//    *_load  - decode the image; 'ri' receives the stbi__result_info
//    *_info  - takes only the x/y/comp out-params
//    *_is16  - declared for PNG and PSD only
#ifndef STBI_NO_JPEG
static int      stbi__jpeg_test(stbi__context *s);
static void    *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PNG
static int      stbi__png_test(stbi__context *s);
static void    *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
static int      stbi__png_is16(stbi__context *s);
#endif

#ifndef STBI_NO_BMP
static int      stbi__bmp_test(stbi__context *s);
static void    *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_TGA
static int      stbi__tga_test(stbi__context *s);
static void    *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PSD
static int      stbi__psd_test(stbi__context *s);
// the PSD loader is the only one that also receives the requested bits per channel (bpc)
static void    *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
static int      stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
static int      stbi__psd_is16(stbi__context *s);
#endif

#ifndef STBI_NO_HDR
static int      stbi__hdr_test(stbi__context *s);
// the HDR loader returns float data rather than void*
static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PIC
static int      stbi__pic_test(stbi__context *s);
static void    *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_GIF
static int      stbi__gif_test(stbi__context *s);
static void    *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
// multi-frame entry point used by stbi_load_gif_from_memory; 'z' and 'delays' are extra out-params
static void    *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
static int      stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PNM
static int      stbi__pnm_test(stbi__context *s);
static void    *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
#endif
842 
// most recent failure message, written by stbi__err. it is a single
// file-scope variable shared by all callers, so this is not threadsafe
static const char *stbi__g_failure_reason;

// returns the message most recently stored by stbi__err. the variable has
// static storage, so the result is NULL until a failure has been recorded.
STBIDEF const char *stbi_failure_reason(void)
{
   return stbi__g_failure_reason;
}
850 
// record 'str' as the current failure reason (the pointer is stored, the
// text is not copied). always returns 0.
static int stbi__err(const char *str)
{
   stbi__g_failure_reason = str;
   return 0;
}
856 
// allocation entry point: forwards 'size' to the STBI_MALLOC macro (which
// the file header says can be user-defined) and returns its result unchanged.
static void *stbi__malloc(size_t size)
{
    return STBI_MALLOC(size);
}
861 
862 // stb_image uses ints pervasively, including for offset calculations.
863 // therefore the largest decoded image size we can support with the
864 // current code, even on 64-bit targets, is INT_MAX. this is not a
865 // significant limitation for the intended use case.
866 //
867 // we do, however, need to make sure our size calculations don't
868 // overflow. hence a few helper functions for size calculations that
869 // multiply integers together, making sure that they're non-negative
870 // and no overflow occurs.
871 
// returns 1 if "a + b" has no negative terms and fits in an int, 0 otherwise.
static int stbi__addsizes_valid(int a, int b)
{
   // negative terms are considered invalid; checking 'a' as well as 'b'
   // makes the function honour that contract even when called on its own
   if (a < 0 || b < 0) return 0;
   // now 0 <= b <= INT_MAX, hence also
   // 0 <= INT_MAX - b <= INT_MAX.
   // And "a + b <= INT_MAX" (which might overflow) is the
   // same as a <= INT_MAX - b (no overflow)
   return a <= INT_MAX - b;
}
883 
// returns 1 if "a*b" fits in an int and neither factor is negative,
// 0 if a factor is negative or the product would overflow.
static int stbi__mul2sizes_valid(int a, int b)
{
   int largest_a;
   if (a < 0) return 0;
   if (b < 0) return 0;
   if (b == 0) return 1; // anything times 0 is representable
   // dividing instead of multiplying keeps the check itself overflow-free
   largest_a = INT_MAX / b;
   return a <= largest_a;
}
893 
// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow
static int stbi__mad2sizes_valid(int a, int b, int add)
{
   // the product is only formed once it is known not to overflow
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   return stbi__addsizes_valid(a*b, add);
}
899 
// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow
static int stbi__mad3sizes_valid(int a, int b, int c, int add)
{
   // each partial product is only formed after the previous check passed
   if (!stbi__mul2sizes_valid(a, b))   return 0;
   if (!stbi__mul2sizes_valid(a*b, c)) return 0;
   return stbi__addsizes_valid(a*b*c, add);
}
906 
// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
{
   // each partial product is only formed after the previous check passed
   if (!stbi__mul2sizes_valid(a, b))     return 0;
   if (!stbi__mul2sizes_valid(a*b, c))   return 0;
   if (!stbi__mul2sizes_valid(a*b*c, d)) return 0;
   return stbi__addsizes_valid(a*b*c*d, add);
}
#endif
915 
// mallocs with size overflow checking:
// allocate "a*b + add" bytes, or return NULL if that size is not valid
static void *stbi__malloc_mad2(int a, int b, int add)
{
   return stbi__mad2sizes_valid(a, b, add) ? stbi__malloc(a*b + add) : NULL;
}
922 
// allocate "a*b*c + add" bytes, or return NULL if that size is not valid
static void *stbi__malloc_mad3(int a, int b, int c, int add)
{
   return stbi__mad3sizes_valid(a, b, c, add) ? stbi__malloc(a*b*c + add) : NULL;
}
928 
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
// allocate "a*b*c*d + add" bytes, or return NULL if that size is not valid
static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
{
   return stbi__mad4sizes_valid(a, b, c, d, add) ? stbi__malloc(a*b*c*d + add) : NULL;
}
#endif
936 
// stbi__err - error
// stbi__errpf - error returning pointer to float
// stbi__errpuc - error returning pointer to unsigned char

// from here on stbi__err is a two-argument macro taking two alternative
// messages (x,y). which one is recorded depends on the configuration:
//    STBI_NO_FAILURE_STRINGS - neither; the macro is just the constant 0
//    STBI_FAILURE_USERMSG    - the second message (y)
//    otherwise               - the first message (x)
// the stbi__err inside the expansion is not expanded again, so it names the
// one-argument function of the same name.
#ifdef STBI_NO_FAILURE_STRINGS
   #define stbi__err(x,y)  0
#elif defined(STBI_FAILURE_USERMSG)
   #define stbi__err(x,y)  stbi__err(y)
#else
   #define stbi__err(x,y)  stbi__err(x)
#endif

// evaluate stbi__err(x,y) for its effect, then yield a null pointer of the
// wanted type; both arms of ?: are NULL, so the result never depends on the
// value stbi__err produces
#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))
951 
// release a buffer by passing it to the STBI_FREE macro. as the parameter
// name says, the argument is meant to be a pointer returned by stbi_load.
STBIDEF void stbi_image_free(void *retval_from_stbi_load)
{
   STBI_FREE(retval_from_stbi_load);
}
956 
// forward declarations of the 8-bit <-> float converters, needed by the
// loader front ends in this section.
#ifndef STBI_NO_LINEAR
// 8-bit data -> float data; used by stbi__loadf_main
static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
#endif

#ifndef STBI_NO_HDR
// float data -> 8-bit data; used by stbi__load_main for HDR files
static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp);
#endif
964 
// file-scope flag read by the postprocess helpers; when nonzero they run
// stbi__vertical_flip on the decoded image. defaults to off.
static int stbi__vertically_flip_on_load = 0;

// store the caller's flag; it applies to loads performed afterwards
STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
{
    stbi__vertically_flip_on_load = flag_true_if_should_flip;
}
971 
// run the test function of every compiled-in format, in the fixed order
// below, and hand the stream to the loader of the first format that matches.
//    s          input context
//    x,y,comp   out-params, passed straight through to the chosen loader
//    req_comp   requested channel count, passed through to the loader
//    ri         reset here, then passed to the loader
//    bpc        only forwarded to the PSD loader
// returns what the chosen loader returns, or the result of
// stbi__errpuc("unknown image type", ...) if no test matches.
static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
   ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
   ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
   ri->num_channels = 0;

   #ifndef STBI_NO_JPEG
   if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PNG
   if (stbi__png_test(s))  return stbi__png_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_BMP
   if (stbi__bmp_test(s))  return stbi__bmp_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_GIF
   if (stbi__gif_test(s))  return stbi__gif_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PSD
   if (stbi__psd_test(s))  return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc);
   #endif
   #ifndef STBI_NO_PIC
   if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PNM
   if (stbi__pnm_test(s))  return stbi__pnm_load(s,x,y,comp,req_comp, ri);
   #endif

   #ifndef STBI_NO_HDR
   // HDR files decode to float, so the result is converted to 8-bit here.
   // NOTE(review): hdr is not checked before *x, *y and (when req_comp is 0)
   // *comp are read; this relies on stbi__hdr_to_ldr coping with a NULL
   // input - confirm against its definition.
   if (stbi__hdr_test(s)) {
      float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri);
      return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
   }
   #endif

   #ifndef STBI_NO_TGA
   // test tga last because it's a crappy test!
   if (stbi__tga_test(s))
      return stbi__tga_load(s,x,y,comp,req_comp, ri);
   #endif

   return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
}
1016 
stbi__convert_16_to_8(stbi__uint16 * orig,int w,int h,int channels)1017 static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
1018 {
1019    int i;
1020    int img_len = w * h * channels;
1021    stbi_uc *reduced;
1022 
1023    reduced = (stbi_uc *) stbi__malloc(img_len);
1024    if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory");
1025 
1026    for (i = 0; i < img_len; ++i)
1027       reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling
1028 
1029    STBI_FREE(orig);
1030    return reduced;
1031 }
1032 
stbi__convert_8_to_16(stbi_uc * orig,int w,int h,int channels)1033 static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
1034 {
1035    int i;
1036    int img_len = w * h * channels;
1037    stbi__uint16 *enlarged;
1038 
1039    enlarged = (stbi__uint16 *) stbi__malloc(img_len*2);
1040    if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
1041 
1042    for (i = 0; i < img_len; ++i)
1043       enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
1044 
1045    STBI_FREE(orig);
1046    return enlarged;
1047 }
1048 
stbi__vertical_flip(void * image,int w,int h,int bytes_per_pixel)1049 static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel)
1050 {
1051    int row;
1052    size_t bytes_per_row = (size_t)w * bytes_per_pixel;
1053    stbi_uc temp[2048];
1054    stbi_uc *bytes = (stbi_uc *)image;
1055 
1056    for (row = 0; row < (h>>1); row++) {
1057       stbi_uc *row0 = bytes + row*bytes_per_row;
1058       stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row;
1059       // swap row0 with row1
1060       size_t bytes_left = bytes_per_row;
1061       while (bytes_left) {
1062          size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);
1063          memcpy(temp, row0, bytes_copy);
1064          memcpy(row0, row1, bytes_copy);
1065          memcpy(row1, temp, bytes_copy);
1066          row0 += bytes_copy;
1067          row1 += bytes_copy;
1068          bytes_left -= bytes_copy;
1069       }
1070    }
1071 }
1072 
stbi__vertical_flip_slices(void * image,int w,int h,int z,int bytes_per_pixel)1073 static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel)
1074 {
1075    int slice;
1076    int slice_size = w * h * bytes_per_pixel;
1077 
1078    stbi_uc *bytes = (stbi_uc *)image;
1079    for (slice = 0; slice < z; ++slice) {
1080       stbi__vertical_flip(bytes, w, h, bytes_per_pixel);
1081       bytes += slice_size;
1082    }
1083 }
1084 
// load an image through stbi__load_main and deliver it as 8 bits per channel.
//    s          input context
//    x,y,comp   out-params filled in by the loader
//    req_comp   requested channel count; 0 means "use *comp"
// 16-bit results are reduced to 8 bits, and the image is flipped vertically
// when stbi__vertically_flip_on_load is set. returns NULL if loading or the
// bit-depth conversion fails.
static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   stbi__result_info ri;
   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);

   if (result == NULL)
      return NULL;

   if (ri.bits_per_channel != 8) {
      STBI_ASSERT(ri.bits_per_channel == 16);
      result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
      // the conversion allocates and returns NULL when that fails; stop here
      // so the flip below never touches a NULL image
      if (result == NULL)
         return NULL;
      ri.bits_per_channel = 8;
   }

   // @TODO: move stbi__convert_format to here

   if (stbi__vertically_flip_on_load) {
      int channels = req_comp ? req_comp : *comp;
      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
   }

   return (unsigned char *) result;
}
1108 
stbi__load_and_postprocess_16bit(stbi__context * s,int * x,int * y,int * comp,int req_comp)1109 static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1110 {
1111    stbi__result_info ri;
1112    void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
1113 
1114    if (result == NULL)
1115       return NULL;
1116 
1117    if (ri.bits_per_channel != 16) {
1118       STBI_ASSERT(ri.bits_per_channel == 8);
1119       result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
1120       ri.bits_per_channel = 16;
1121    }
1122 
1123    // @TODO: move stbi__convert_format16 to here
1124    // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
1125 
1126    if (stbi__vertically_flip_on_load) {
1127       int channels = req_comp ? req_comp : *comp;
1128       stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
1129    }
1130 
1131    return (stbi__uint16 *) result;
1132 }
1133 
1134 #if !defined(STBI_NO_HDR) || !defined(STBI_NO_LINEAR)
stbi__float_postprocess(float * result,int * x,int * y,int * comp,int req_comp)1135 static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
1136 {
1137    if (stbi__vertically_flip_on_load && result != NULL) {
1138       int channels = req_comp ? req_comp : *comp;
1139       stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
1140    }
1141 }
1142 #endif
1143 
1144 #ifndef STBI_NO_STDIO
1145 
// open 'filename' with the given fopen mode string; returns NULL on failure.
// MSVC 2005 and later (_MSC_VER >= 1400) go through fopen_s, everything else
// uses plain fopen.
static FILE *stbi__fopen(char const *filename, char const *mode)
{
#if defined(_MSC_VER) && _MSC_VER >= 1400
   FILE *f = NULL;
   if (fopen_s(&f, filename, mode) != 0)
      return NULL;
   return f;
#else
   return fopen(filename, mode);
#endif
}
1157 
1158 
stbi_load(char const * filename,int * x,int * y,int * comp,int req_comp)1159 STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
1160 {
1161    FILE *f = stbi__fopen(filename, "rb");
1162    unsigned char *result;
1163    if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
1164    result = stbi_load_from_file(f,x,y,comp,req_comp);
1165    fclose(f);
1166    return result;
1167 }
1168 
stbi_load_from_file(FILE * f,int * x,int * y,int * comp,int req_comp)1169 STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1170 {
1171    unsigned char *result;
1172    stbi__context s;
1173    stbi__start_file(&s,f);
1174    result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1175    if (result) {
1176       // need to 'unget' all the characters in the IO buffer
1177       fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1178    }
1179    return result;
1180 }
1181 
stbi_load_from_file_16(FILE * f,int * x,int * y,int * comp,int req_comp)1182 STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
1183 {
1184    stbi__uint16 *result;
1185    stbi__context s;
1186    stbi__start_file(&s,f);
1187    result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp);
1188    if (result) {
1189       // need to 'unget' all the characters in the IO buffer
1190       fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1191    }
1192    return result;
1193 }
1194 
stbi_load_16(char const * filename,int * x,int * y,int * comp,int req_comp)1195 STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
1196 {
1197    FILE *f = stbi__fopen(filename, "rb");
1198    stbi__uint16 *result;
1199    if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
1200    result = stbi_load_from_file_16(f,x,y,comp,req_comp);
1201    fclose(f);
1202    return result;
1203 }
1204 
1205 
1206 #endif //!STBI_NO_STDIO
1207 
stbi_load_16_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * channels_in_file,int desired_channels)1208 STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
1209 {
1210    stbi__context s;
1211    stbi__start_mem(&s,buffer,len);
1212    return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
1213 }
1214 
stbi_load_16_from_callbacks(stbi_io_callbacks const * clbk,void * user,int * x,int * y,int * channels_in_file,int desired_channels)1215 STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels)
1216 {
1217    stbi__context s;
1218    stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
1219    return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
1220 }
1221 
stbi_load_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * comp,int req_comp)1222 STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1223 {
1224    stbi__context s;
1225    stbi__start_mem(&s,buffer,len);
1226    return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1227 }
1228 
stbi_load_from_callbacks(stbi_io_callbacks const * clbk,void * user,int * x,int * y,int * comp,int req_comp)1229 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1230 {
1231    stbi__context s;
1232    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1233    return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1234 }
1235 
1236 #ifndef STBI_NO_GIF
stbi_load_gif_from_memory(stbi_uc const * buffer,int len,int ** delays,int * x,int * y,int * z,int * comp,int req_comp)1237 STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
1238 {
1239    unsigned char *result;
1240    stbi__context s;
1241    stbi__start_mem(&s,buffer,len);
1242 
1243    result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp);
1244    if (stbi__vertically_flip_on_load) {
1245       stbi__vertical_flip_slices( result, *x, *y, *z, *comp );
1246    }
1247 
1248    return result;
1249 }
1250 #endif
1251 
1252 #ifndef STBI_NO_LINEAR
// load an image as float data. HDR files (when HDR support is compiled in)
// are decoded directly and only get the optional vertical flip; anything
// else is loaded as 8-bit and converted with stbi__ldr_to_hdr. if the 8-bit
// load fails, the "unknown image type" error is reported.
static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *ldr;
   #ifndef STBI_NO_HDR
   if (stbi__hdr_test(s)) {
      stbi__result_info ri;
      float *hdr = stbi__hdr_load(s,x,y,comp,req_comp, &ri);
      if (hdr != NULL)
         stbi__float_postprocess(hdr,x,y,comp,req_comp);
      return hdr;
   }
   #endif
   ldr = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
   if (ldr == NULL)
      return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
   return stbi__ldr_to_hdr(ldr, *x, *y, req_comp ? req_comp : *comp);
}
1270 
stbi_loadf_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * comp,int req_comp)1271 STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1272 {
1273    stbi__context s;
1274    stbi__start_mem(&s,buffer,len);
1275    return stbi__loadf_main(&s,x,y,comp,req_comp);
1276 }
1277 
stbi_loadf_from_callbacks(stbi_io_callbacks const * clbk,void * user,int * x,int * y,int * comp,int req_comp)1278 STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1279 {
1280    stbi__context s;
1281    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1282    return stbi__loadf_main(&s,x,y,comp,req_comp);
1283 }
1284 
1285 #ifndef STBI_NO_STDIO
stbi_loadf(char const * filename,int * x,int * y,int * comp,int req_comp)1286 STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
1287 {
1288    float *result;
1289    FILE *f = stbi__fopen(filename, "rb");
1290    if (!f) return stbi__errpf("can't fopen", "Unable to open file");
1291    result = stbi_loadf_from_file(f,x,y,comp,req_comp);
1292    fclose(f);
1293    return result;
1294 }
1295 
stbi_loadf_from_file(FILE * f,int * x,int * y,int * comp,int req_comp)1296 STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1297 {
1298    stbi__context s;
1299    stbi__start_file(&s,f);
1300    return stbi__loadf_main(&s,x,y,comp,req_comp);
1301 }
1302 #endif // !STBI_NO_STDIO
1303 
1304 #endif // !STBI_NO_LINEAR
1305 
// these is-hdr-or-not queries are defined regardless of whether
// STBI_NO_LINEAR or STBI_NO_HDR is defined, for API simplicity; if
// STBI_NO_HDR is defined, they always report false!
1309 
stbi_is_hdr_from_memory(stbi_uc const * buffer,int len)1310 STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
1311 {
1312    #ifndef STBI_NO_HDR
1313    stbi__context s;
1314    stbi__start_mem(&s,buffer,len);
1315    return stbi__hdr_test(&s);
1316    #else
1317    STBI_NOTUSED(buffer);
1318    STBI_NOTUSED(len);
1319    return 0;
1320    #endif
1321 }
1322 
1323 #ifndef STBI_NO_STDIO
stbi_is_hdr(char const * filename)1324 STBIDEF int      stbi_is_hdr          (char const *filename)
1325 {
1326    FILE *f = stbi__fopen(filename, "rb");
1327    int result=0;
1328    if (f) {
1329       result = stbi_is_hdr_from_file(f);
1330       fclose(f);
1331    }
1332    return result;
1333 }
1334 
stbi_is_hdr_from_file(FILE * f)1335 STBIDEF int stbi_is_hdr_from_file(FILE *f)
1336 {
1337    #ifndef STBI_NO_HDR
1338    long pos = ftell(f);
1339    int res;
1340    stbi__context s;
1341    stbi__start_file(&s,f);
1342    res = stbi__hdr_test(&s);
1343    fseek(f, pos, SEEK_SET);
1344    return res;
1345    #else
1346    STBI_NOTUSED(f);
1347    return 0;
1348    #endif
1349 }
1350 #endif // !STBI_NO_STDIO
1351 
stbi_is_hdr_from_callbacks(stbi_io_callbacks const * clbk,void * user)1352 STBIDEF int      stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
1353 {
1354    #ifndef STBI_NO_HDR
1355    stbi__context s;
1356    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1357    return stbi__hdr_test(&s);
1358    #else
1359    STBI_NOTUSED(clbk);
1360    STBI_NOTUSED(user);
1361    return 0;
1362    #endif
1363 }
1364 
1365 #ifndef STBI_NO_LINEAR
1366 static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;
1367 
stbi_ldr_to_hdr_gamma(float gamma)1368 STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
stbi_ldr_to_hdr_scale(float scale)1369 STBIDEF void   stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
1370 #endif
1371 
1372 static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;
1373 
stbi_hdr_to_ldr_gamma(float gamma)1374 STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
stbi_hdr_to_ldr_scale(float scale)1375 STBIDEF void   stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }
1376 
1377 
1378 //////////////////////////////////////////////////////////////////////////////
1379 //
1380 // Common code used by all image loaders
1381 //
1382 
// scan mode constants shared by the image loaders
enum
{
   STBI__SCAN_load   = 0,
   STBI__SCAN_type   = 1,
   STBI__SCAN_header = 2
};
1389 
// refill the context's buffer through the read callback.
// afterwards img_buffer points at the start of the buffer and img_buffer_end
// just past the bytes obtained. if the callback delivers nothing, the
// context is switched to "memory" mode over a single zero byte.
static void stbi__refill_buffer(stbi__context *s)
{
   int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);
   if (n <= 0) {
      // at end of file, treat same as if from memory, but need to handle case
      // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file.
      // a negative count (a callback reporting an error) is handled the same
      // way, otherwise img_buffer_end would end up before img_buffer and the
      // buffered-byte counts computed elsewhere would turn negative
      s->read_from_callbacks = 0;
      s->img_buffer = s->buffer_start;
      s->img_buffer_end = s->buffer_start+1;
      *s->img_buffer = 0;
   } else {
      s->img_buffer = s->buffer_start;
      s->img_buffer_end = s->buffer_start + n;
   }
}
1405 
stbi__get8(stbi__context * s)1406 stbi_inline static stbi_uc stbi__get8(stbi__context *s)
1407 {
1408    if (s->img_buffer < s->img_buffer_end)
1409       return *s->img_buffer++;
1410    if (s->read_from_callbacks) {
1411       stbi__refill_buffer(s);
1412       return *s->img_buffer++;
1413    }
1414    return 0;
1415 }
1416 
stbi__at_eof(stbi__context * s)1417 stbi_inline static int stbi__at_eof(stbi__context *s)
1418 {
1419    if (s->io.read) {
1420       if (!(s->io.eof)(s->io_user_data)) return 0;
1421       // if feof() is true, check if buffer = end
1422       // special case: we've only got the special 0 character at the end
1423       if (s->read_from_callbacks == 0) return 1;
1424    }
1425 
1426    return s->img_buffer >= s->img_buffer_end;
1427 }
1428 
// advance the read position of 's' by n bytes.
// a negative n just moves to the end of the current buffer. with callbacks,
// whatever part of the skip is not covered by buffered bytes is forwarded
// to the skip callback.
static void stbi__skip(stbi__context *s, int n)
{
   if (n < 0) {
      s->img_buffer = s->img_buffer_end;
      return;
   }

   if (s->io.read) {
      int buffered = (int) (s->img_buffer_end - s->img_buffer);
      if (n > buffered) {
         // drop everything buffered and let the source skip the rest
         s->img_buffer = s->img_buffer_end;
         (s->io.skip)(s->io_user_data, n - buffered);
         return;
      }
   }

   s->img_buffer += n;
}
1445 
// read exactly n bytes from 's' into 'buffer'.
//    s       input context (memory- or callback-backed)
//    buffer  destination with room for n bytes
//    n       number of bytes wanted; negative counts are rejected
// returns 1 if all n bytes were delivered, 0 otherwise.
static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
{
   int blen;

   // a negative count would turn into a huge size_t in the memcpy calls below
   if (n < 0) return 0;

   blen = (int) (s->img_buffer_end - s->img_buffer);

   if (s->io.read) {
      if (blen < n) {
         int res, count;

         // hand over what is buffered, then read the remainder straight from
         // the callback into the caller's buffer
         memcpy(buffer, s->img_buffer, blen);

         count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen);
         res = (count == (n-blen));
         s->img_buffer = s->img_buffer_end;
         return res;
      }
   }

   // compare lengths instead of forming s->img_buffer+n: that pointer is
   // undefined (and may wrap around) when n exceeds the bytes remaining
   if (n <= blen) {
      memcpy(buffer, s->img_buffer, n);
      s->img_buffer += n;
      return 1;
   } else
      return 0;
}
1469 
// read two bytes and combine them big-endian (first byte is the high one)
static int stbi__get16be(stbi__context *s)
{
   int hi = stbi__get8(s);
   int lo = stbi__get8(s);
   return (hi << 8) + lo;
}
1475 
stbi__get32be(stbi__context * s)1476 static stbi__uint32 stbi__get32be(stbi__context *s)
1477 {
1478    stbi__uint32 z = stbi__get16be(s);
1479    return (z << 16) + stbi__get16be(s);
1480 }
1481 
1482 #if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
1483 // nothing
1484 #else
stbi__get16le(stbi__context * s)1485 static int stbi__get16le(stbi__context *s)
1486 {
1487    int z = stbi__get8(s);
1488    return z + (stbi__get8(s) << 8);
1489 }
1490 #endif
1491 
1492 #ifndef STBI_NO_BMP
stbi__get32le(stbi__context * s)1493 static stbi__uint32 stbi__get32le(stbi__context *s)
1494 {
1495    stbi__uint32 z = stbi__get16le(s);
1496    return z + (stbi__get16le(s) << 16);
1497 }
1498 #endif
1499 
// keep only the low 8 bits of x, then cast the result to stbi_uc
#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 255))  // truncate int to byte without warnings
1501 
1502 
1503 //////////////////////////////////////////////////////////////////////////////
1504 //
1505 //  generic converter from built-in img_n to req_comp
1506 //    individual types do this automatically as much as possible (e.g. jpeg
1507 //    does all cases internally since it needs to colorspace convert anyway,
1508 //    and it never has alpha, so very few cases ). png can automatically
1509 //    interleave an alpha=255 channel, but falls back to this for other cases
1510 //
1511 //  assume data buffer is malloced, so malloc a new one and free that one
1512 //  only failure mode is malloc failing
1513 
stbi__compute_y(int r,int g,int b)1514 static stbi_uc stbi__compute_y(int r, int g, int b)
1515 {
1516    return (stbi_uc) (((r*77) + (g*150) +  (29*b)) >> 8);
1517 }
1518 
// convert an 8-bit image from img_n to req_comp components per pixel.
//    data      source pixels, x*y of them with img_n components each; freed
//              by this function unless it is returned unchanged
//    img_n     components per pixel in 'data'
//    req_comp  components per pixel wanted (asserted to be 1..4)
//    x,y       image dimensions
// returns 'data' itself when no conversion is needed, otherwise a newly
// allocated image, or NULL (via stbi__errpuc "outofmem") if allocation fails.
static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int i,j;
   unsigned char *good;

   if (req_comp == img_n) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   // overflow-checked allocation of req_comp*x*y bytes
   good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0);
   if (good == NULL) {
      STBI_FREE(data);
      return stbi__errpuc("outofmem", "Out of memory");
   }

   for (j=0; j < (int) y; ++j) {
      unsigned char *src  = data + j * x * img_n   ;
      unsigned char *dest = good + j * x * req_comp;

      // STBI__COMBO packs a (source, destination) component pair into one
      // switch value; STBI__CASE(a,b) expands to that case label plus a loop
      // over the x pixels of the row, stepping src by a and dest by b
      #define STBI__COMBO(a,b)  ((a)*8+(b))
      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
      // convert source image with img_n components to one with req_comp components;
      // avoid switch per pixel, so use switch per scanline and massive macros
      // (a new second or fourth component is filled with 255; 3- or
      // 4-component sources are reduced to one value with stbi__compute_y)
      switch (STBI__COMBO(img_n, req_comp)) {
         STBI__CASE(1,2) { dest[0]=src[0], dest[1]=255;                                     } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=255;                     } break;
         STBI__CASE(2,1) { dest[0]=src[0];                                                  } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1];                  } break;
         STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255;        } break;
         STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255;    } break;
         STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2];                    } break;
         default: STBI_ASSERT(0);
      }
      #undef STBI__CASE
   }

   STBI_FREE(data);
   return good;
}
1562 
stbi__compute_y_16(int r,int g,int b)1563 static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
1564 {
1565    return (stbi__uint16) (((r*77) + (g*150) +  (29*b)) >> 8);
1566 }
1567 
// convert a 16-bit image from img_n to req_comp components per pixel.
//    data      source pixels, x*y of them with img_n components each; freed
//              by this function unless it is returned unchanged
//    img_n     components per pixel in 'data'
//    req_comp  components per pixel wanted (asserted to be 1..4)
//    x,y       image dimensions
// returns 'data' itself when no conversion is needed, otherwise a newly
// allocated image, or NULL (via stbi__errpuc "outofmem") if the size is not
// representable or the allocation fails.
static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int i,j;
   stbi__uint16 *good;

   if (req_comp == img_n) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   // overflow-checked allocation, like the 8-bit converter: req_comp*2 is
   // the byte size of one output pixel (at most 8), times x*y pixels. an
   // unchecked req_comp * x * y * 2 can wrap and under-allocate.
   good = (stbi__uint16 *) stbi__malloc_mad3(req_comp * 2, x, y, 0);
   if (good == NULL) {
      STBI_FREE(data);
      return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
   }

   for (j=0; j < (int) y; ++j) {
      stbi__uint16 *src  = data + j * x * img_n   ;
      stbi__uint16 *dest = good + j * x * req_comp;

      // STBI__COMBO packs a (source, destination) component pair into one
      // switch value; STBI__CASE(a,b) expands to that case label plus a loop
      // over the x pixels of the row, stepping src by a and dest by b
      #define STBI__COMBO(a,b)  ((a)*8+(b))
      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
      // convert source image with img_n components to one with req_comp components;
      // avoid switch per pixel, so use switch per scanline and massive macros
      // (a new second or fourth component is filled with 0xffff; 3- or
      // 4-component sources are reduced to one value with stbi__compute_y_16)
      switch (STBI__COMBO(img_n, req_comp)) {
         STBI__CASE(1,2) { dest[0]=src[0], dest[1]=0xffff;                                     } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=0xffff;                     } break;
         STBI__CASE(2,1) { dest[0]=src[0];                                                     } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1];                     } break;
         STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=0xffff;        } break;
         STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = 0xffff; } break;
         STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2];                       } break;
         default: STBI_ASSERT(0);
      }
      #undef STBI__CASE
   }

   STBI_FREE(data);
   return good;
}
1611 
1612 #ifndef STBI_NO_LINEAR
// Expand an 8-bit image into a newly allocated float image of the same
// dimensions and component count. Colour channels are normalised by 255, raised
// to stbi__l2h_gamma and multiplied by stbi__l2h_scale; when comp is even the
// last channel is only normalised by 255. 'data' is freed in every case where
// it is non-NULL; a NULL 'data' yields NULL.
static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
{
   int pixel, ch, color_channels, pixel_count;
   float *output;

   if (data == NULL) return NULL;

   output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
   if (!output) {
      STBI_FREE(data);
      return stbi__errpf("outofmem", "Out of memory");
   }

   // number of non-alpha components: all of them when comp is odd
   color_channels = (comp & 1) ? comp : comp-1;
   pixel_count = x*y;

   for (pixel=0; pixel < pixel_count; ++pixel) {
      stbi_uc *src = data + pixel*comp;
      float *dst = output + pixel*comp;
      for (ch=0; ch < color_channels; ++ch)
         dst[ch] = (float) (pow(src[ch]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
      // ch now indexes the one remaining channel, if there is one
      if (ch < comp)
         dst[ch] = src[ch]/255.0f;
   }

   STBI_FREE(data);
   return output;
}
1631 #endif
1632 
1633 #ifndef STBI_NO_HDR
1634 #define stbi__float2int(x)   ((int) (x))
stbi__hdr_to_ldr(float * data,int x,int y,int comp)1635 static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp)
1636 {
1637    int i,k,n;
1638    stbi_uc *output;
1639    if (!data) return NULL;
1640    output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
1641    if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
1642    // compute number of non-alpha components
1643    if (comp & 1) n = comp; else n = comp-1;
1644    for (i=0; i < x*y; ++i) {
1645       for (k=0; k < n; ++k) {
1646          float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
1647          if (z < 0) z = 0;
1648          if (z > 255) z = 255;
1649          output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1650       }
1651       if (k < comp) {
1652          float z = data[i*comp+k] * 255 + 0.5f;
1653          if (z < 0) z = 0;
1654          if (z > 255) z = 255;
1655          output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1656       }
1657    }
1658    STBI_FREE(data);
1659    return output;
1660 }
1661 #endif
1662 
1663 //////////////////////////////////////////////////////////////////////////////
1664 //
1665 //  "baseline" JPEG/JFIF decoder
1666 //
1667 //    simple implementation
1668 //      - doesn't support delayed output of y-dimension
1669 //      - simple interface (only one output format: 8-bit interleaved RGB)
1670 //      - doesn't try to recover corrupt jpegs
1671 //      - doesn't allow partial loading, loading multiple at once
1672 //      - still fast on x86 (copying globals into locals doesn't help x86)
1673 //      - allocates lots of intermediate memory (full size of all components)
1674 //        - non-interleaved case requires this anyway
1675 //        - allows good upsampling (see next)
1676 //    high-quality
1677 //      - upsampled channels are bilinearly interpolated, even across blocks
1678 //      - quality integer IDCT derived from IJG's 'slow'
1679 //    performance
1680 //      - fast huffman; reasonable integer IDCT
1681 //      - some SIMD kernels for common paths on targets with SSE2/NEON
1682 //      - uses a lot of intermediate memory, could cache poorly
1683 
1684 #ifndef STBI_NO_JPEG
1685 
1686 // huffman decoding acceleration
1687 #define FAST_BITS   9  // larger handles more cases; smaller stomps less cache
1688 
// Decoding tables for one JPEG huffman table. code[], values[] and size[] are
// parallel arrays indexed by symbol position; stbi__build_huffman fills every
// member except values[].
typedef struct
{
   // lookup on the top FAST_BITS bits of the bit buffer: the symbol position
   // for codes of at most FAST_BITS bits, or 255 meaning "not accelerated"
   stbi_uc  fast[1 << FAST_BITS];
   // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
   // huffman code assigned to each symbol position
   stbi__uint16 code[256];
   // value handed back by stbi__jpeg_huff_decode for each symbol position
   stbi_uc  values[256];
   // code length in bits for each symbol position; a 0 entry ends the list,
   // which is why there is one more slot than in code[]/values[]
   stbi_uc  size[257];
   // per code length 1..16: largest code + 1, preshifted to 16 bits;
   // entry 17 is set to 0xffffffff so the decode search always stops
   unsigned int maxcode[18];
   int    delta[17];   // old 'firstsymbol' - old 'firstcode'
} stbi__huffman;
1699 
// State of one JPEG decode: tables, per-component data, the entropy-coded bit
// buffer, progressive-scan parameters and the kernel function pointers.
typedef struct
{
   stbi__context *s;            // input; stbi__grow_buffer_unsafe pulls bytes from it with stbi__get8
   stbi__huffman huff_dc[4];    // NOTE(review): presumably selected by img_comp[].hd -- confirm at the scan decoder
   stbi__huffman huff_ac[4];    // NOTE(review): presumably selected by img_comp[].ha -- confirm at the scan decoder
   stbi__uint16 dequant[4][64]; // NOTE(review): presumably selected by img_comp[].tq -- confirm at the caller
   stbi__int16 fast_ac[4][1 << FAST_BITS]; // tables in the format written by stbi__build_fast_ac

// sizes for components, interleaved MCUs
   int img_h_max, img_v_max;
   int img_mcu_x, img_mcu_y;
   int img_mcu_w, img_mcu_h;

// definition of jpeg image component
   struct
   {
      int id;
      int h,v;
      int tq;
      int hd,ha;
      int dc_pred;      // running DC value: each decoded DC difference is added to it

      int x,y,w2,h2;
      stbi_uc *data;
      void *raw_data, *raw_coeff;
      stbi_uc *linebuf;
      short   *coeff;   // progressive only
      int      coeff_w, coeff_h; // number of 8x8 coefficient blocks
   } img_comp[4];

   stbi__uint32   code_buffer; // jpeg entropy-coded buffer; bits are consumed from the most significant end
   int            code_bits;   // number of valid bits
   unsigned char  marker;      // marker seen while filling entropy buffer
   int            nomore;      // flag if we saw a marker so must stop; once set, refills supply zero bytes

   int            progressive;
   int            spec_start;  // first zigzag index handled by a progressive AC scan
   int            spec_end;    // last zigzag index handled by a progressive AC scan; must be 0 for a DC scan
   int            succ_high;   // 0 selects the first pass over a coefficient, nonzero a refinement pass
   int            succ_low;    // bit position the current pass works at (used as a shift count)
   int            eob_run;     // blocks still to be treated as ended by an earlier end-of-band run
   int            jfif;
   int            app14_color_transform; // Adobe APP14 tag
   int            rgb;

   int scan_n, order[4];
   int restart_interval, todo;

// kernels
   void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
   void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
   stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
} stbi__jpeg;
1753 
// Build the decoding tables of 'h' from a list of code-length counts.
//
//   h      table to fill; size, code, delta, maxcode and fast are written,
//          values is left untouched
//   count  count[i] is the number of codes that are i+1 bits long, i in 0..15
//
// Returns 1 on success. If the counts describe more than 256 codes, or a code
// does not fit in its length, the result of stbi__err(...) is returned.
static int stbi__build_huffman(stbi__huffman *h, int *count)
{
   int i,j,k=0;
   unsigned int code;
   // build size list for each symbol (from JPEG spec)
   for (i=0; i < 16; ++i) {
      for (j=0; j < count[i]; ++j) {
         // size[] has room for 256 lengths plus the terminator written below;
         // the counts come from the file, so refuse anything larger
         if (k >= 256) return stbi__err("bad size list","Corrupt JPEG");
         h->size[k++] = (stbi_uc) (i+1);
      }
   }
   h->size[k] = 0;

   // compute actual symbols (from jpeg spec)
   code = 0;
   k = 0;
   for(j=1; j <= 16; ++j) {
      // compute delta to add to code to compute symbol id
      h->delta[j] = k - code;
      if (h->size[k] == j) {
         while (h->size[k] == j)
            h->code[k++] = (stbi__uint16) (code++);
         // the last code handed out must still fit in j bits
         if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG");
      }
      // compute largest code + 1 for this size, preshifted as needed later
      h->maxcode[j] = code << (16-j);
      code <<= 1;
   }
   // j is 17 here: sentinel that ends the search in stbi__jpeg_huff_decode
   h->maxcode[j] = 0xffffffff;

   // build non-spec acceleration table; 255 is flag for not-accelerated
   memset(h->fast, 255, 1 << FAST_BITS);
   for (i=0; i < k; ++i) {
      int s = h->size[i];
      if (s <= FAST_BITS) {
         // every FAST_BITS-bit pattern that starts with this code maps to symbol i
         int c = h->code[i] << (FAST_BITS-s);
         int m = 1 << (FAST_BITS-s);
         for (j=0; j < m; ++j) {
            h->fast[c+j] = (stbi_uc) i;
         }
      }
   }
   return 1;
}
1795 
1796 // build a table that decodes both magnitude and value of small ACs in
1797 // one go.
stbi__build_fast_ac(stbi__int16 * fast_ac,stbi__huffman * h)1798 static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
1799 {
1800    int i;
1801    for (i=0; i < (1 << FAST_BITS); ++i) {
1802       stbi_uc fast = h->fast[i];
1803       fast_ac[i] = 0;
1804       if (fast < 255) {
1805          int rs = h->values[fast];
1806          int run = (rs >> 4) & 15;
1807          int magbits = rs & 15;
1808          int len = h->size[fast];
1809 
1810          if (magbits && len + magbits <= FAST_BITS) {
1811             // magnitude code followed by receive_extend code
1812             int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
1813             int m = 1 << (magbits - 1);
1814             if (k < m) k += (~0U << magbits) + 1;
1815             // if the result is small enough, we can fit it in fast_ac table
1816             if (k >= -128 && k <= 127)
1817                fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits));
1818          }
1819       }
1820    }
1821 }
1822 
// Top up j->code_buffer one byte at a time until it holds more than 24 valid
// bits. A 0xff byte followed (after any further 0xff bytes) by a nonzero byte
// is recorded in j->marker, sets j->nomore and ends the refill at once; after
// that, zero bytes are fed in place of input.
static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
{
   for (;;) {
      unsigned int byte = 0;
      if (!j->nomore) byte = stbi__get8(j->s);

      if (byte == 0xff) {
         int next = stbi__get8(j->s);
         while (next == 0xff) next = stbi__get8(j->s); // consume fill bytes
         if (next != 0) {
            j->marker = (unsigned char) next;
            j->nomore = 1;
            return;
         }
      }

      // place the new byte directly below the bits that are already valid
      j->code_buffer |= byte << (24 - j->code_bits);
      j->code_bits += 8;
      if (j->code_bits > 24) return;
   }
}
1840 
// stbi__bmask[n] == (1 << n) - 1, i.e. a mask of the low n bits, for n in 0..16
static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
1843 
1844 // decode a jpeg huffman value from the bitstream
stbi__jpeg_huff_decode(stbi__jpeg * j,stbi__huffman * h)1845 stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
1846 {
1847    unsigned int temp;
1848    int c,k;
1849 
1850    if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1851 
1852    // look at the top FAST_BITS and determine what symbol ID it is,
1853    // if the code is <= FAST_BITS
1854    c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1855    k = h->fast[c];
1856    if (k < 255) {
1857       int s = h->size[k];
1858       if (s > j->code_bits)
1859          return -1;
1860       j->code_buffer <<= s;
1861       j->code_bits -= s;
1862       return h->values[k];
1863    }
1864 
1865    // naive test is to shift the code_buffer down so k bits are
1866    // valid, then test against maxcode. To speed this up, we've
1867    // preshifted maxcode left so that it has (16-k) 0s at the
1868    // end; in other words, regardless of the number of bits, it
1869    // wants to be compared against something shifted to have 16;
1870    // that way we don't need to shift inside the loop.
1871    temp = j->code_buffer >> 16;
1872    for (k=FAST_BITS+1 ; ; ++k)
1873       if (temp < h->maxcode[k])
1874          break;
1875    if (k == 17) {
1876       // error! code not found
1877       j->code_bits -= 16;
1878       return -1;
1879    }
1880 
1881    if (k > j->code_bits)
1882       return -1;
1883 
1884    // convert the huffman code to the symbol id
1885    c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
1886    STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
1887 
1888    // convert the id to a symbol
1889    j->code_bits -= k;
1890    j->code_buffer <<= k;
1891    return h->values[c];
1892 }
1893 
// bias[n] = (-1<<n) + 1, for n in 0..15
// stbi__extend_receive adds bias[n] to an n-bit value whose leading bit is 0.
static const int stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767};
1896 
1897 // combined JPEG 'receive' and JPEG 'extend', since baseline
1898 // always extends everything it receives.
stbi__extend_receive(stbi__jpeg * j,int n)1899 stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
1900 {
1901    unsigned int k;
1902    int sgn;
1903    if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1904 
1905    sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
1906    k = stbi_lrot(j->code_buffer, n);
1907    STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask)));
1908    j->code_buffer = k & ~stbi__bmask[n];
1909    k &= stbi__bmask[n];
1910    j->code_bits -= n;
1911    return k + (stbi__jbias[n] & ~sgn);
1912 }
1913 
1914 // get some unsigned bits
stbi__jpeg_get_bits(stbi__jpeg * j,int n)1915 stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
1916 {
1917    unsigned int k;
1918    if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1919    k = stbi_lrot(j->code_buffer, n);
1920    j->code_buffer = k & ~stbi__bmask[n];
1921    k &= stbi__bmask[n];
1922    j->code_bits -= n;
1923    return k;
1924 }
1925 
stbi__jpeg_get_bit(stbi__jpeg * j)1926 stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
1927 {
1928    unsigned int k;
1929    if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
1930    k = j->code_buffer;
1931    j->code_buffer <<= 1;
1932    --j->code_bits;
1933    return k & 0x80000000;
1934 }
1935 
// given a value that's at position X in the zigzag stream,
// where does it appear in the 8x8 matrix coded as row-major?
// The table has 15 entries beyond the 64 real ones, all 63: the block decoders
// add a run of up to 15 to the position before looking it up, so corrupt input
// can index past 63 and is mapped to the last coefficient instead.
static const stbi_uc stbi__jpeg_dezigzag[64+15] =
{
    0,  1,  8, 16,  9,  2,  3, 10,
   17, 24, 32, 25, 18, 11,  4,  5,
   12, 19, 26, 33, 40, 48, 41, 34,
   27, 20, 13,  6,  7, 14, 21, 28,
   35, 42, 49, 56, 57, 50, 43, 36,
   29, 22, 15, 23, 30, 37, 44, 51,
   58, 59, 52, 45, 38, 31, 39, 46,
   53, 60, 61, 54, 47, 55, 62, 63,
   // let corrupt input sample past end
   63, 63, 63, 63, 63, 63, 63, 63,
   63, 63, 63, 63, 63, 63, 63
};
1952 
1953 // decode one 64-entry block--
stbi__jpeg_decode_block(stbi__jpeg * j,short data[64],stbi__huffman * hdc,stbi__huffman * hac,stbi__int16 * fac,int b,stbi__uint16 * dequant)1954 static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
1955 {
1956    int diff,dc,k;
1957    int t;
1958 
1959    if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1960    t = stbi__jpeg_huff_decode(j, hdc);
1961    if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1962 
1963    // 0 all the ac values now so we can do it 32-bits at a time
1964    memset(data,0,64*sizeof(data[0]));
1965 
1966    diff = t ? stbi__extend_receive(j, t) : 0;
1967    dc = j->img_comp[b].dc_pred + diff;
1968    j->img_comp[b].dc_pred = dc;
1969    data[0] = (short) (dc * dequant[0]);
1970 
1971    // decode AC components, see JPEG spec
1972    k = 1;
1973    do {
1974       unsigned int zig;
1975       int c,r,s;
1976       if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1977       c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1978       r = fac[c];
1979       if (r) { // fast-AC path
1980          k += (r >> 4) & 15; // run
1981          s = r & 15; // combined length
1982          j->code_buffer <<= s;
1983          j->code_bits -= s;
1984          // decode into unzigzag'd location
1985          zig = stbi__jpeg_dezigzag[k++];
1986          data[zig] = (short) ((r >> 8) * dequant[zig]);
1987       } else {
1988          int rs = stbi__jpeg_huff_decode(j, hac);
1989          if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1990          s = rs & 15;
1991          r = rs >> 4;
1992          if (s == 0) {
1993             if (rs != 0xf0) break; // end block
1994             k += 16;
1995          } else {
1996             k += r;
1997             // decode into unzigzag'd location
1998             zig = stbi__jpeg_dezigzag[k++];
1999             data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
2000          }
2001       }
2002    } while (k < 64);
2003    return 1;
2004 }
2005 
// Decode the DC coefficient of one block in a progressive scan.
//
//   j     decoder state; j->succ_high selects first pass (0) or refinement,
//         j->succ_low is the bit position being decoded
//   data  the block's 64 coefficients; zeroed on the first pass, then only
//         data[0] is written
//   hdc   huffman table for the DC coefficient
//   b     index into j->img_comp of the component being decoded
//
// Returns 1 on success, or the result of stbi__err(...) when the scan also
// covers AC coefficients (spec_end != 0) or the huffman code is bad.
static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
{
   int diff,dc;
   int t;
   if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");

   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);

   if (j->succ_high == 0) {
      // first scan for DC coefficient, must be first
      memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
      t = stbi__jpeg_huff_decode(j, hdc);
      // -1 signals a decode failure, and a damaged table can yield any byte
      // value; neither is a bit count stbi__extend_receive can be given
      if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG");
      diff = t ? stbi__extend_receive(j, t) : 0;

      dc = j->img_comp[b].dc_pred + diff;
      j->img_comp[b].dc_pred = dc;
      // multiply, don't shift: dc may be negative
      data[0] = (short) (dc * (1 << j->succ_low));
   } else {
      // refinement scan for DC coefficient
      if (stbi__jpeg_get_bit(j))
         data[0] += (short) (1 << j->succ_low);
   }
   return 1;
}
2030 
2031 // @OPTIMIZE: store non-zigzagged during the decode passes,
2032 // and only de-zigzag when dequantizing
stbi__jpeg_decode_block_prog_ac(stbi__jpeg * j,short data[64],stbi__huffman * hac,stbi__int16 * fac)2033 static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
2034 {
2035    int k;
2036    if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
2037 
2038    if (j->succ_high == 0) {
2039       int shift = j->succ_low;
2040 
2041       if (j->eob_run) {
2042          --j->eob_run;
2043          return 1;
2044       }
2045 
2046       k = j->spec_start;
2047       do {
2048          unsigned int zig;
2049          int c,r,s;
2050          if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2051          c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
2052          r = fac[c];
2053          if (r) { // fast-AC path
2054             k += (r >> 4) & 15; // run
2055             s = r & 15; // combined length
2056             j->code_buffer <<= s;
2057             j->code_bits -= s;
2058             zig = stbi__jpeg_dezigzag[k++];
2059             data[zig] = (short) ((r >> 8) << shift);
2060          } else {
2061             int rs = stbi__jpeg_huff_decode(j, hac);
2062             if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2063             s = rs & 15;
2064             r = rs >> 4;
2065             if (s == 0) {
2066                if (r < 15) {
2067                   j->eob_run = (1 << r);
2068                   if (r)
2069                      j->eob_run += stbi__jpeg_get_bits(j, r);
2070                   --j->eob_run;
2071                   break;
2072                }
2073                k += 16;
2074             } else {
2075                k += r;
2076                zig = stbi__jpeg_dezigzag[k++];
2077                data[zig] = (short) (stbi__extend_receive(j,s) << shift);
2078             }
2079          }
2080       } while (k <= j->spec_end);
2081    } else {
2082       // refinement scan for these AC coefficients
2083 
2084       short bit = (short) (1 << j->succ_low);
2085 
2086       if (j->eob_run) {
2087          --j->eob_run;
2088          for (k = j->spec_start; k <= j->spec_end; ++k) {
2089             short *p = &data[stbi__jpeg_dezigzag[k]];
2090             if (*p != 0)
2091                if (stbi__jpeg_get_bit(j))
2092                   if ((*p & bit)==0) {
2093                      if (*p > 0)
2094                         *p += bit;
2095                      else
2096                         *p -= bit;
2097                   }
2098          }
2099       } else {
2100          k = j->spec_start;
2101          do {
2102             int r,s;
2103             int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
2104             if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2105             s = rs & 15;
2106             r = rs >> 4;
2107             if (s == 0) {
2108                if (r < 15) {
2109                   j->eob_run = (1 << r) - 1;
2110                   if (r)
2111                      j->eob_run += stbi__jpeg_get_bits(j, r);
2112                   r = 64; // force end of block
2113                } else {
2114                   // r=15 s=0 should write 16 0s, so we just do
2115                   // a run of 15 0s and then write s (which is 0),
2116                   // so we don't have to do anything special here
2117                }
2118             } else {
2119                if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
2120                // sign bit
2121                if (stbi__jpeg_get_bit(j))
2122                   s = bit;
2123                else
2124                   s = -bit;
2125             }
2126 
2127             // advance by r
2128             while (k <= j->spec_end) {
2129                short *p = &data[stbi__jpeg_dezigzag[k++]];
2130                if (*p != 0) {
2131                   if (stbi__jpeg_get_bit(j))
2132                      if ((*p & bit)==0) {
2133                         if (*p > 0)
2134                            *p += bit;
2135                         else
2136                            *p -= bit;
2137                      }
2138                } else {
2139                   if (r == 0) {
2140                      *p = (short) s;
2141                      break;
2142                   }
2143                   --r;
2144                }
2145             }
2146          } while (k <= j->spec_end);
2147       }
2148    }
2149    return 1;
2150 }
2151 
2152 // take a -128..127 value and stbi__clamp it and convert to 0..255
stbi__clamp(int x)2153 stbi_inline static stbi_uc stbi__clamp(int x)
2154 {
2155    // trick to use a single test to catch both cases
2156    if ((unsigned int) x > 255) {
2157       if (x < 0) return 0;
2158       if (x > 255) return 255;
2159    }
2160    return (stbi_uc) x;
2161 }
2162 
// stbi__f2f: turn a floating-point constant into an int scaled by 4096
// (adds 0.5, then truncates through the int cast).
// stbi__fsh: scale an integer expression by the same factor of 4096.
#define stbi__f2f(x)  ((int) (((x) * 4096 + 0.5)))
#define stbi__fsh(x)  ((x) * 4096)

// derived from jidctint -- DCT_ISLOW
// One 1D pass over eight inputs s0..s7. The macro declares its own locals, so
// it can be used once per scope and must come before any statement in it.
// It leaves the terms computed from s0,s2,s4,s6 in x0..x3 and the terms
// computed from s1,s3,s5,s7 in t0..t3, all scaled up by 4096; the caller forms
// the eight outputs as x0+-t3, x1+-t2, x2+-t1 and x3+-t0.
#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
   p2 = s2;                                    \
   p3 = s6;                                    \
   p1 = (p2+p3) * stbi__f2f(0.5411961f);       \
   t2 = p1 + p3*stbi__f2f(-1.847759065f);      \
   t3 = p1 + p2*stbi__f2f( 0.765366865f);      \
   p2 = s0;                                    \
   p3 = s4;                                    \
   t0 = stbi__fsh(p2+p3);                      \
   t1 = stbi__fsh(p2-p3);                      \
   x0 = t0+t3;                                 \
   x3 = t0-t3;                                 \
   x1 = t1+t2;                                 \
   x2 = t1-t2;                                 \
   t0 = s7;                                    \
   t1 = s5;                                    \
   t2 = s3;                                    \
   t3 = s1;                                    \
   p3 = t0+t2;                                 \
   p4 = t1+t3;                                 \
   p1 = t0+t3;                                 \
   p2 = t1+t2;                                 \
   p5 = (p3+p4)*stbi__f2f( 1.175875602f);      \
   t0 = t0*stbi__f2f( 0.298631336f);           \
   t1 = t1*stbi__f2f( 2.053119869f);           \
   t2 = t2*stbi__f2f( 3.072711026f);           \
   t3 = t3*stbi__f2f( 1.501321110f);           \
   p1 = p5 + p1*stbi__f2f(-0.899976223f);      \
   p2 = p5 + p2*stbi__f2f(-2.562915447f);      \
   p3 = p3*stbi__f2f(-1.961570560f);           \
   p4 = p4*stbi__f2f(-0.390180644f);           \
   t3 += p1+p4;                                \
   t2 += p2+p3;                                \
   t1 += p2+p4;                                \
   t0 += p1+p3;
2203 
// Inverse DCT of one 8x8 coefficient block.
//
//   out         receives 8 rows of 8 samples, each limited to 0..255
//   out_stride  distance in bytes from one output row to the next
//   data        the 64 input coefficients, row-major
//
// Works in two passes: a 1D pass down each column into a temporary int array,
// then a 1D pass along each row of that array into 'out'.
static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
{
   int col, row, val[64];

   // columns
   for (col=0; col < 8; ++col) {
      short *d = data + col;
      int *v = val + col;
      // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
      if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
           && d[40]==0 && d[48]==0 && d[56]==0) {
         // only the first coefficient is set: the whole column takes its
         // value, carrying the same 2 extra bits as the general case
         int dcterm = d[0]*4;
         v[ 0] = dcterm;
         v[ 8] = dcterm;
         v[16] = dcterm;
         v[24] = dcterm;
         v[32] = dcterm;
         v[40] = dcterm;
         v[48] = dcterm;
         v[56] = dcterm;
      } else {
         STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
         // constants scaled things up by 1<<12; let's bring them back
         // down, but keep 2 extra bits of precision
         x0 += 512;
         x1 += 512;
         x2 += 512;
         x3 += 512;
         v[ 0] = (x0+t3) >> 10;
         v[ 8] = (x1+t2) >> 10;
         v[16] = (x2+t1) >> 10;
         v[24] = (x3+t0) >> 10;
         v[32] = (x3-t0) >> 10;
         v[40] = (x2-t1) >> 10;
         v[48] = (x1-t2) >> 10;
         v[56] = (x0-t3) >> 10;
      }
   }

   // rows
   for (row=0; row < 8; ++row) {
      int *v = val + row*8;
      stbi_uc *o = out + row*out_stride;
      // no fast case since the first 1D IDCT spread components out
      STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
      // constants scaled things up by 1<<12, plus we had 1<<2 from first
      // loop, plus horizontal and vertical each scale by sqrt(8) so together
      // we've got an extra 1<<3, so 1<<17 total we need to remove.
      // so we want to round that, which means adding 0.5 * 1<<17,
      // aka 65536. Also, we'll end up with -128 to 127 that we want
      // to encode as 0..255 by adding 128, so we'll add that before the shift
      x0 += 65536 + (128<<17);
      x1 += 65536 + (128<<17);
      x2 += 65536 + (128<<17);
      x3 += 65536 + (128<<17);
      // tried computing the shifts into temps, or'ing the temps to see
      // if any were out of range, but that was slower
      o[0] = stbi__clamp((x0+t3) >> 17);
      o[1] = stbi__clamp((x1+t2) >> 17);
      o[2] = stbi__clamp((x2+t1) >> 17);
      o[3] = stbi__clamp((x3+t0) >> 17);
      o[4] = stbi__clamp((x3-t0) >> 17);
      o[5] = stbi__clamp((x2-t1) >> 17);
      o[6] = stbi__clamp((x1-t2) >> 17);
      o[7] = stbi__clamp((x0-t3) >> 17);
   }
}
2262 
2263 #ifdef STBI_SSE2
2264 // sse2 integer IDCT. not the fastest possible implementation but it
2265 // produces bit-identical results to the generic C version so it's
2266 // fully "transparent".
stbi__idct_simd(stbi_uc * out,int out_stride,short data[64])2267 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2268 {
2269    // This is constructed to match our regular (generic) integer IDCT exactly.
2270    __m128i row0, row1, row2, row3, row4, row5, row6, row7;
2271    __m128i tmp;
2272 
2273    // dot product constant: even elems=x, odd elems=y
2274    #define dct_const(x,y)  _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))
2275 
2276    // out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)
2277    // out(1) = c1[even]*x + c1[odd]*y
2278    #define dct_rot(out0,out1, x,y,c0,c1) \
2279       __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
2280       __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
2281       __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
2282       __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
2283       __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
2284       __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
2285 
2286    // out = in << 12  (in 16-bit, out 32-bit)
2287    #define dct_widen(out, in) \
2288       __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
2289       __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
2290 
2291    // wide add
2292    #define dct_wadd(out, a, b) \
2293       __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
2294       __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
2295 
2296    // wide sub
2297    #define dct_wsub(out, a, b) \
2298       __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
2299       __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
2300 
2301    // butterfly a/b, add bias, then shift by "s" and pack
2302    #define dct_bfly32o(out0, out1, a,b,bias,s) \
2303       { \
2304          __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
2305          __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
2306          dct_wadd(sum, abiased, b); \
2307          dct_wsub(dif, abiased, b); \
2308          out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
2309          out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
2310       }
2311 
2312    // 8-bit interleave step (for transposes)
2313    #define dct_interleave8(a, b) \
2314       tmp = a; \
2315       a = _mm_unpacklo_epi8(a, b); \
2316       b = _mm_unpackhi_epi8(tmp, b)
2317 
2318    // 16-bit interleave step (for transposes)
2319    #define dct_interleave16(a, b) \
2320       tmp = a; \
2321       a = _mm_unpacklo_epi16(a, b); \
2322       b = _mm_unpackhi_epi16(tmp, b)
2323 
2324    #define dct_pass(bias,shift) \
2325       { \
2326          /* even part */ \
2327          dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
2328          __m128i sum04 = _mm_add_epi16(row0, row4); \
2329          __m128i dif04 = _mm_sub_epi16(row0, row4); \
2330          dct_widen(t0e, sum04); \
2331          dct_widen(t1e, dif04); \
2332          dct_wadd(x0, t0e, t3e); \
2333          dct_wsub(x3, t0e, t3e); \
2334          dct_wadd(x1, t1e, t2e); \
2335          dct_wsub(x2, t1e, t2e); \
2336          /* odd part */ \
2337          dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
2338          dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
2339          __m128i sum17 = _mm_add_epi16(row1, row7); \
2340          __m128i sum35 = _mm_add_epi16(row3, row5); \
2341          dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
2342          dct_wadd(x4, y0o, y4o); \
2343          dct_wadd(x5, y1o, y5o); \
2344          dct_wadd(x6, y2o, y5o); \
2345          dct_wadd(x7, y3o, y4o); \
2346          dct_bfly32o(row0,row7, x0,x7,bias,shift); \
2347          dct_bfly32o(row1,row6, x1,x6,bias,shift); \
2348          dct_bfly32o(row2,row5, x2,x5,bias,shift); \
2349          dct_bfly32o(row3,row4, x3,x4,bias,shift); \
2350       }
2351 
2352    __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
2353    __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
2354    __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
2355    __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
2356    __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
2357    __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
2358    __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
2359    __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));
2360 
2361    // rounding biases in column/row passes, see stbi__idct_block for explanation.
2362    __m128i bias_0 = _mm_set1_epi32(512);
2363    __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));
2364 
2365    // load
2366    row0 = _mm_load_si128((const __m128i *) (data + 0*8));
2367    row1 = _mm_load_si128((const __m128i *) (data + 1*8));
2368    row2 = _mm_load_si128((const __m128i *) (data + 2*8));
2369    row3 = _mm_load_si128((const __m128i *) (data + 3*8));
2370    row4 = _mm_load_si128((const __m128i *) (data + 4*8));
2371    row5 = _mm_load_si128((const __m128i *) (data + 5*8));
2372    row6 = _mm_load_si128((const __m128i *) (data + 6*8));
2373    row7 = _mm_load_si128((const __m128i *) (data + 7*8));
2374 
2375    // column pass
2376    dct_pass(bias_0, 10);
2377 
2378    {
2379       // 16bit 8x8 transpose pass 1
2380       dct_interleave16(row0, row4);
2381       dct_interleave16(row1, row5);
2382       dct_interleave16(row2, row6);
2383       dct_interleave16(row3, row7);
2384 
2385       // transpose pass 2
2386       dct_interleave16(row0, row2);
2387       dct_interleave16(row1, row3);
2388       dct_interleave16(row4, row6);
2389       dct_interleave16(row5, row7);
2390 
2391       // transpose pass 3
2392       dct_interleave16(row0, row1);
2393       dct_interleave16(row2, row3);
2394       dct_interleave16(row4, row5);
2395       dct_interleave16(row6, row7);
2396    }
2397 
2398    // row pass
2399    dct_pass(bias_1, 17);
2400 
2401    {
2402       // pack
2403       __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
2404       __m128i p1 = _mm_packus_epi16(row2, row3);
2405       __m128i p2 = _mm_packus_epi16(row4, row5);
2406       __m128i p3 = _mm_packus_epi16(row6, row7);
2407 
2408       // 8bit 8x8 transpose pass 1
2409       dct_interleave8(p0, p2); // a0e0a1e1...
2410       dct_interleave8(p1, p3); // c0g0c1g1...
2411 
2412       // transpose pass 2
2413       dct_interleave8(p0, p1); // a0c0e0g0...
2414       dct_interleave8(p2, p3); // b0d0f0h0...
2415 
2416       // transpose pass 3
2417       dct_interleave8(p0, p2); // a0b0c0d0...
2418       dct_interleave8(p1, p3); // a4b4c4d4...
2419 
2420       // store
2421       _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
2422       _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
2423       _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
2424       _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
2425       _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
2426       _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
2427       _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
2428       _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
2429    }
2430 
2431 #undef dct_const
2432 #undef dct_rot
2433 #undef dct_widen
2434 #undef dct_wadd
2435 #undef dct_wsub
2436 #undef dct_bfly32o
2437 #undef dct_interleave8
2438 #undef dct_interleave16
2439 #undef dct_pass
2440 }
2441 
2442 #endif // STBI_SSE2
2443 
2444 #ifdef STBI_NEON
2445 
// NEON integer IDCT. should produce bit-identical
// results to the generic C version.
//
// Loads the 64 coefficients in data[] as eight vectors of eight 16-bit
// lanes, runs dct_pass over them twice with a 16-bit 8x8 transpose in
// between, narrows the result to unsigned 8-bit with saturation, transposes
// again and stores eight 8-byte rows starting at out, advancing out by
// out_stride bytes after each row.
static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
{
   int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;

   // multiplier constants, converted by stbi__f2f and broadcast to four
   // 16-bit lanes so they can be fed to the widening multiplies below
   int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
   int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
   int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
   int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
   int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
   int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
   int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
   int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
   int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
   int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
   int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
   int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));

// All "wide" values below are kept as a pair of 32-bit vectors named
// <name>_l (low four lanes) and <name>_h (high four lanes); the macros
// declare those two variables by pasting the suffix onto their first argument.

// widening multiply: out = inq * coeff
#define dct_long_mul(out, inq, coeff) \
   int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
   int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)

// widening multiply-accumulate: out = acc + inq * coeff
#define dct_long_mac(out, acc, inq, coeff) \
   int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
   int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)

// widen 16-bit lanes to 32 bits while shifting left by 12
#define dct_widen(out, inq) \
   int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
   int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)

// wide add
#define dct_wadd(out, a, b) \
   int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
   int32x4_t out##_h = vaddq_s32(a##_h, b##_h)

// wide sub
#define dct_wsub(out, a, b) \
   int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
   int32x4_t out##_h = vsubq_s32(a##_h, b##_h)

// butterfly a/b, then shift using "shiftop" by "s" and pack
// (out0 receives the narrowed sum a+b, out1 the narrowed difference a-b)
#define dct_bfly32o(out0,out1, a,b,shiftop,s) \
   { \
      dct_wadd(sum, a, b); \
      dct_wsub(dif, a, b); \
      out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
      out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
   }

// one 1-D pass over row0..row7: the even part combines rows 0,2,4,6 into
// x0..x3, the odd part combines rows 1,3,5,7 into x4..x7, and the final
// butterflies overwrite row0..row7 with the results narrowed by shiftop/shift.
#define dct_pass(shiftop, shift) \
   { \
      /* even part */ \
      int16x8_t sum26 = vaddq_s16(row2, row6); \
      dct_long_mul(p1e, sum26, rot0_0); \
      dct_long_mac(t2e, p1e, row6, rot0_1); \
      dct_long_mac(t3e, p1e, row2, rot0_2); \
      int16x8_t sum04 = vaddq_s16(row0, row4); \
      int16x8_t dif04 = vsubq_s16(row0, row4); \
      dct_widen(t0e, sum04); \
      dct_widen(t1e, dif04); \
      dct_wadd(x0, t0e, t3e); \
      dct_wsub(x3, t0e, t3e); \
      dct_wadd(x1, t1e, t2e); \
      dct_wsub(x2, t1e, t2e); \
      /* odd part */ \
      int16x8_t sum15 = vaddq_s16(row1, row5); \
      int16x8_t sum17 = vaddq_s16(row1, row7); \
      int16x8_t sum35 = vaddq_s16(row3, row5); \
      int16x8_t sum37 = vaddq_s16(row3, row7); \
      int16x8_t sumodd = vaddq_s16(sum17, sum35); \
      dct_long_mul(p5o, sumodd, rot1_0); \
      dct_long_mac(p1o, p5o, sum17, rot1_1); \
      dct_long_mac(p2o, p5o, sum35, rot1_2); \
      dct_long_mul(p3o, sum37, rot2_0); \
      dct_long_mul(p4o, sum15, rot2_1); \
      dct_wadd(sump13o, p1o, p3o); \
      dct_wadd(sump24o, p2o, p4o); \
      dct_wadd(sump23o, p2o, p3o); \
      dct_wadd(sump14o, p1o, p4o); \
      dct_long_mac(x4, sump13o, row7, rot3_0); \
      dct_long_mac(x5, sump24o, row5, rot3_1); \
      dct_long_mac(x6, sump23o, row3, rot3_2); \
      dct_long_mac(x7, sump14o, row1, rot3_3); \
      dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
      dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
      dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
      dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
   }

   // load
   row0 = vld1q_s16(data + 0*8);
   row1 = vld1q_s16(data + 1*8);
   row2 = vld1q_s16(data + 2*8);
   row3 = vld1q_s16(data + 3*8);
   row4 = vld1q_s16(data + 4*8);
   row5 = vld1q_s16(data + 5*8);
   row6 = vld1q_s16(data + 6*8);
   row7 = vld1q_s16(data + 7*8);

   // add DC bias
   // (1024 is added to lane 0 of row0 only, i.e. to data[0])
   row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));

   // column pass
   // (rounding narrowing shift right by 10)
   dct_pass(vrshrn_n_s32, 10);

   // 16bit 8x8 transpose
   {
// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
// whether compilers actually get this is another story, sadly.
#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }

      // pass 1
      dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
      dct_trn16(row2, row3);
      dct_trn16(row4, row5);
      dct_trn16(row6, row7);

      // pass 2
      dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
      dct_trn32(row1, row3);
      dct_trn32(row4, row6);
      dct_trn32(row5, row7);

      // pass 3
      dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
      dct_trn64(row1, row5);
      dct_trn64(row2, row6);
      dct_trn64(row3, row7);

#undef dct_trn16
#undef dct_trn32
#undef dct_trn64
   }

   // row pass
   // vrshrn_n_s32 only supports shifts up to 16, we need
   // 17. so do a non-rounding shift of 16 first then follow
   // up with a rounding shift by 1.
   dct_pass(vshrn_n_s32, 16);

   {
      // pack and round
      // (the rounding shift by 1 completes the shift by 17; the narrowing
      // saturates each lane to the unsigned 8-bit range)
      uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
      uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
      uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
      uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
      uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
      uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
      uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
      uint8x8_t p7 = vqrshrun_n_s16(row7, 1);

      // again, these can translate into one instruction, but often don't.
#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }

      // sadly can't use interleaved stores here since we only write
      // 8 bytes to each scan line!

      // 8x8 8-bit transpose pass 1
      dct_trn8_8(p0, p1);
      dct_trn8_8(p2, p3);
      dct_trn8_8(p4, p5);
      dct_trn8_8(p6, p7);

      // pass 2
      dct_trn8_16(p0, p2);
      dct_trn8_16(p1, p3);
      dct_trn8_16(p4, p6);
      dct_trn8_16(p5, p7);

      // pass 3
      dct_trn8_32(p0, p4);
      dct_trn8_32(p1, p5);
      dct_trn8_32(p2, p6);
      dct_trn8_32(p3, p7);

      // store
      // (one 8-byte row per vst1_u8; out is not advanced after the last row)
      vst1_u8(out, p0); out += out_stride;
      vst1_u8(out, p1); out += out_stride;
      vst1_u8(out, p2); out += out_stride;
      vst1_u8(out, p3); out += out_stride;
      vst1_u8(out, p4); out += out_stride;
      vst1_u8(out, p5); out += out_stride;
      vst1_u8(out, p6); out += out_stride;
      vst1_u8(out, p7);

#undef dct_trn8_8
#undef dct_trn8_16
#undef dct_trn8_32
   }

#undef dct_long_mul
#undef dct_long_mac
#undef dct_widen
#undef dct_wadd
#undef dct_wsub
#undef dct_bfly32o
#undef dct_pass
}
2649 
2650 #endif // STBI_NEON
2651 
2652 #define STBI__MARKER_none  0xff
2653 // if there's a pending marker from the entropy stream, return that
2654 // otherwise, fetch from the stream and get a marker. if there's no
2655 // marker, return 0xff, which is never a valid marker value
stbi__get_marker(stbi__jpeg * j)2656 static stbi_uc stbi__get_marker(stbi__jpeg *j)
2657 {
2658    stbi_uc x;
2659    if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
2660    x = stbi__get8(j->s);
2661    if (x != 0xff) return STBI__MARKER_none;
2662    while (x == 0xff)
2663       x = stbi__get8(j->s); // consume repeated 0xff fill bytes
2664    return x;
2665 }
2666 
2667 // in each scan, we'll have scan_n components, and the order
2668 // of the components is specified by order[]
2669 #define STBI__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)
2670 
2671 // after a restart interval, stbi__jpeg_reset the entropy decoder and
2672 // the dc prediction
stbi__jpeg_reset(stbi__jpeg * j)2673 static void stbi__jpeg_reset(stbi__jpeg *j)
2674 {
2675    j->code_bits = 0;
2676    j->code_buffer = 0;
2677    j->nomore = 0;
2678    j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
2679    j->marker = STBI__MARKER_none;
2680    j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
2681    j->eob_run = 0;
2682    // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
2683    // since we don't even allow 1<<30 pixels
2684 }
2685 
// Count one finished MCU against the restart interval. Returns 1 when the
// scan should keep decoding (either the interval has not run out, or it has
// and a restart marker was found, in which case the decoder state is reset).
// Returns 0 when the interval ran out and the pending marker is not a
// restart marker; the caller then stops and still reports success, so the
// result is corrupt data rather than no data.
static int stbi__jpeg_count_mcu(stbi__jpeg *z)
{
   if (--z->todo > 0)
      return 1;
   if (z->code_bits < 24)
      stbi__grow_buffer_unsafe(z);
   if (!STBI__RESTART(z->marker))
      return 0;
   stbi__jpeg_reset(z);
   return 1;
}

// Decode the entropy-coded data of the current scan. Baseline scans are
// decoded block by block straight through the IDCT kernel into the component
// planes; progressive scans only update the stored coefficient blocks.
// A scan with a single component is walked in plain block-scanline order
// over that component's own size; a scan with several components is walked
// MCU by MCU. Returns 0 as soon as a block decoder fails, 1 otherwise.
static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
{
   stbi__jpeg_reset(z);

   if (z->progressive) {
      if (z->scan_n != 1) {
         // interleaved: every block goes through the DC decoder
         int mx, my, k, bx, by;
         for (my = 0; my < z->img_mcu_y; ++my) {
            for (mx = 0; mx < z->img_mcu_x; ++mx) {
               // components are visited in the order given by order[]
               for (k = 0; k < z->scan_n; ++k) {
                  int n = z->order[k];
                  // an MCU holds h x v blocks of this component
                  for (by = 0; by < z->img_comp[n].v; ++by) {
                     for (bx = 0; bx < z->img_comp[n].h; ++bx) {
                        int col = (mx*z->img_comp[n].h + bx);
                        int row = (my*z->img_comp[n].v + by);
                        short *coef = z->img_comp[n].coeff + 64 * (col + row * z->img_comp[n].coeff_w);
                        if (!stbi__jpeg_decode_block_prog_dc(z, coef, &z->huff_dc[z->img_comp[n].hd], n))
                           return 0;
                     }
                  }
               }
               // one interleaved MCU finished
               if (!stbi__jpeg_count_mcu(z)) return 1;
            }
         }
         return 1;
      } else {
         // non-interleaved: the block count depends only on how many
         // "pixels" this component has, not on interleaved MCU blocking
         int bx, by;
         int n = z->order[0];
         int blocks_across = (z->img_comp[n].x+7) >> 3;
         int blocks_down   = (z->img_comp[n].y+7) >> 3;
         for (by = 0; by < blocks_down; ++by) {
            for (bx = 0; bx < blocks_across; ++bx) {
               short *coef = z->img_comp[n].coeff + 64 * (bx + by * z->img_comp[n].coeff_w);
               if (z->spec_start != 0) {
                  int ha = z->img_comp[n].ha;
                  if (!stbi__jpeg_decode_block_prog_ac(z, coef, &z->huff_ac[ha], z->fast_ac[ha]))
                     return 0;
               } else {
                  if (!stbi__jpeg_decode_block_prog_dc(z, coef, &z->huff_dc[z->img_comp[n].hd], n))
                     return 0;
               }
               // every block is an MCU here
               if (!stbi__jpeg_count_mcu(z)) return 1;
            }
         }
         return 1;
      }
   } else {
      if (z->scan_n != 1) {
         // interleaved baseline
         int mx, my, k, bx, by;
         STBI_SIMD_ALIGN(short, block[64]);
         for (my = 0; my < z->img_mcu_y; ++my) {
            for (mx = 0; mx < z->img_mcu_x; ++mx) {
               // components are visited in the order given by order[]
               for (k = 0; k < z->scan_n; ++k) {
                  int n = z->order[k];
                  // an MCU holds h x v blocks of this component
                  for (by = 0; by < z->img_comp[n].v; ++by) {
                     for (bx = 0; bx < z->img_comp[n].h; ++bx) {
                        int px = (mx*z->img_comp[n].h + bx)*8;
                        int py = (my*z->img_comp[n].v + by)*8;
                        int ha = z->img_comp[n].ha;
                        if (!stbi__jpeg_decode_block(z, block, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq]))
                           return 0;
                        z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*py+px, z->img_comp[n].w2, block);
                     }
                  }
               }
               // one interleaved MCU finished
               if (!stbi__jpeg_count_mcu(z)) return 1;
            }
         }
         return 1;
      } else {
         // non-interleaved baseline: one block at a time in scanline order,
         // sized by the component's own pixel dimensions
         int bx, by;
         STBI_SIMD_ALIGN(short, block[64]);
         int n = z->order[0];
         int blocks_across = (z->img_comp[n].x+7) >> 3;
         int blocks_down   = (z->img_comp[n].y+7) >> 3;
         for (by = 0; by < blocks_down; ++by) {
            for (bx = 0; bx < blocks_across; ++bx) {
               int ha = z->img_comp[n].ha;
               if (!stbi__jpeg_decode_block(z, block, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq]))
                  return 0;
               z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*by*8+bx*8, z->img_comp[n].w2, block);
               // every block is an MCU here
               if (!stbi__jpeg_count_mcu(z)) return 1;
            }
         }
         return 1;
      }
   }
}
2809 
// Multiply each of the 64 coefficients in data[] by the entry of dequant[]
// at the same index, storing the product back into data[].
static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
{
   int k = 0;
   while (k < 64) {
      data[k] = (short) (data[k] * dequant[k]);
      ++k;
   }
}
2816 
// For progressive images, dequantize every stored coefficient block of every
// component (in place) and run it through the IDCT kernel into that
// component's pixel plane. Does nothing when the image is not progressive.
static void stbi__jpeg_finish(stbi__jpeg *z)
{
   int comp, bx, by;

   if (!z->progressive)
      return;

   for (comp = 0; comp < z->s->img_n; ++comp) {
      // number of 8x8 blocks covering the component's own pixel size
      int blocks_across = (z->img_comp[comp].x+7) >> 3;
      int blocks_down   = (z->img_comp[comp].y+7) >> 3;
      for (by = 0; by < blocks_down; ++by) {
         for (bx = 0; bx < blocks_across; ++bx) {
            short *coef = z->img_comp[comp].coeff + 64 * (bx + by * z->img_comp[comp].coeff_w);
            stbi_uc *dst = z->img_comp[comp].data+z->img_comp[comp].w2*by*8+bx*8;
            stbi__jpeg_dequantize(coef, z->dequant[z->img_comp[comp].tq]);
            z->idct_block_kernel(dst, z->img_comp[comp].w2, coef);
         }
      }
   }
}
2835 
// Handle one marker segment that is not SOF/SOS/EOI.
//   m == STBI__MARKER_none : error, a marker was expected
//   0xDD (DRI)             : store the restart interval
//   0xDB (DQT)             : load one or more quantization tables
//   0xC4 (DHT)             : load one or more Huffman tables
//   0xE0..0xEF, 0xFE       : APPn / COM; JFIF (APP0) and Adobe (APP14)
//                            headers are recognised, the rest is skipped
// Any other marker is an error. Returns 1 on success and 0 on failure.
static int stbi__process_marker(stbi__jpeg *z, int m)
{
   int L;
   switch (m) {
      case STBI__MARKER_none: // no marker found
         return stbi__err("expected marker","Corrupt JPEG");

      case 0xDD: // DRI - specify restart interval
         if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
         z->restart_interval = stbi__get16be(z->s);
         return 1;

      case 0xDB: // DQT - define quantization table
         L = stbi__get16be(z->s)-2;
         while (L > 0) {
            int q = stbi__get8(z->s);
            int p = q >> 4, sixteen = (p != 0); // p: 0 = 8-bit entries, 1 = 16-bit entries
            int t = q & 15,i;                   // t: destination table index
            if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
            if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");

            // entries arrive in zigzag order; store them de-zigzagged
            for (i=0; i < 64; ++i)
               z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
            // one header byte plus 64 entries of one or two bytes each
            L -= (sixteen ? 129 : 65);
         }
         // the tables must account for the segment length exactly
         return L==0;

      case 0xC4: // DHT - define huffman table
         L = stbi__get16be(z->s)-2;
         while (L > 0) {
            stbi_uc *v;
            int sizes[16],i,n=0;
            int q = stbi__get8(z->s);
            int tc = q >> 4; // table class: 0 = DC, 1 = AC
            int th = q & 15; // destination table index
            if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
            // sixteen counts: number of codes of each length 1..16
            for (i=0; i < 16; ++i) {
               sizes[i] = stbi__get8(z->s);
               n += sizes[i];
            }
            // The counts come straight from the file and can add up to
            // 16*255. A Huffman table codes 8-bit symbols, so more than 256
            // entries is never valid; reject it before the table is built
            // and before n symbol bytes are written through v below.
            // NOTE(review): this assumes the stbi__huffman arrays are sized
            // for 256 symbols - confirm against the struct definition.
            if (n > 256) return stbi__err("bad DHT header","Corrupt JPEG");
            L -= 17;
            if (tc == 0) {
               if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
               v = z->huff_dc[th].values;
            } else {
               if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
               v = z->huff_ac[th].values;
            }
            for (i=0; i < n; ++i)
               v[i] = stbi__get8(z->s);
            // AC tables also get a fast lookup table
            if (tc != 0)
               stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
            L -= n;
         }
         // the tables must account for the segment length exactly
         return L==0;

      default: // handled (or rejected) below
         break;
   }

   // check for comment block or APP blocks
   if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
      L = stbi__get16be(z->s);
      // the length field counts its own two bytes
      if (L < 2) {
         if (m == 0xFE)
            return stbi__err("bad COM len","Corrupt JPEG");
         else
            return stbi__err("bad APP len","Corrupt JPEG");
      }
      L -= 2;

      if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
         static const unsigned char tag[5] = {'J','F','I','F','\0'};
         int ok = 1;
         int i;
         // all five bytes are consumed even after a mismatch
         for (i=0; i < 5; ++i)
            if (stbi__get8(z->s) != tag[i])
               ok = 0;
         L -= 5;
         if (ok)
            z->jfif = 1;
      } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
         static const unsigned char tag[6] = {'A','d','o','b','e','\0'};
         int ok = 1;
         int i;
         // all six bytes are consumed even after a mismatch
         for (i=0; i < 6; ++i)
            if (stbi__get8(z->s) != tag[i])
               ok = 0;
         L -= 6;
         if (ok) {
            stbi__get8(z->s); // version
            stbi__get16be(z->s); // flags0
            stbi__get16be(z->s); // flags1
            z->app14_color_transform = stbi__get8(z->s); // color transform
            L -= 6;
         }
      }

      // skip whatever is left of the segment
      stbi__skip(z->s, L);
      return 1;
   }

   return stbi__err("unknown marker","Corrupt JPEG");
}
2937 
2938 // after we see SOS
stbi__process_scan_header(stbi__jpeg * z)2939 static int stbi__process_scan_header(stbi__jpeg *z)
2940 {
2941    int i;
2942    int Ls = stbi__get16be(z->s);
2943    z->scan_n = stbi__get8(z->s);
2944    if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
2945    if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
2946    for (i=0; i < z->scan_n; ++i) {
2947       int id = stbi__get8(z->s), which;
2948       int q = stbi__get8(z->s);
2949       for (which = 0; which < z->s->img_n; ++which)
2950          if (z->img_comp[which].id == id)
2951             break;
2952       if (which == z->s->img_n) return 0; // no match
2953       z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
2954       z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
2955       z->order[i] = which;
2956    }
2957 
2958    {
2959       int aa;
2960       z->spec_start = stbi__get8(z->s);
2961       z->spec_end   = stbi__get8(z->s); // should be 63, but might be 0
2962       aa = stbi__get8(z->s);
2963       z->succ_high = (aa >> 4);
2964       z->succ_low  = (aa & 15);
2965       if (z->progressive) {
2966          if (z->spec_start > 63 || z->spec_end > 63  || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
2967             return stbi__err("bad SOS", "Corrupt JPEG");
2968       } else {
2969          if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
2970          if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
2971          z->spec_end = 63;
2972       }
2973    }
2974 
2975    return 1;
2976 }
2977 
// Release the buffers owned by the first ncomp components (raw_data,
// raw_coeff, linebuf) and clear the pointers that referred to them. Buffers
// that are already null are left alone. Returns why unchanged, so a caller
// can free and return its status in a single statement.
static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
{
   int c = 0;
   while (c < ncomp) {
      if (z->img_comp[c].raw_data != NULL) {
         STBI_FREE(z->img_comp[c].raw_data);
         // data is an aligned pointer into raw_data, so it goes too
         z->img_comp[c].data = NULL;
         z->img_comp[c].raw_data = NULL;
      }
      if (z->img_comp[c].raw_coeff != NULL) {
         STBI_FREE(z->img_comp[c].raw_coeff);
         // coeff is an aligned pointer into raw_coeff, so it goes too
         z->img_comp[c].coeff = NULL;
         z->img_comp[c].raw_coeff = NULL;
      }
      if (z->img_comp[c].linebuf != NULL) {
         STBI_FREE(z->img_comp[c].linebuf);
         z->img_comp[c].linebuf = NULL;
      }
      ++c;
   }
   return why;
}
2999 
// Parse the frame header (SOF): sample precision, image height and width,
// component count, and each component's id, sampling factors and
// quantization table index. Unless scan is STBI__SCAN_load the function
// stops there. For a load it goes on to derive the MCU layout and to
// allocate, per component, a pixel plane (and coefficient storage when the
// image is progressive). Returns 1 on success, 0 on failure with the reason
// set through stbi__err; on an allocation failure the components allocated
// so far are released first.
static int stbi__process_frame_header(stbi__jpeg *z, int scan)
{
   stbi__context *s = z->s;
   int seg_len, precision, ncomp, sampling, i;
   int h_max = 1, v_max = 1;

   seg_len = stbi__get16be(s);
   if (seg_len < 11)
      return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG

   precision = stbi__get8(s);
   if (precision != 8)
      return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline

   s->img_y = stbi__get16be(s);
   if (s->img_y == 0)
      return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG

   s->img_x = stbi__get16be(s);
   if (s->img_x == 0)
      return stbi__err("0 width","Corrupt JPEG"); // JPEG requires

   ncomp = stbi__get8(s);
   if (ncomp != 1 && ncomp != 3 && ncomp != 4)
      return stbi__err("bad component count","Corrupt JPEG");
   s->img_n = ncomp;
   for (i = 0; i < ncomp; ++i) {
      z->img_comp[i].data = NULL;
      z->img_comp[i].linebuf = NULL;
   }

   // 8 fixed bytes plus 3 bytes per component
   if (seg_len != 8+3*s->img_n)
      return stbi__err("bad SOF len","Corrupt JPEG");

   // z->rgb counts how many of the three component ids spell 'R','G','B'
   z->rgb = 0;
   for (i = 0; i < s->img_n; ++i) {
      static const unsigned char rgb[3] = { 'R', 'G', 'B' };
      z->img_comp[i].id = stbi__get8(s);
      if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
         ++z->rgb;

      // high nibble: horizontal factor, low nibble: vertical factor
      sampling = stbi__get8(s);
      z->img_comp[i].h = (sampling >> 4);
      if (z->img_comp[i].h == 0 || z->img_comp[i].h > 4)
         return stbi__err("bad H","Corrupt JPEG");
      z->img_comp[i].v = sampling & 15;
      if (z->img_comp[i].v == 0 || z->img_comp[i].v > 4)
         return stbi__err("bad V","Corrupt JPEG");

      z->img_comp[i].tq = stbi__get8(s);
      if (z->img_comp[i].tq > 3)
         return stbi__err("bad TQ","Corrupt JPEG");
   }

   // anything short of a full load only needed the values parsed above
   if (scan != STBI__SCAN_load)
      return 1;

   if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0))
      return stbi__err("too large", "Image too large to decode");

   // largest sampling factors over all components
   for (i = 0; i < s->img_n; ++i) {
      if (h_max < z->img_comp[i].h) h_max = z->img_comp[i].h;
      if (v_max < z->img_comp[i].v) v_max = z->img_comp[i].v;
   }

   // compute interleaved mcu info
   z->img_h_max = h_max;
   z->img_v_max = v_max;
   z->img_mcu_w = h_max * 8;
   z->img_mcu_h = v_max * 8;
   // these sizes can't be more than 17 bits
   z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
   z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;

   for (i = 0; i < s->img_n; ++i) {
      // number of effective pixels (e.g. for non-interleaved MCU)
      z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
      z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;

      // The planes are sized for whole interleaved MCUs, so they can hold
      // the oversized data that decoding full MCUs produces (e.g. a 16x16
      // iMCU on an image of width 33); the excess is not discarded until
      // colorspace conversion.
      //
      // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
      // so these muls can't overflow with 32-bit ints (which we require)
      z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
      z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;

      z->img_comp[i].linebuf = NULL;
      z->img_comp[i].raw_coeff = 0;
      z->img_comp[i].coeff = 0;

      // 15 spare bytes leave room to round the pointer up to 16
      z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
      if (z->img_comp[i].raw_data == NULL)
         return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
      // align blocks for idct using mmx/sse
      z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);

      if (!z->progressive)
         continue;

      // progressive: coefficient storage, one 64-entry block per 8x8 pixels
      // w2, h2 are multiples of 8 (see above)
      z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
      z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
      z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
      if (z->img_comp[i].raw_coeff == NULL)
         return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
      z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
   }

   return 1;
}
3082 
// JPEG marker predicates. Each takes a marker code (the byte that follows
// the 0xff prefix) and is written as a comparison rather than a constant
// because a predicate may accept more than one code (e.g. stbi__SOF).
#define stbi__SOI(x)               ((x) == 0xd8)
#define stbi__EOI(x)               ((x) == 0xd9)
#define stbi__SOS(x)               ((x) == 0xda)
#define stbi__DNL(x)               ((x) == 0xdc)

#define stbi__SOF_progressive(x)   ((x) == 0xc2)
// any of the three frame headers handled here: 0xc0, 0xc1, or progressive 0xc2
#define stbi__SOF(x)               ((x) == 0xc0 || (x) == 0xc1 || stbi__SOF_progressive(x))
3091 
// Parse a JPEG stream from its SOI marker up to and including the frame header.
//   scan == STBI__SCAN_type : only confirm that the stream opens with SOI.
//   any other scan mode     : every marker ahead of the SOF is handed to
//                             stbi__process_marker, then the frame header is
//                             parsed by stbi__process_frame_header(z, scan).
// Returns 1 on success. Failure paths return 0 directly or the value of
// stbi__err(...) (defined outside this section; assumed to evaluate to 0).
static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
{
   int marker;

   z->jfif = 0;
   z->app14_color_transform = -1; // valid values are 0,1,2
   z->marker = STBI__MARKER_none; // no marker cached yet

   marker = stbi__get_marker(z);
   if (!stbi__SOI(marker))
      return stbi__err("no SOI","Corrupt JPEG");
   if (scan == STBI__SCAN_type)
      return 1;

   for (marker = stbi__get_marker(z); !stbi__SOF(marker); ) {
      if (!stbi__process_marker(z, marker))
         return 0;
      // some files have extra padding after their blocks, so keep scanning
      // until a marker turns up or the input is exhausted
      for (marker = stbi__get_marker(z); marker == STBI__MARKER_none; marker = stbi__get_marker(z)) {
         if (stbi__at_eof(z->s))
            return stbi__err("no SOF", "Corrupt JPEG");
      }
   }

   z->progressive = stbi__SOF_progressive(marker);
   return stbi__process_frame_header(z, scan) ? 1 : 0;
}
3115 
3116 // decode image to YCbCr format
stbi__decode_jpeg_image(stbi__jpeg * j)3117 static int stbi__decode_jpeg_image(stbi__jpeg *j)
3118 {
3119    int m;
3120    for (m = 0; m < 4; m++) {
3121       j->img_comp[m].raw_data = NULL;
3122       j->img_comp[m].raw_coeff = NULL;
3123    }
3124    j->restart_interval = 0;
3125    if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
3126    m = stbi__get_marker(j);
3127    while (!stbi__EOI(m)) {
3128       if (stbi__SOS(m)) {
3129          if (!stbi__process_scan_header(j)) return 0;
3130          if (!stbi__parse_entropy_coded_data(j)) return 0;
3131          if (j->marker == STBI__MARKER_none ) {
3132             // handle 0s at the end of image data from IP Kamera 9060
3133             while (!stbi__at_eof(j->s)) {
3134                int x = stbi__get8(j->s);
3135                if (x == 255) {
3136                   j->marker = stbi__get8(j->s);
3137                   break;
3138                }
3139             }
3140             // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
3141          }
3142       } else if (stbi__DNL(m)) {
3143          int Ld = stbi__get16be(j->s);
3144          stbi__uint32 NL = stbi__get16be(j->s);
3145          if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG");
3146          if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG");
3147       } else {
3148          if (!stbi__process_marker(j, m)) return 0;
3149       }
3150       m = stbi__get_marker(j);
3151    }
3152    if (j->progressive)
3153       stbi__jpeg_finish(j);
3154    return 1;
3155 }
3156 
// static jfif-centered resampling (across block boundaries)

// Signature shared by every row upsampler below.
//   out      - line buffer the expanded row may be written into
//   in0, in1 - the two source rows to blend; the implementations below call
//              them in_near and in_far
//   w        - number of samples in each source row
//   hs       - horizontal expansion factor
// Returns the row holding the result: 'out', or a source row when nothing
// had to be expanded (see resample_row_1).
typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
                                    int w, int hs);

// divide by 4 with a shift and narrow to a byte; callers add the rounding
// bias (+2) to x themselves
#define stbi__div4(x) ((stbi_uc) ((x) >> 2))
3163 
// Resampler for components that need no expansion in either direction:
// nothing is copied, the nearer source row is handed back as the result.
// out, in_far, w and hs are accepted only to match resample_row_func.
static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   STBI_NOTUSED(out);
   STBI_NOTUSED(in_far);
   STBI_NOTUSED(w);
   STBI_NOTUSED(hs);
   return in_near;
}
3172 
// Vertical 2x upsample: two output rows are generated for every input row,
// and this produces one of them. Each sample is a 3:1 blend of the nearer
// and farther source rows, (3*near + far + 2) >> 2. Writes w bytes to 'out'
// and returns it; hs is unused.
static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   int x;
   STBI_NOTUSED(hs);
   for (x = 0; x < w; ++x) {
      int blend = 3*in_near[x] + in_far[x] + 2;
      out[x] = stbi__div4(blend);
   }
   return out;
}
3182 
// Horizontal 2x upsample: every input sample yields two output samples, each
// a 3:1 blend of that sample with its left or right neighbour. The outermost
// two outputs have no outer neighbour and simply copy the edge sample.
// Writes 2*w bytes to 'out' and returns it; in_far and hs are unused.
static stbi_uc*  stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   int x;
   stbi_uc *src = in_near;

   STBI_NOTUSED(in_far);
   STBI_NOTUSED(hs);

   if (w == 1) {
      // a lone sample cannot be interpolated, so it is duplicated
      out[0] = out[1] = src[0];
      return out;
   }

   // left edge
   out[0] = src[0];
   out[1] = stbi__div4(src[0]*3 + src[1] + 2);

   // interior samples
   for (x = 1; x < w-1; ++x) {
      int center = 3*src[x] + 2;
      out[2*x]   = stbi__div4(center + src[x-1]);
      out[2*x+1] = stbi__div4(center + src[x+1]);
   }

   // right edge; for w >= 2 the loop leaves x at the last input index
   out[2*x]   = stbi__div4(src[w-2]*3 + src[w-1] + 2);
   out[2*x+1] = src[w-1];

   return out;
}
3210 
3211 #define stbi__div16(x) ((stbi_uc) ((x) >> 4))
3212 
stbi__resample_row_hv_2(stbi_uc * out,stbi_uc * in_near,stbi_uc * in_far,int w,int hs)3213 static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3214 {
3215    // need to generate 2x2 samples for every one in input
3216    int i,t0,t1;
3217    if (w == 1) {
3218       out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
3219       return out;
3220    }
3221 
3222    t1 = 3*in_near[0] + in_far[0];
3223    out[0] = stbi__div4(t1+2);
3224    for (i=1; i < w; ++i) {
3225       t0 = t1;
3226       t1 = 3*in_near[i]+in_far[i];
3227       out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
3228       out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
3229    }
3230    out[w*2-1] = stbi__div4(t1+2);
3231 
3232    STBI_NOTUSED(hs);
3233 
3234    return out;
3235 }
3236 
#if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD variant of stbi__resample_row_hv_2 (same parameters, same return).
// The main loop handles 8 input samples per iteration with SSE2 or NEON
// instructions; its bound of (w-1) & ~7 always leaves at least the final
// sample for the scalar code after the loop, which applies the right-edge
// boundary condition. Writes 2*w bytes to 'out'; hs is unused.
static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   // need to generate 2x2 samples for every one in input
   int i=0,t0,t1;

   if (w == 1) {
      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
      return out;
   }

   // t1 carries the vertically filtered value of the previous column into
   // each iteration; it starts out as column 0 itself
   t1 = 3*in_near[0] + in_far[0];
   // process groups of 8 pixels for as long as we can.
   // note we can't handle the last pixel in a row in this loop
   // because we need to handle the filter boundary conditions.
   for (; i < ((w-1) & ~7); i += 8) {
#if defined(STBI_SSE2)
      // load and perform the vertical filtering pass
      // this uses 3*x + y = 4*x + (y - x)
      __m128i zero  = _mm_setzero_si128();
      __m128i farb  = _mm_loadl_epi64((__m128i *) (in_far + i));
      __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
      __m128i farw  = _mm_unpacklo_epi8(farb, zero);
      __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
      __m128i diff  = _mm_sub_epi16(farw, nearw);
      __m128i nears = _mm_slli_epi16(nearw, 2);
      __m128i curr  = _mm_add_epi16(nears, diff); // current row

      // horizontal filter works the same based on shifted vers of current
      // row. "prev" is current row shifted right by 1 pixel; we need to
      // insert the previous pixel value (from t1).
      // "next" is current row shifted left by 1 pixel, with first pixel
      // of next block of 8 pixels added in.
      __m128i prv0 = _mm_slli_si128(curr, 2);
      __m128i nxt0 = _mm_srli_si128(curr, 2);
      __m128i prev = _mm_insert_epi16(prv0, t1, 0);
      __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7);

      // horizontal filter, polyphase implementation since it's convenient:
      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
      // note the shared term.
      __m128i bias  = _mm_set1_epi16(8);
      __m128i curs = _mm_slli_epi16(curr, 2);
      __m128i prvd = _mm_sub_epi16(prev, curr);
      __m128i nxtd = _mm_sub_epi16(next, curr);
      __m128i curb = _mm_add_epi16(curs, bias);
      __m128i even = _mm_add_epi16(prvd, curb);
      __m128i odd  = _mm_add_epi16(nxtd, curb);

      // interleave even and odd pixels, then undo scaling.
      __m128i int0 = _mm_unpacklo_epi16(even, odd);
      __m128i int1 = _mm_unpackhi_epi16(even, odd);
      __m128i de0  = _mm_srli_epi16(int0, 4);
      __m128i de1  = _mm_srli_epi16(int1, 4);

      // pack and write output
      __m128i outv = _mm_packus_epi16(de0, de1);
      _mm_storeu_si128((__m128i *) (out + i*2), outv);
#elif defined(STBI_NEON)
      // load and perform the vertical filtering pass
      // this uses 3*x + y = 4*x + (y - x)
      uint8x8_t farb  = vld1_u8(in_far + i);
      uint8x8_t nearb = vld1_u8(in_near + i);
      int16x8_t diff  = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
      int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
      int16x8_t curr  = vaddq_s16(nears, diff); // current row

      // horizontal filter works the same based on shifted vers of current
      // row. "prev" is current row shifted right by 1 pixel; we need to
      // insert the previous pixel value (from t1).
      // "next" is current row shifted left by 1 pixel, with first pixel
      // of next block of 8 pixels added in.
      int16x8_t prv0 = vextq_s16(curr, curr, 7);
      int16x8_t nxt0 = vextq_s16(curr, curr, 1);
      int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
      int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7);

      // horizontal filter, polyphase implementation since it's convenient:
      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
      // note the shared term.
      int16x8_t curs = vshlq_n_s16(curr, 2);
      int16x8_t prvd = vsubq_s16(prev, curr);
      int16x8_t nxtd = vsubq_s16(next, curr);
      int16x8_t even = vaddq_s16(curs, prvd);
      int16x8_t odd  = vaddq_s16(curs, nxtd);

      // undo scaling and round, then store with even/odd phases interleaved
      uint8x8x2_t o;
      o.val[0] = vqrshrun_n_s16(even, 4);
      o.val[1] = vqrshrun_n_s16(odd,  4);
      vst2_u8(out + i*2, o);
#endif

      // "previous" value for next iter
      t1 = 3*in_near[i+7] + in_far[i+7];
   }

   // scalar tail, starting at the first column the SIMD loop did not cover.
   // Only the left half (out[i*2]) of that column is written here; its
   // right half is emitted by the loop below or by the final edge store.
   // If the SIMD loop never ran, i is 0 and t0 == t1, so this store reduces
   // to the left-edge value (t1+2)>>2.
   t0 = t1;
   t1 = 3*in_near[i] + in_far[i];
   out[i*2] = stbi__div16(3*t1 + t0 + 8);

   // remaining columns: same recurrence as stbi__resample_row_hv_2
   for (++i; i < w; ++i) {
      t0 = t1;
      t1 = 3*in_near[i]+in_far[i];
      out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
      out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
   }
   // right edge: only the vertical blend applies
   out[w*2-1] = stbi__div4(t1+2);

   STBI_NOTUSED(hs);

   return out;
}
#endif
3353 
// Fallback upsampler for expansion factors without a dedicated filter:
// nearest-neighbour horizontally, i.e. every input sample is repeated hs
// times. Writes w*hs bytes to 'out' and returns it; in_far is unused.
static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   int x, rep;
   stbi_uc *dst = out;
   STBI_NOTUSED(in_far);
   for (x = 0; x < w; ++x) {
      for (rep = 0; rep < hs; ++rep)
         *dst++ = in_near[x];
   }
   return out;
}
3364 
3365 // this is a reduced-precision calculation of YCbCr-to-RGB introduced
3366 // to make sure the code produces the same results in both SIMD and scalar
3367 #define stbi__float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8)
stbi__YCbCr_to_RGB_row(stbi_uc * out,const stbi_uc * y,const stbi_uc * pcb,const stbi_uc * pcr,int count,int step)3368 static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
3369 {
3370    int i;
3371    for (i=0; i < count; ++i) {
3372       int y_fixed = (y[i] << 20) + (1<<19); // rounding
3373       int r,g,b;
3374       int cr = pcr[i] - 128;
3375       int cb = pcb[i] - 128;
3376       r = y_fixed +  cr* stbi__float2fixed(1.40200f);
3377       g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
3378       b = y_fixed                                     +   cb* stbi__float2fixed(1.77200f);
3379       r >>= 20;
3380       g >>= 20;
3381       b >>= 20;
3382       if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
3383       if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
3384       if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
3385       out[0] = (stbi_uc)r;
3386       out[1] = (stbi_uc)g;
3387       out[2] = (stbi_uc)b;
3388       out[3] = 255;
3389       out += step;
3390    }
3391 }
3392 
#if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD variant of stbi__YCbCr_to_RGB_row (same parameters).
// When step == 4, pixels are converted 8 at a time with SSE2 or NEON
// instructions; whatever is left (all pixels when step != 4, otherwise the
// final count % 8) goes through the scalar loop at the bottom, which is the
// same computation as stbi__YCbCr_to_RGB_row.
static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
{
   int i = 0; // index of the next pixel to convert; shared by all three loops

#ifdef STBI_SSE2
   // step == 3 is pretty ugly on the final interleave, and i'm not convinced
   // it's useful in practice (you wouldn't use it for textures, for example).
   // so just accelerate step == 4 case.
   if (step == 4) {
      // this is a fairly straightforward implementation and not super-optimized.
      __m128i signflip  = _mm_set1_epi8(-0x80);
      __m128i cr_const0 = _mm_set1_epi16(   (short) ( 1.40200f*4096.0f+0.5f));
      __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
      __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
      __m128i cb_const1 = _mm_set1_epi16(   (short) ( 1.77200f*4096.0f+0.5f));
      __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128);
      __m128i xw = _mm_set1_epi16(255); // alpha channel

      for (; i+7 < count; i += 8) {
         // load
         __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i));
         __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i));
         __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i));
         __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
         __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128

         // unpack to short (and left-shift cr, cb by 8)
         __m128i yw  = _mm_unpacklo_epi8(y_bias, y_bytes);
         __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
         __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);

         // color transform
         __m128i yws = _mm_srli_epi16(yw, 4);
         __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
         __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
         __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
         __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
         __m128i rws = _mm_add_epi16(cr0, yws);
         __m128i gwt = _mm_add_epi16(cb0, yws);
         __m128i bws = _mm_add_epi16(yws, cb1);
         __m128i gws = _mm_add_epi16(gwt, cr1);

         // descale
         __m128i rw = _mm_srai_epi16(rws, 4);
         __m128i bw = _mm_srai_epi16(bws, 4);
         __m128i gw = _mm_srai_epi16(gws, 4);

         // back to byte, set up for transpose
         __m128i brb = _mm_packus_epi16(rw, bw);
         __m128i gxb = _mm_packus_epi16(gw, xw);

         // transpose to interleave channels
         __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
         __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
         __m128i o0 = _mm_unpacklo_epi16(t0, t1);
         __m128i o1 = _mm_unpackhi_epi16(t0, t1);

         // store
         _mm_storeu_si128((__m128i *) (out + 0), o0);
         _mm_storeu_si128((__m128i *) (out + 16), o1);
         out += 32; // 8 pixels * 4 bytes
      }
   }
#endif

#ifdef STBI_NEON
   // in this version, step=3 support would be easy to add. but is there demand?
   if (step == 4) {
      // this is a fairly straightforward implementation and not super-optimized.
      uint8x8_t signflip = vdup_n_u8(0x80);
      int16x8_t cr_const0 = vdupq_n_s16(   (short) ( 1.40200f*4096.0f+0.5f));
      int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
      int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
      int16x8_t cb_const1 = vdupq_n_s16(   (short) ( 1.77200f*4096.0f+0.5f));

      for (; i+7 < count; i += 8) {
         // load
         uint8x8_t y_bytes  = vld1_u8(y + i);
         uint8x8_t cr_bytes = vld1_u8(pcr + i);
         uint8x8_t cb_bytes = vld1_u8(pcb + i);
         int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
         int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));

         // expand to s16
         int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
         int16x8_t crw = vshll_n_s8(cr_biased, 7);
         int16x8_t cbw = vshll_n_s8(cb_biased, 7);

         // color transform
         int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
         int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
         int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
         int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
         int16x8_t rws = vaddq_s16(yws, cr0);
         int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
         int16x8_t bws = vaddq_s16(yws, cb1);

         // undo scaling, round, convert to byte
         uint8x8x4_t o;
         o.val[0] = vqrshrun_n_s16(rws, 4);
         o.val[1] = vqrshrun_n_s16(gws, 4);
         o.val[2] = vqrshrun_n_s16(bws, 4);
         o.val[3] = vdup_n_u8(255);

         // store, interleaving r/g/b/a
         vst4_u8(out, o);
         out += 8*4;
      }
   }
#endif

   // scalar path for the pixels the vector loops above did not consume
   for (; i < count; ++i) {
      int y_fixed = (y[i] << 20) + (1<<19); // rounding
      int r,g,b;
      int cr = pcr[i] - 128;
      int cb = pcb[i] - 128;
      r = y_fixed + cr* stbi__float2fixed(1.40200f);
      g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
      b = y_fixed                                   +   cb* stbi__float2fixed(1.77200f);
      r >>= 20;
      g >>= 20;
      b >>= 20;
      // clamp to 0..255; the unsigned compare catches both out-of-range sides
      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
      out[0] = (stbi_uc)r;
      out[1] = (stbi_uc)g;
      out[2] = (stbi_uc)b;
      out[3] = 255;
      out += step;
   }
}
#endif
3527 
// set up the kernels
//
// Fills in the three function pointers the decoder dispatches through
// (IDCT, YCbCr->RGB conversion, 2x2 upsampling). The portable C versions are
// installed first and then replaced by the SIMD versions where available.
static void stbi__setup_jpeg(stbi__jpeg *j)
{
   // portable defaults
   j->idct_block_kernel = stbi__idct_block;
   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;

#ifdef STBI_SSE2
   // SSE2 kernels are compiled in, but only used if the runtime check passes
   if (stbi__sse2_available()) {
      j->idct_block_kernel = stbi__idct_simd;
      j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
      j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
   }
#endif

#ifdef STBI_NEON
   // NEON kernels are selected unconditionally when compiled in
   j->idct_block_kernel = stbi__idct_simd;
   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
#endif
}
3549 
// clean up the temporary component buffers
//
// Forwards to stbi__free_jpeg_components for the first j->s->img_n
// components; the trailing 0 is the third argument of that helper (its
// meaning is defined outside this section). load_jpeg_image sets img_n to 0
// before decoding so that an early call here is harmless.
static void stbi__cleanup_jpeg(stbi__jpeg *j)
{
   stbi__free_jpeg_components(j, j->s->img_n, 0);
}
3555 
// Per-component upsampling state used by load_jpeg_image while it walks the
// output rows from top to bottom.
typedef struct
{
   resample_row_func resample; // row upsampler chosen for this component's hs/vs
   stbi_uc *line0,*line1;      // the two source rows currently being blended
   int hs,vs;   // expansion factor in each axis
   int w_lores; // horizontal pixels pre-expansion
   int ystep;   // how far through vertical expansion we are
   int ypos;    // which pre-expansion row we're on
} stbi__resample;
3565 
3566 // fast 0..255 * 0..255 => 0..255 rounded multiplication
stbi__blinn_8x8(stbi_uc x,stbi_uc y)3567 static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
3568 {
3569    unsigned int t = x*y + 128;
3570    return (stbi_uc) ((t + (t >>8)) >> 8);
3571 }
3572 
// Decode a JPEG and produce an interleaved 8-bit image.
//   z        - decoder state; z->s is the input stream
//   out_x,
//   out_y    - receive the image width and height
//   comp     - if non-NULL, receives the component count reported for the
//              source image (3 when the file has 3 or more components, else 1)
//   req_comp - 0 to output the natural count (3 or 1), or 1..4 to force the
//              number of output channels
// Returns the pixel buffer (output channels * width * height bytes) or, on
// failure, NULL / the value of stbi__errpuc(...). stbi__cleanup_jpeg is
// called on every path once decoding has been attempted.
static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
{
   // n: output channels per pixel; decode_n: number of source components
   // that actually have to be resampled; is_rgb: source is stored as RGB
   int n, decode_n, is_rgb;
   z->s->img_n = 0; // make stbi__cleanup_jpeg safe

   // validate req_comp
   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");

   // load a jpeg image from whichever source, but leave in YCbCr format
   if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }

   // determine actual number of components to generate
   n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;

   // three components are treated as plain RGB when z->rgb == 3 (set outside
   // this section) or when APP14 says "no transform" and there is no JFIF header
   is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));

   // grey output from a YCbCr source only needs the first (Y) component
   if (z->s->img_n == 3 && n < 3 && !is_rgb)
      decode_n = 1;
   else
      decode_n = z->s->img_n;

   // resample and color-convert
   {
      int k;
      unsigned int i,j;
      stbi_uc *output;
      stbi_uc *coutput[4]; // per-component row to read from for the current output row

      stbi__resample res_comp[4];

      for (k=0; k < decode_n; ++k) {
         stbi__resample *r = &res_comp[k];

         // allocate line buffer big enough for upsampling off the edges
         // with upsample factor of 4
         z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
         if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }

         r->hs      = z->img_h_max / z->img_comp[k].h;
         r->vs      = z->img_v_max / z->img_comp[k].v;
         r->ystep   = r->vs >> 1;
         r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
         r->ypos    = 0;
         r->line0   = r->line1 = z->img_comp[k].data;

         // pick the row upsampler matching this component's expansion factors
         if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
         else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
         else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
         else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
         else                               r->resample = stbi__resample_row_generic;
      }

      // can't error after this so, this is safe
      // NOTE(review): the final argument presumably adds one spare byte; the
      // out[3] / out[1] stores below can land one byte past the last pixel
      // when n is 3 or 1 - confirm against stbi__malloc_mad3
      output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
      if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }

      // now go ahead and resample
      for (j=0; j < z->s->img_y; ++j) {
         stbi_uc *out = output + n * z->s->img_x * j;
         for (k=0; k < decode_n; ++k) {
            stbi__resample *r = &res_comp[k];
            // in the second half of a vertical step the lower source row
            // (line1) is the nearer one, otherwise the upper row (line0) is
            int y_bot = r->ystep >= (r->vs >> 1);
            coutput[k] = r->resample(z->img_comp[k].linebuf,
                                     y_bot ? r->line1 : r->line0,
                                     y_bot ? r->line0 : r->line1,
                                     r->w_lores, r->hs);
            // after vs output rows, slide the source window down one row;
            // line1 stops advancing at the last row of the component
            if (++r->ystep >= r->vs) {
               r->ystep = 0;
               r->line0 = r->line1;
               if (++r->ypos < z->img_comp[k].y)
                  r->line1 += z->img_comp[k].w2;
            }
         }
         if (n >= 3) {
            // colour output (3 or 4 channels)
            stbi_uc *y = coutput[0];
            if (z->s->img_n == 3) {
               if (is_rgb) {
                  // components are already R, G, B: just interleave
                  for (i=0; i < z->s->img_x; ++i) {
                     out[0] = y[i];
                     out[1] = coutput[1][i];
                     out[2] = coutput[2][i];
                     out[3] = 255;
                     out += n;
                  }
               } else {
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
               }
            } else if (z->s->img_n == 4) {
               if (z->app14_color_transform == 0) { // CMYK
                  // scale each of the first three channels by the fourth
                  for (i=0; i < z->s->img_x; ++i) {
                     stbi_uc m = coutput[3][i];
                     out[0] = stbi__blinn_8x8(coutput[0][i], m);
                     out[1] = stbi__blinn_8x8(coutput[1][i], m);
                     out[2] = stbi__blinn_8x8(coutput[2][i], m);
                     out[3] = 255;
                     out += n;
                  }
               } else if (z->app14_color_transform == 2) { // YCCK
                  // convert YCC to RGB first, then invert and scale by the fourth channel
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
                  for (i=0; i < z->s->img_x; ++i) {
                     stbi_uc m = coutput[3][i];
                     out[0] = stbi__blinn_8x8(255 - out[0], m);
                     out[1] = stbi__blinn_8x8(255 - out[1], m);
                     out[2] = stbi__blinn_8x8(255 - out[2], m);
                     out += n;
                  }
               } else { // YCbCr + alpha?  Ignore the fourth channel for now
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
               }
            } else
               // single-component source: replicate grey into R, G and B
               for (i=0; i < z->s->img_x; ++i) {
                  out[0] = out[1] = out[2] = y[i];
                  out[3] = 255; // not used if n==3
                  out += n;
               }
         } else {
            // grey output (1 or 2 channels)
            if (is_rgb) {
               if (n == 1)
                  for (i=0; i < z->s->img_x; ++i)
                     *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
               else {
                  for (i=0; i < z->s->img_x; ++i, out += 2) {
                     out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
                     out[1] = 255;
                  }
               }
            } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
               // CMYK: go through RGB, then reduce to luminance
               for (i=0; i < z->s->img_x; ++i) {
                  stbi_uc m = coutput[3][i];
                  stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
                  stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
                  stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
                  out[0] = stbi__compute_y(r, g, b);
                  out[1] = 255; // overwritten by the next pixel when n == 1
                  out += n;
               }
            } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
               // YCCK: inverted first component scaled by the fourth channel
               for (i=0; i < z->s->img_x; ++i) {
                  out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
                  out[1] = 255; // overwritten by the next pixel when n == 1
                  out += n;
               }
            } else {
               // first component is used directly as the grey value
               stbi_uc *y = coutput[0];
               if (n == 1)
                  for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
               else
                  for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255;
            }
         }
      }
      stbi__cleanup_jpeg(z);
      *out_x = z->s->img_x;
      *out_y = z->s->img_y;
      if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output
      return output;
   }
}
3731 
// Entry point for loading a JPEG from stream 's'.
//   x, y     - receive the image dimensions
//   comp     - if non-NULL, receives the source component count
//   req_comp - requested output channels (0 = natural count)
//   ri       - unused by the JPEG loader
// Allocates a temporary decoder state, runs load_jpeg_image and frees the
// state again. Returns the pixel buffer from load_jpeg_image, or the value
// of stbi__errpuc("outofmem", ...) if the decoder state cannot be allocated.
static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   unsigned char* result;
   stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
   STBI_NOTUSED(ri);
   // bail out before touching j if the allocation failed
   if (!j) return stbi__errpuc("outofmem", "Out of memory");
   j->s = s;
   stbi__setup_jpeg(j);
   result = load_jpeg_image(j, x,y,comp,req_comp);
   STBI_FREE(j);
   return result;
}
3743 
// Probe whether stream 's' looks like a JPEG: runs the header decoder in
// STBI__SCAN_type mode, which only checks for the SOI marker, then rewinds
// the stream. Returns the header decoder's result (nonzero for a JPEG), or
// the value of stbi__err("outofmem", ...) if the temporary decoder state
// cannot be allocated; nothing has been read from 's' in that case.
static int stbi__jpeg_test(stbi__context *s)
{
   int r;
   stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
   // bail out before touching j if the allocation failed
   if (!j) return stbi__err("outofmem", "Out of memory");
   j->s = s;
   stbi__setup_jpeg(j);
   r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
   stbi__rewind(s);
   STBI_FREE(j);
   return r;
}
3755 
// Read only the JPEG header through an existing decoder state and report
// the image properties. x, y and comp may each be NULL if the caller does
// not want that value; comp is reported as 3 for files with three or more
// components and 1 otherwise. Returns 1 on success. On failure the stream
// is rewound and 0 is returned.
static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
{
   if (stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
      if (x)
         *x = j->s->img_x;
      if (y)
         *y = j->s->img_y;
      if (comp)
         *comp = (j->s->img_n >= 3) ? 3 : 1;
      return 1;
   }

   stbi__rewind( j->s );
   return 0;
}
3767 
// Report the dimensions and component count of the JPEG in stream 's'
// without decoding pixel data. x, y and comp may each be NULL.
// Allocates a temporary decoder state for stbi__jpeg_info_raw and frees it
// afterwards. Returns that function's result, or the value of
// stbi__err("outofmem", ...) if the decoder state cannot be allocated.
static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
{
   int result;
   stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
   // bail out before touching j if the allocation failed
   if (!j) return stbi__err("outofmem", "Out of memory");
   j->s = s;
   result = stbi__jpeg_info_raw(j, x, y, comp);
   STBI_FREE(j);
   return result;
}
3777 #endif
3778 
3779 // public domain zlib decode    v0.2  Sean Barrett 2006-11-18
3780 //    simple implementation
3781 //      - all input must be provided in an upfront buffer
3782 //      - all output is written to a single output buffer (can malloc/realloc)
3783 //    performance
3784 //      - fast huffman
3785 
3786 #ifndef STBI_NO_ZLIB
3787 
// fast-way is faster to check than jpeg huffman, but slow way is slower
// STBI__ZFAST_BITS is the width in bits of the index into the
// stbi__zhuffman 'fast' table (which has 1 << STBI__ZFAST_BITS entries);
// STBI__ZFAST_MASK selects that many low-order bits.
#define STBI__ZFAST_BITS  9 // accelerate all cases in default tables
#define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1)
3791 
// zlib-style huffman encoding
// (jpegs packs from left, zlib from right, so can't share code)
//
// Decoding tables for one Huffman code, filled in by stbi__zbuild_huffman.
typedef struct
{
   stbi__uint16 fast[1 << STBI__ZFAST_BITS]; // indexed by bit-reversed code bits; entry is (code length << 9) | symbol, 0 if no code of <= STBI__ZFAST_BITS bits matches
   stbi__uint16 firstcode[16];   // first code value assigned to each code length
   int maxcode[17];              // per length: one past the last code, shifted left to 16 bits; [16] is a sentinel
   stbi__uint16 firstsymbol[16]; // index into size[]/value[] of the first entry of each code length
   stbi_uc  size[288];           // code length of each entry, entries ordered by code
   stbi__uint16 value[288];      // symbol of each entry, same ordering as size[]
} stbi__zhuffman;
3803 
stbi__bitreverse16(int n)3804 stbi_inline static int stbi__bitreverse16(int n)
3805 {
3806   n = ((n & 0xAAAA) >>  1) | ((n & 0x5555) << 1);
3807   n = ((n & 0xCCCC) >>  2) | ((n & 0x3333) << 2);
3808   n = ((n & 0xF0F0) >>  4) | ((n & 0x0F0F) << 4);
3809   n = ((n & 0xFF00) >>  8) | ((n & 0x00FF) << 8);
3810   return n;
3811 }
3812 
stbi__bit_reverse(int v,int bits)3813 stbi_inline static int stbi__bit_reverse(int v, int bits)
3814 {
3815    STBI_ASSERT(bits <= 16);
3816    // to bit reverse n bits, reverse 16 and shift
3817    // e.g. 11 bits, bit reverse and shift away 5
3818    return stbi__bitreverse16(v) >> (16-bits);
3819 }
3820 
// Build the decoding tables in 'z' from a list of code lengths.
//   sizelist - code length of each symbol, 0 meaning the symbol is unused
//   num      - number of entries in sizelist
// Codes are assigned in order of increasing length and, within one length,
// in symbol order (the DEFLATE scheme). Returns 1 on success; if the lengths
// cannot form a valid code the value of stbi__err(...) is returned.
static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
{
   int len, sym;
   int next_symbol = 0;   // running index into z->size / z->value
   int code = 0;          // next unassigned code at the current length
   int next_code[16];     // next code to hand out, per length
   int count[17];         // number of symbols using each length

   // DEFLATE spec for generating codes
   memset(count, 0, sizeof(count));
   memset(z->fast, 0, sizeof(z->fast));
   for (sym = 0; sym < num; ++sym)
      count[sizelist[sym]] += 1;
   count[0] = 0; // length 0 means "no code"

   // a length of len bits cannot be shared by more than 2^len symbols
   for (len = 1; len < 16; ++len) {
      if (count[len] > (1 << len))
         return stbi__err("bad sizes", "Corrupt PNG");
   }

   for (len = 1; len < 16; ++len) {
      next_code[len] = code;
      z->firstcode[len] = (stbi__uint16) code;
      z->firstsymbol[len] = (stbi__uint16) next_symbol;
      code += count[len];
      // the last code handed out at this length must still fit in len bits
      if (count[len] && code-1 >= (1 << len))
         return stbi__err("bad codelengths","Corrupt PNG");
      z->maxcode[len] = code << (16-len); // preshift for inner loop
      code <<= 1;
      next_symbol += count[len];
   }
   z->maxcode[16] = 0x10000; // sentinel

   for (sym = 0; sym < num; ++sym) {
      int bits = sizelist[sym];
      int slot;
      stbi__uint16 fastv;

      if (!bits)
         continue;

      slot  = next_code[bits] - z->firstcode[bits] + z->firstsymbol[bits];
      fastv = (stbi__uint16) ((bits << 9) | sym);
      z->size [slot] = (stbi_uc     ) bits;
      z->value[slot] = (stbi__uint16) sym;

      if (bits <= STBI__ZFAST_BITS) {
         // short code: fill every fast-table slot whose low 'bits' bits
         // equal the bit-reversed code
         int idx;
         for (idx = stbi__bit_reverse(next_code[bits], bits); idx < (1 << STBI__ZFAST_BITS); idx += (1 << bits))
            z->fast[idx] = fastv;
      }
      ++next_code[bits];
   }
   return 1;
}
3867 
3868 // zlib-from-memory implementation for PNG reading
3869 //    because PNG allows splitting the zlib stream arbitrarily,
3870 //    and it's annoying structurally to have PNG call ZLIB call PNG,
3871 //    we require PNG read all the IDATs and combine them into a single
3872 //    memory buffer
3873 
// State of one zlib decode: input cursor, bit buffer, output buffer and the
// two huffman tables of the block currently being decoded.
typedef struct
{
   // next input byte to read / one past the last input byte
   stbi_uc *zbuffer, *zbuffer_end;
   // number of valid bits currently held in code_buffer
   int num_bits;
   // bit buffer; new input bytes are OR-ed in above the bits already present
   stbi__uint32 code_buffer;

   // output write cursor
   char *zout;
   // start of the output buffer
   char *zout_start;
   // one past the end of the output buffer
   char *zout_end;
   // nonzero if stbi__zexpand may reallocate the output buffer to grow it
   int   z_expandable;

   // literal/length and distance decoding tables
   stbi__zhuffman z_length, z_distance;
} stbi__zbuf;
3887 
stbi__zget8(stbi__zbuf * z)3888 stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
3889 {
3890    if (z->zbuffer >= z->zbuffer_end) return 0;
3891    return *z->zbuffer++;
3892 }
3893 
// Tops up the bit buffer one input byte at a time. At least one byte is
// always added; filling stops once more than 24 bits are held.
static void stbi__fill_bits(stbi__zbuf *z)
{
   for (;;) {
      unsigned int incoming;
      // no stray bits may exist above the num_bits valid ones
      STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
      incoming = (unsigned int) stbi__zget8(z);
      z->code_buffer |= incoming << z->num_bits;
      z->num_bits += 8;
      if (z->num_bits > 24) break;
   }
}
3902 
stbi__zreceive(stbi__zbuf * z,int n)3903 stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
3904 {
3905    unsigned int k;
3906    if (z->num_bits < n) stbi__fill_bits(z);
3907    k = z->code_buffer & ((1 << n) - 1);
3908    z->code_buffer >>= n;
3909    z->num_bits -= n;
3910    return k;
3911 }
3912 
// Decodes one symbol whose code was not resolved by the fast table.
//   a - decode state; the matched code's bits are consumed on success
//   z - huffman table to decode with
// Returns the decoded symbol, or -1 if the bits do not form a valid code.
// Table inconsistencies are reported as -1 rather than asserted, so that
// corrupt input cannot index outside z->size / z->value.
static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
{
   int b,s,k;
   // not resolved by fast table, so compute it the slow way
   // use jpeg approach, which requires MSbits at top
   k = stbi__bit_reverse(a->code_buffer, 16);
   // find the shortest length whose (preshifted) code range contains k;
   // maxcode[16] is a sentinel, so the search always terminates
   for (s=STBI__ZFAST_BITS+1; ; ++s)
      if (k < z->maxcode[s])
         break;
   if (s >= 16) return -1; // invalid code!
   // code size is s, so:
   b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
   if (b < 0 || b >= (int) sizeof(z->size)) return -1; // slot outside the table: corrupt data
   if (z->size[b] != s) return -1; // slot does not hold a code of this length: corrupt data
   a->code_buffer >>= s;
   a->num_bits -= s;
   return z->value[b];
}
3930 
stbi__zhuffman_decode(stbi__zbuf * a,stbi__zhuffman * z)3931 stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
3932 {
3933    int b,s;
3934    if (a->num_bits < 16) stbi__fill_bits(a);
3935    b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
3936    if (b) {
3937       s = b >> 9;
3938       a->code_buffer >>= s;
3939       a->num_bits -= s;
3940       return b & 511;
3941    }
3942    return stbi__zhuffman_decode_slowpath(a, z);
3943 }
3944 
// Grows the output buffer so that at least n more bytes fit after zout.
//   z    - decode state owning the output buffer
//   zout - caller's current write cursor; stored into z->zout first
//   n    - number of additional bytes required
// The capacity is doubled until cur + n fits, then the buffer is reallocated
// and zout_start / zout / zout_end are re-pointed into the new allocation.
// Returns 1 on success; returns the stbi__err failure value if the buffer is
// not expandable, if the required size would not fit in a non-negative int,
// or if reallocation fails (the old buffer is left untouched in that case).
static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to make room for n bytes
{
   char *q;
   unsigned int cur, limit, old_limit;
   // largest size still representable as a non-negative int (sizes and the
   // reported output length are ints); assumes int has no padding bits
   const unsigned int max_limit = ~0u >> 1;
   z->zout = zout;
   if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
   cur   = (unsigned int) (z->zout     - z->zout_start);
   limit = old_limit = (unsigned int) (z->zout_end - z->zout_start);
   // reject requests whose total size would overflow instead of wrapping
   if (n < 0 || cur > max_limit || (unsigned int) n > max_limit - cur)
      return stbi__err("outofmem", "Out of memory");
   if (limit == 0) limit = 1; // doubling a zero capacity would never terminate
   while (cur + (unsigned int) n > limit) {
      if (limit > max_limit / 2) return stbi__err("outofmem", "Out of memory");
      limit *= 2;
   }
   q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
   STBI_NOTUSED(old_limit);
   if (q == NULL) return stbi__err("outofmem", "Out of memory");
   z->zout_start = q;
   z->zout       = q + cur;
   z->zout_end   = q + limit;
   return 1;
}
3963 
// Base match length for each length symbol, indexed by (symbol - 257).
// The trailing 0 entries pad the table beyond the last real length (258).
static const int stbi__zlength_base[31] = {
   3,4,5,6,7,8,9,10,11,13,
   15,17,19,23,27,31,35,43,51,59,
   67,83,99,115,131,163,195,227,258,0,0 };

// Number of extra bits to read and add to the base length, same indexing.
static const int stbi__zlength_extra[31]=
{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };

// Base match distance for each distance symbol, indexed by the symbol.
// The last two entries are 0 padding.
static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};

// Number of extra bits to read and add to the base distance, same indexing.
// Only 30 initializers are given; the remaining two entries are implicitly 0.
static const int stbi__zdist_extra[32] =
{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
3977 
// Decodes one huffman-compressed block using a->z_length and a->z_distance,
// appending the decoded bytes to the output buffer.
//   a - decode state; a->zout is updated when the end-of-block symbol (256)
//       is reached
// Symbols below 256 are literals; symbols above 256 introduce a
// (length, distance) back-reference into the output written so far.
// Returns 1 at end of block, or 0 / the stbi__err failure value on an
// invalid code, an invalid distance, or failure to grow the output buffer.
static int stbi__parse_huffman_block(stbi__zbuf *a)
{
   char *zout = a->zout;
   for(;;) {
      int z = stbi__zhuffman_decode(a, &a->z_length);
      if (z < 256) {
         if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
         if (zout >= a->zout_end) {
            if (!stbi__zexpand(a, zout, 1)) return 0;
            zout = a->zout;
         }
         *zout++ = (char) z;
      } else {
         stbi_uc *p;
         int len,dist;
         if (z == 256) {
            a->zout = zout;
            return 1;
         }
         // per DEFLATE, length symbols 286 and 287 never occur in valid data;
         // they map to padding entries of the length tables
         if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG");
         z -= 257;
         len = stbi__zlength_base[z];
         if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
         z = stbi__zhuffman_decode(a, &a->z_distance);
         // per DEFLATE, distance symbols 30 and 31 never occur in valid data;
         // accepting them would give dist == 0 and copy not-yet-written bytes
         if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG");
         dist = stbi__zdist_base[z];
         if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
         if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
         if (zout + len > a->zout_end) {
            if (!stbi__zexpand(a, zout, len)) return 0;
            zout = a->zout;
         }
         p = (stbi_uc *) (zout - dist);
         if (dist == 1) { // run of one byte; common in images.
            stbi_uc v = *p;
            if (len) { do *zout++ = v; while (--len); }
         } else {
            // byte-by-byte on purpose: source and destination may overlap
            if (len) { do *zout++ = *p++; while (--len); }
         }
      }
   }
}
4019 
// Reads the header of a dynamic-huffman block and builds a->z_length and
// a->z_distance from it.
// The header holds three counts, then the code lengths of the code-length
// alphabet (in the permuted order of length_dezigzag), then the
// literal/length and distance code lengths encoded with that alphabet:
// symbols 0..15 are literal lengths, 16 repeats the previous length,
// 17 and 18 emit runs of zeros.
// Returns 1 on success, 0 / the stbi__err failure value on corrupt data.
static int stbi__compute_huffman_codes(stbi__zbuf *a)
{
   static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
   stbi__zhuffman z_codelength;
   stbi_uc lencodes[286+32+137];//padding for maximum single op
   stbi_uc codelength_sizes[19];
   int idx, filled;
   int num_lit, num_dist, num_clen, total;

   num_lit  = stbi__zreceive(a,5) + 257;
   num_dist = stbi__zreceive(a,5) + 1;
   num_clen = stbi__zreceive(a,4) + 4;
   total    = num_lit + num_dist;

   memset(codelength_sizes, 0, sizeof(codelength_sizes));
   for (idx = 0; idx < num_clen; ++idx)
      codelength_sizes[length_dezigzag[idx]] = (stbi_uc) stbi__zreceive(a,3);
   if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;

   for (filled = 0; filled < total; ) {
      int repeat;
      stbi_uc value = 0;
      int sym = stbi__zhuffman_decode(a, &z_codelength);
      if (sym < 0 || sym >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
      if (sym < 16) {
         // plain code length
         lencodes[filled++] = (stbi_uc) sym;
         continue;
      }
      switch (sym) {
         case 16:
            // repeat the previous length; needs a previous entry to exist
            repeat = stbi__zreceive(a,2)+3;
            if (filled == 0) return stbi__err("bad codelengths", "Corrupt PNG");
            value = lencodes[filled-1];
            break;
         case 17:
            repeat = stbi__zreceive(a,3)+3;
            break;
         default:
            STBI_ASSERT(sym == 18);
            repeat = stbi__zreceive(a,7)+11;
            break;
      }
      // a run must not extend past the declared number of lengths
      if (total - filled < repeat) return stbi__err("bad codelengths", "Corrupt PNG");
      memset(lencodes+filled, value, repeat);
      filled += repeat;
   }
   if (filled != total) return stbi__err("bad codelengths","Corrupt PNG");
   if (!stbi__zbuild_huffman(&a->z_length, lencodes, num_lit)) return 0;
   if (!stbi__zbuild_huffman(&a->z_distance, lencodes+num_lit, num_dist)) return 0;
   return 1;
}
4068 
// Copies one stored (uncompressed) block from the input to the output.
//   a - decode state; input and output cursors advance by the block length
// The block starts at the next byte boundary with a 4-byte header:
// LEN (16 bits, low byte first) followed by its one's complement NLEN.
// Returns 1 on success, 0 / the stbi__err failure value if the header is
// inconsistent, the input is too short, or the output cannot be grown.
static int stbi__parse_uncompressed_block(stbi__zbuf *a)
{
   stbi_uc header[4];
   int len,nlen,k;
   if (a->num_bits & 7)
      stbi__zreceive(a, a->num_bits & 7); // discard
   // drain the bit-packed data into header
   k = 0;
   while (a->num_bits > 0) {
      header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
      a->code_buffer >>= 8;
      a->num_bits -= 8;
   }
   STBI_ASSERT(a->num_bits == 0);
   // now fill header the normal way
   while (k < 4)
      header[k++] = stbi__zget8(a);
   len  = header[1] * 256 + header[0];
   nlen = header[3] * 256 + header[2];
   if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
   // compare against the space remaining rather than forming cursor + len,
   // which could point past the end of the buffer (undefined behavior)
   if (len > a->zbuffer_end - a->zbuffer) return stbi__err("read past buffer","Corrupt PNG");
   if (len > a->zout_end - a->zout)
      if (!stbi__zexpand(a, a->zout, len)) return 0;
   memcpy(a->zout, a->zbuffer, len);
   a->zbuffer += len;
   a->zout += len;
   return 1;
}
4097 
// Consumes and validates the two-byte zlib stream header (CMF, FLG).
// Returns 1 if the header is acceptable, otherwise the stbi__err failure value.
static int stbi__parse_zlib_header(stbi__zbuf *a)
{
   int cmf    = stbi__zget8(a);
   int flg    = stbi__zget8(a);
   int method = cmf & 15;        // low nibble of CMF is the compression method
   int check  = cmf*256 + flg;   // header read as a 16-bit big-endian value
   /* the high nibble of CMF (cinfo) gives window = 1 << (8 + cinfo)...
      but who cares, we fully buffer output */
   if (check % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
   if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
   if (method != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
   return 1;
}
4110 
// Code lengths of the fixed literal/length code used by block type 1:
// symbols 0..143 -> 8 bits, 144..255 -> 9 bits, 256..279 -> 7 bits,
// 280..287 -> 8 bits.
static const stbi_uc stbi__zdefault_length[288] =
{
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
};
// Code lengths of the fixed distance code used by block type 1: all 5 bits.
static const stbi_uc stbi__zdefault_distance[32] =
{
   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
};
4127 /*
4128 Init algorithm:
4129 {
4130    int i;   // use <= to match clearly with spec
4131    for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;
4132    for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9;
4133    for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7;
4134    for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8;
4135 
4136    for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5;
4137 }
4138 */
4139 
// Decodes a whole deflate stream, block by block, into the output buffer.
//   a            - decode state with input and output already set up
//   parse_header - nonzero to consume and validate a zlib header first
// Each block begins with a 1-bit "last block" flag and a 2-bit block type.
// Returns 1 once the last block has been decoded, 0 on any failure.
static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
{
   int is_last, btype;
   if (parse_header && !stbi__parse_zlib_header(a)) return 0;
   a->num_bits = 0;
   a->code_buffer = 0;
   do {
      is_last = stbi__zreceive(a,1);
      btype   = stbi__zreceive(a,2);
      switch (btype) {
         case 0: // stored block
            if (!stbi__parse_uncompressed_block(a)) return 0;
            break;
         case 3: // not a valid block type
            return 0;
         case 1: // use fixed code lengths
            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , 288)) return 0;
            if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance,  32)) return 0;
            if (!stbi__parse_huffman_block(a)) return 0;
            break;
         default: // type 2: code lengths are transmitted in the block
            if (!stbi__compute_huffman_codes(a)) return 0;
            if (!stbi__parse_huffman_block(a)) return 0;
            break;
      }
   } while (!is_last);
   return 1;
}
4167 
// Points the decode state at the output buffer [obuf, obuf+olen) and runs
// the decoder.
//   exp          - nonzero if the output buffer may be reallocated to grow
//   parse_header - nonzero if the input starts with a zlib header
// Returns the result of stbi__parse_zlib.
static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
{
   a->z_expandable = exp;
   a->zout_start   = obuf;
   a->zout_end     = obuf + olen;
   a->zout         = a->zout_start;

   return stbi__parse_zlib(a, parse_header);
}
4177 
stbi_zlib_decode_malloc_guesssize(const char * buffer,int len,int initial_size,int * outlen)4178 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
4179 {
4180    stbi__zbuf a;
4181    char *p = (char *) stbi__malloc(initial_size);
4182    if (p == NULL) return NULL;
4183    a.zbuffer = (stbi_uc *) buffer;
4184    a.zbuffer_end = (stbi_uc *) buffer + len;
4185    if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
4186       if (outlen) *outlen = (int) (a.zout - a.zout_start);
4187       return a.zout_start;
4188    } else {
4189       STBI_FREE(a.zout_start);
4190       return NULL;
4191    }
4192 }
4193 
stbi_zlib_decode_malloc(char const * buffer,int len,int * outlen)4194 STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
4195 {
4196    return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
4197 }
4198 
stbi_zlib_decode_malloc_guesssize_headerflag(const char * buffer,int len,int initial_size,int * outlen,int parse_header)4199 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
4200 {
4201    stbi__zbuf a;
4202    char *p = (char *) stbi__malloc(initial_size);
4203    if (p == NULL) return NULL;
4204    a.zbuffer = (stbi_uc *) buffer;
4205    a.zbuffer_end = (stbi_uc *) buffer + len;
4206    if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
4207       if (outlen) *outlen = (int) (a.zout - a.zout_start);
4208       return a.zout_start;
4209    } else {
4210       STBI_FREE(a.zout_start);
4211       return NULL;
4212    }
4213 }
4214 
stbi_zlib_decode_buffer(char * obuffer,int olen,char const * ibuffer,int ilen)4215 STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
4216 {
4217    stbi__zbuf a;
4218    a.zbuffer = (stbi_uc *) ibuffer;
4219    a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4220    if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
4221       return (int) (a.zout - a.zout_start);
4222    else
4223       return -1;
4224 }
4225 
stbi_zlib_decode_noheader_malloc(char const * buffer,int len,int * outlen)4226 STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
4227 {
4228    stbi__zbuf a;
4229    char *p = (char *) stbi__malloc(16384);
4230    if (p == NULL) return NULL;
4231    a.zbuffer = (stbi_uc *) buffer;
4232    a.zbuffer_end = (stbi_uc *) buffer+len;
4233    if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
4234       if (outlen) *outlen = (int) (a.zout - a.zout_start);
4235       return a.zout_start;
4236    } else {
4237       STBI_FREE(a.zout_start);
4238       return NULL;
4239    }
4240 }
4241 
stbi_zlib_decode_noheader_buffer(char * obuffer,int olen,const char * ibuffer,int ilen)4242 STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
4243 {
4244    stbi__zbuf a;
4245    a.zbuffer = (stbi_uc *) ibuffer;
4246    a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4247    if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
4248       return (int) (a.zout - a.zout_start);
4249    else
4250       return -1;
4251 }
4252 #endif
4253 
4254 // public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
4255 //    simple implementation
4256 //      - only 8-bit samples
4257 //      - no CRC checking
4258 //      - allocates lots of intermediate memory
4259 //        - avoids problem of streaming data between subsystems
4260 //        - avoids explicit window management
4261 //    performance
4262 //      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
4263 
4264 #ifndef STBI_NO_PNG
// Header of one PNG chunk as read by stbi__get_chunk_header.
typedef struct
{
   stbi__uint32 length; // first 32-bit big-endian field of the chunk header
   stbi__uint32 type;   // second 32-bit big-endian field of the chunk header
} stbi__pngchunk;
4270 
stbi__get_chunk_header(stbi__context * s)4271 static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
4272 {
4273    stbi__pngchunk c;
4274    c.length = stbi__get32be(s);
4275    c.type   = stbi__get32be(s);
4276    return c;
4277 }
4278 
// Consumes bytes from the stream and compares them against the 8-byte PNG
// signature, stopping at the first mismatch.
// Returns 1 if all 8 bytes match, otherwise the stbi__err failure value.
static int stbi__check_png_header(stbi__context *s)
{
   static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
   const stbi_uc *expected = png_sig;
   const stbi_uc *sig_end  = png_sig + 8;
   while (expected != sig_end) {
      if (stbi__get8(s) != *expected) return stbi__err("bad png sig","Not a PNG");
      ++expected;
   }
   return 1;
}
4287 
// Working state of one PNG decode.
typedef struct
{
   stbi__context *s; // input context; also supplies img_x, img_y and img_n
   // out holds the decoded pixel buffer produced by stbi__create_png_image(_raw).
   // NOTE(review): idata / expanded are not used in this section - presumably
   // the concatenated IDAT data and its inflated form; confirm at the call sites.
   stbi_uc *idata, *expanded, *out;
   int depth; // NOTE(review): presumably the PNG bit depth; not set in this section
} stbi__png;
4294 
4295 
// Scanline filter types. Values 0..4 are the filter bytes that can appear at
// the start of a scanline (larger values are rejected by the decoder); the
// last two are internal substitutes selected through first_row_filter.
enum {
   STBI__F_none=0,
   STBI__F_sub=1,
   STBI__F_up=2,
   STBI__F_avg=3,
   STBI__F_paeth=4,
   // synthetic filters used for first scanline to avoid needing a dummy row of 0s
   STBI__F_avg_first,
   STBI__F_paeth_first
};
4306 
4307 static stbi_uc first_row_filter[5] =
4308 {
4309    STBI__F_none,
4310    STBI__F_sub,
4311    STBI__F_none,
4312    STBI__F_avg_first,
4313    STBI__F_paeth_first
4314 };
4315 
// Paeth predictor: of the three neighbours a, b and c, returns the one
// closest to the estimate a + b - c. Ties are resolved in the order a, b, c.
static int stbi__paeth(int a, int b, int c)
{
   int estimate = a + b - c;
   int dist_a = abs(estimate - a);
   int dist_b = abs(estimate - b);
   int dist_c = abs(estimate - c);
   if (dist_a > dist_b || dist_a > dist_c)
      return (dist_b <= dist_c) ? b : c;
   return a;
}
4326 
// Multiplier, indexed by bit depth, that stretches a grayscale sample of that
// depth to the 0..255 range (1 bit: 0xff, 2 bits: 0x55, 4 bits: 0x11, 8 bits: 1).
static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
4328 
// create the png data from post-deflated data
//
// Undoes the per-scanline filtering of 'raw' and writes the resulting pixels
// into a newly allocated a->out.
//   a       - decode state; a->s->img_n is the channel count stored in the file
//   raw     - inflated data: for each of the y rows, one filter byte followed
//             by the filtered row bytes
//   raw_len - number of bytes available in raw; must cover all y rows
//   out_n   - channels per output pixel: img_n, or img_n+1 to add an alpha
//             channel that is filled with 255 (both bytes for 16-bit)
//   x, y    - width and height in pixels of the image (or interlace pass)
//   depth   - bits per sample (the code handles 1, 2, 4, 8 and 16)
//   color   - 0 selects scaling of sub-8-bit samples to the 0..255 range
// For depth < 8 the packed samples are expanded to one byte each; for
// depth == 16 the samples are converted from big-endian to native order.
// Returns 1 on success, otherwise the stbi__err failure value.
// NOTE(review): a->out stays allocated on the failure paths after the
// allocation below - presumably the caller releases it; confirm.
static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
{
   int bytes = (depth == 16? 2 : 1);
   stbi__context *s = a->s;
   stbi__uint32 i,j,stride = x*out_n*bytes;
   stbi__uint32 img_len, img_width_bytes;
   int k;
   int img_n = s->img_n; // copy it into a local for later

   int output_bytes = out_n*bytes;   // bytes per output pixel
   int filter_bytes = img_n*bytes;   // bytes per input pixel (the filter unit)
   int width = x;

   STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
   a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
   if (!a->out) return stbi__err("outofmem", "Out of memory");

   if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG");
   img_width_bytes = (((img_n * x * depth) + 7) >> 3); // packed row size, rounded up to whole bytes
   img_len = (img_width_bytes + 1) * y;                // +1 per row for the filter byte

   // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
   // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
   // so just check for raw_len < img_len always.
   if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");

   for (j=0; j < y; ++j) {
      stbi_uc *cur = a->out + stride*j;
      stbi_uc *prior;
      int filter = *raw++; // each row starts with its filter type

      if (filter > 4)
         return stbi__err("invalid filter","Corrupt PNG");

      if (depth < 8) {
         STBI_ASSERT(img_width_bytes <= x);
         cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
         filter_bytes = 1;
         width = img_width_bytes;
      }
      prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above

      // if first row, use special filter that doesn't sample previous row
      if (j == 0) filter = first_row_filter[filter];

      // handle first byte explicitly
      // (the first pixel has no left neighbour, so only the row above contributes)
      for (k=0; k < filter_bytes; ++k) {
         switch (filter) {
            case STBI__F_none       : cur[k] = raw[k]; break;
            case STBI__F_sub        : cur[k] = raw[k]; break;
            case STBI__F_up         : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
            case STBI__F_avg        : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break;
            case STBI__F_paeth      : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break;
            case STBI__F_avg_first  : cur[k] = raw[k]; break;
            case STBI__F_paeth_first: cur[k] = raw[k]; break;
         }
      }

      // step past the first pixel, filling in its alpha if one is being added
      if (depth == 8) {
         if (img_n != out_n)
            cur[img_n] = 255; // first pixel
         raw += img_n;
         cur += out_n;
         prior += out_n;
      } else if (depth == 16) {
         if (img_n != out_n) {
            cur[filter_bytes]   = 255; // first pixel top byte
            cur[filter_bytes+1] = 255; // first pixel bottom byte
         }
         raw += filter_bytes;
         cur += output_bytes;
         prior += output_bytes;
      } else {
         raw += 1;
         cur += 1;
         prior += 1;
      }

      // this is a little gross, so that we don't switch per-pixel or per-component
      if (depth < 8 || img_n == out_n) {
         // input and output pixels have the same size: process the rest of the row as a flat byte run
         int nk = (width - 1)*filter_bytes;
         #define STBI__CASE(f) \
             case f:     \
                for (k=0; k < nk; ++k)
         switch (filter) {
            // "none" filter turns into a memcpy here; make that explicit.
            case STBI__F_none:         memcpy(cur, raw, nk); break;
            STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break;
            STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
            STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break;
            STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break;
            STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break;
            STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break;
         }
         #undef STBI__CASE
         raw += nk;
      } else {
         // an alpha channel is being added: output pixels are wider than input
         // pixels, so walk pixel by pixel and look back by output_bytes
         STBI_ASSERT(img_n+1 == out_n);
         #define STBI__CASE(f) \
             case f:     \
                for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
                   for (k=0; k < filter_bytes; ++k)
         switch (filter) {
            STBI__CASE(STBI__F_none)         { cur[k] = raw[k]; } break;
            STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break;
            STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
            STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break;
            STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break;
            STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break;
            STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break;
         }
         #undef STBI__CASE

         // the loop above sets the high byte of the pixels' alpha, but for
         // 16 bit png files we also need the low byte set. we'll do that here.
         if (depth == 16) {
            cur = a->out + stride*j; // start at the beginning of the row again
            for (i=0; i < x; ++i,cur+=output_bytes) {
               cur[filter_bytes+1] = 255;
            }
         }
      }
   }

   // we make a separate pass to expand bits to pixels; for performance,
   // this could run two scanlines behind the above code, so it won't
   // interfere with filtering but will still be in the cache.
   if (depth < 8) {
      for (j=0; j < y; ++j) {
         stbi_uc *cur = a->out + stride*j;
         stbi_uc *in  = a->out + stride*j + x*out_n - img_width_bytes;
         // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
         // png guarantees byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
         stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range

         // note that the final byte might overshoot and write more data than desired.
         // we can allocate enough data that this never writes out of memory, but it
         // could also overwrite the next scanline. can it overwrite non-empty data
         // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
         // so we need to explicitly clamp the final ones

         // k counts the samples still to emit; the trailing 'if (k > ...)' lines
         // emit only the samples that remain in the last, partially used byte
         if (depth == 4) {
            for (k=x*img_n; k >= 2; k-=2, ++in) {
               *cur++ = scale * ((*in >> 4)       );
               *cur++ = scale * ((*in     ) & 0x0f);
            }
            if (k > 0) *cur++ = scale * ((*in >> 4)       );
         } else if (depth == 2) {
            for (k=x*img_n; k >= 4; k-=4, ++in) {
               *cur++ = scale * ((*in >> 6)       );
               *cur++ = scale * ((*in >> 4) & 0x03);
               *cur++ = scale * ((*in >> 2) & 0x03);
               *cur++ = scale * ((*in     ) & 0x03);
            }
            if (k > 0) *cur++ = scale * ((*in >> 6)       );
            if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
            if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
         } else if (depth == 1) {
            for (k=x*img_n; k >= 8; k-=8, ++in) {
               *cur++ = scale * ((*in >> 7)       );
               *cur++ = scale * ((*in >> 6) & 0x01);
               *cur++ = scale * ((*in >> 5) & 0x01);
               *cur++ = scale * ((*in >> 4) & 0x01);
               *cur++ = scale * ((*in >> 3) & 0x01);
               *cur++ = scale * ((*in >> 2) & 0x01);
               *cur++ = scale * ((*in >> 1) & 0x01);
               *cur++ = scale * ((*in     ) & 0x01);
            }
            if (k > 0) *cur++ = scale * ((*in >> 7)       );
            if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
            if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
            if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
            if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
            if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
            if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
         }
         if (img_n != out_n) {
            int q;
            // insert alpha = 255
            // (walk backwards so widening in place never overwrites unread samples)
            cur = a->out + stride*j;
            if (img_n == 1) {
               for (q=x-1; q >= 0; --q) {
                  cur[q*2+1] = 255;
                  cur[q*2+0] = cur[q];
               }
            } else {
               STBI_ASSERT(img_n == 3);
               for (q=x-1; q >= 0; --q) {
                  cur[q*4+3] = 255;
                  cur[q*4+2] = cur[q*3+2];
                  cur[q*4+1] = cur[q*3+1];
                  cur[q*4+0] = cur[q*3+0];
               }
            }
         }
      }
   } else if (depth == 16) {
      // force the image data from big-endian to platform-native.
      // this is done in a separate pass due to the decoding relying
      // on the data being untouched, but could probably be done
      // per-line during decode if care is taken.
      stbi_uc *cur = a->out;
      stbi__uint16 *cur16 = (stbi__uint16*)cur;

      for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) {
         *cur16 = (cur[0] << 8) | cur[1];
      }
   }

   return 1;
}
4541 
// Produces the final pixel buffer a->out from inflated PNG image data.
//   a              - decode state; a->s supplies img_x, img_y and img_n
//   image_data     - inflated image data
//   image_data_len - number of bytes in image_data
//   out_n          - channels per output pixel
//   depth          - bits per sample
//   color          - forwarded to stbi__create_png_image_raw
//   interlaced     - nonzero if the data is stored as 7 interlace passes
// Non-interlaced data is handed straight to stbi__create_png_image_raw.
// Interlaced data is decoded one pass at a time and each pass's pixels are
// scattered into their positions in a full-size buffer.
// Returns 1 on success, 0 / the stbi__err failure value on failure.
static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
{
   int bytes = (depth == 16 ? 2 : 1);
   int out_bytes = out_n * bytes;
   stbi_uc *final;
   int p;
   if (!interlaced)
      return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);

   // de-interlacing
   final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
   // the allocation can fail; the passes below write into 'final'
   if (!final) return stbi__err("outofmem", "Out of memory");
   for (p=0; p < 7; ++p) {
      // origin and spacing, in full-image pixels, of each pass's sample grid
      int xorig[] = { 0,4,0,2,0,1,0 };
      int yorig[] = { 0,0,4,0,2,0,1 };
      int xspc[]  = { 8,8,4,4,2,2,1 };
      int yspc[]  = { 8,8,8,4,4,2,2 };
      int i,j,x,y;
      // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
      // dimensions of this pass's sub-image
      x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
      y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
      if (x && y) {
         // bytes this pass occupies in the inflated data (filter byte + packed row, per row)
         stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
         if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
            STBI_FREE(final);
            return 0;
         }
         // scatter the pass's pixels into their final positions
         for (j=0; j < y; ++j) {
            for (i=0; i < x; ++i) {
               int out_y = j*yspc[p]+yorig[p];
               int out_x = i*xspc[p]+xorig[p];
               memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes,
                      a->out + (j*x+i)*out_bytes, out_bytes);
            }
         }
         STBI_FREE(a->out);
         image_data += img_len;
         image_data_len -= img_len;
      }
   }
   a->out = final;

   return 1;
}
4585 
// Applies color-key transparency to the 8-bit image in z->out, in place.
// tc holds the transparent color (only tc[0] is used for grey+alpha).
// out_n must be 2 (grey+alpha) or 4 (RGBA). Always returns 1.
static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
{
   stbi__context *s = z->s;
   stbi__uint32 remaining = s->img_x * s->img_y;
   stbi_uc *px = z->out;

   // the alpha slot is assumed to already hold 255 for every pixel,
   // so the RGBA case only has to clear it on matching pixels
   STBI_ASSERT(out_n == 2 || out_n == 4);

   if (out_n == 2) {
      for (; remaining > 0; --remaining, px += 2)
         px[1] = (px[0] == tc[0]) ? 0 : 255;
   } else {
      for (; remaining > 0; --remaining, px += 4) {
         if (px[0] == tc[0] && px[1] == tc[1] && px[2] == tc[2])
            px[3] = 0;
      }
   }
   return 1;
}
4610 
// 16-bit-per-channel counterpart of stbi__compute_transparency: treats z->out
// as an array of stbi__uint16 samples and applies the color key tc in place.
// out_n must be 2 (grey+alpha) or 4 (RGBA). Always returns 1.
static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n)
{
   stbi__context *s = z->s;
   stbi__uint32 remaining = s->img_x * s->img_y;
   stbi__uint16 *px = (stbi__uint16*) z->out;

   // the alpha slot is assumed to already hold 65535 for every pixel,
   // so the RGBA case only has to clear it on matching pixels
   STBI_ASSERT(out_n == 2 || out_n == 4);

   if (out_n == 2) {
      for (; remaining > 0; --remaining, px += 2)
         px[1] = (px[0] == tc[0]) ? 0 : 65535;
   } else {
      for (; remaining > 0; --remaining, px += 4) {
         if (px[0] == tc[0] && px[1] == tc[1] && px[2] == tc[2])
            px[3] = 0;
      }
   }
   return 1;
}
4635 
// Replaces the one-byte-per-pixel palette indices in a->out with expanded
// color data looked up in palette (4 bytes per entry: r,g,b,a).
// pal_img_n == 3 writes r,g,b per pixel; any other value writes r,g,b,a.
// len is unused. Returns 1 on success, or the result of stbi__err if the
// new buffer cannot be allocated (a->out is left untouched in that case).
static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
{
   stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
   stbi_uc *indices = a->out;
   stbi_uc *expanded, *dst;

   STBI_NOTUSED(len);

   expanded = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0);
   if (expanded == NULL) return stbi__err("outofmem", "Out of memory");

   // from here until a->out is replaced below, an early exit would leak 'expanded'
   dst = expanded;

   if (pal_img_n == 3) {
      for (i=0; i < pixel_count; ++i) {
         const stbi_uc *entry = palette + indices[i]*4;
         dst[0] = entry[0];
         dst[1] = entry[1];
         dst[2] = entry[2];
         dst += 3;
      }
   } else {
      for (i=0; i < pixel_count; ++i) {
         const stbi_uc *entry = palette + indices[i]*4;
         dst[0] = entry[0];
         dst[1] = entry[1];
         dst[2] = entry[2];
         dst[3] = entry[3];
         dst += 4;
      }
   }

   // swap the index buffer for the expanded one
   STBI_FREE(a->out);
   a->out = expanded;

   return 1;
}
4672 
// File-scope flags consulted by the PNG loader.
// nonzero: stbi__de_iphone also divides the color channels by alpha (un-premultiplies)
static int stbi__unpremultiply_on_load = 0;
// nonzero: CgBI PNGs with more than 2 output channels are run through stbi__de_iphone
static int stbi__de_iphone_flag = 0;
4675 
// Sets the file-scope flag stbi__unpremultiply_on_load, which stbi__de_iphone
// checks to decide whether to un-premultiply color by alpha while swapping
// BGR to RGB. The flag is a plain static, so it affects all subsequent loads.
STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
{
   stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
}
4680 
// Sets the file-scope flag stbi__de_iphone_flag. When it is nonzero, PNGs that
// contain a CgBI chunk and decode to more than 2 channels are passed through
// stbi__de_iphone after decoding. The flag is a plain static, so it affects
// all subsequent loads.
STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
{
   stbi__de_iphone_flag = flag_true_if_should_convert;
}
4685 
// Converts the decoded pixels in z->out from BGR(A) to RGB(A) order, in place.
// For 4-channel images, if stbi__unpremultiply_on_load is set, the color
// channels are also divided by alpha (rounded) for every pixel whose alpha is
// nonzero. Expects s->img_out_n to be 3 or 4.
static void stbi__de_iphone(stbi__png *z)
{
   stbi__context *s = z->s;
   stbi__uint32 n, pixel_count = s->img_x * s->img_y;
   stbi_uc *px = z->out;

   if (s->img_out_n == 3) {
      // plain bgr -> rgb swap
      for (n=0; n < pixel_count; ++n, px += 3) {
         stbi_uc first = px[0];
         px[0] = px[2];
         px[2] = first;
      }
      return;
   }

   STBI_ASSERT(s->img_out_n == 4);

   if (!stbi__unpremultiply_on_load) {
      // plain bgra -> rgba swap, alpha untouched
      for (n=0; n < pixel_count; ++n, px += 4) {
         stbi_uc first = px[0];
         px[0] = px[2];
         px[2] = first;
      }
      return;
   }

   // swap and unpremultiply in one pass
   for (n=0; n < pixel_count; ++n, px += 4) {
      stbi_uc alpha = px[3];
      stbi_uc first = px[0];
      if (alpha) {
         stbi_uc half = alpha / 2; // rounding term for the division
         px[0] = (px[2] * 255 + half) / alpha;
         px[1] = (px[1] * 255 + half) / alpha;
         px[2] = (first * 255 + half) / alpha;
      } else {
         // alpha of 0: nothing to divide by, just swap
         px[0] = px[2];
         px[2] = first;
      }
   }
}
4728 
// Packs four chunk-type characters into one unsigned value, first character in
// the most significant byte, so chunk types can be used as switch case labels
// against c.type in stbi__parse_png_file.
#define STBI__PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))
4730 
// Walks the PNG chunk stream of z->s and, depending on scan, stops early or
// decodes the whole image.
//
//   scan == STBI__SCAN_type   : only verify the PNG signature
//   scan == STBI__SCAN_header : stop as soon as dimensions and component count
//                               (s->img_x, s->img_y, s->img_n) are known
//   scan == STBI__SCAN_load   : collect all IDAT data and, at IEND, inflate and
//                               convert it into z->out
//   req_comp                  : requested channel count, used at IEND to decide
//                               whether to decode directly with an extra channel
//
// Returns 1 on success. On failure returns 0 or the result of stbi__err; in
// that case z->idata, z->expanded and z->out may still be allocated and are
// not freed here (stbi__do_png frees them after calling this function).
static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
{
   // palette holds up to 256 entries of 4 bytes (r,g,b,a); pal_img_n is 0 for
   // non-paletted images, 3 for paletted, 4 for paletted with tRNS
   stbi_uc palette[1024], pal_img_n=0;
   stbi_uc has_trans=0, tc[3];
   stbi__uint16 tc16[3];
   // ioff = bytes of IDAT data collected so far; idata_limit = capacity of z->idata
   stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
   int first=1,k,interlace=0, color=0, is_iphone=0;
   stbi__context *s = z->s;

   z->expanded = NULL;
   z->idata = NULL;
   z->out = NULL;

   if (!stbi__check_png_header(s)) return 0;

   if (scan == STBI__SCAN_type) return 1;

   for (;;) {
      stbi__pngchunk c = stbi__get_chunk_header(s);
      switch (c.type) {
         case STBI__PNG_TYPE('C','g','B','I'):
            // remembered for IEND: changes the header flag passed to the zlib
            // decoder and enables the optional stbi__de_iphone pass
            is_iphone = 1;
            stbi__skip(s, c.length);
            break;
         case STBI__PNG_TYPE('I','H','D','R'): {
            int comp,filter;
            if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
            first = 0;
            if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
            s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
            s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
            z->depth = stbi__get8(s);  if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)  return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only");
            color = stbi__get8(s);  if (color > 6)         return stbi__err("bad ctype","Corrupt PNG");
            if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype","Corrupt PNG");
            // color type 3 is paletted; any other odd color type is rejected
            if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
            comp  = stbi__get8(s);  if (comp) return stbi__err("bad comp method","Corrupt PNG");
            filter= stbi__get8(s);  if (filter) return stbi__err("bad filter method","Corrupt PNG");
            interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG");
            if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG");
            if (!pal_img_n) {
               // bit 1 of the color type selects rgb vs grey, bit 2 adds alpha
               s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
               // written as divisions so the x*y*n size check itself cannot overflow
               if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
               if (scan == STBI__SCAN_header) return 1;
            } else {
               // if paletted, then pal_n is our final components, and
               // img_n is # components to decompress/filter.
               s->img_n = 1;
               if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
               // if SCAN_header, have to scan to see if we have a tRNS
            }
            break;
         }

         case STBI__PNG_TYPE('P','L','T','E'):  {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
            pal_len = c.length / 3;
            if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG");
            // stored as 4 bytes per entry with alpha preset to opaque; tRNS may overwrite the alpha
            for (i=0; i < pal_len; ++i) {
               palette[i*4+0] = stbi__get8(s);
               palette[i*4+1] = stbi__get8(s);
               palette[i*4+2] = stbi__get8(s);
               palette[i*4+3] = 255;
            }
            break;
         }

         case STBI__PNG_TYPE('t','R','N','S'): {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
            if (pal_img_n) {
               // paletted: tRNS supplies per-entry alpha, so the image reports 4 components
               if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
               if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
               if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
               pal_img_n = 4;
               for (i=0; i < c.length; ++i)
                  palette[i*4+3] = stbi__get8(s);
            } else {
               // non-paletted: tRNS supplies a single transparent color (one 16-bit value per component)
               if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
               if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
               has_trans = 1;
               if (z->depth == 16) {
                  for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
               } else {
                  for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
               }
            }
            break;
         }

         case STBI__PNG_TYPE('I','D','A','T'): {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
            if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
            // reject a total IDAT size that would wrap when viewed as int
            if ((int)(ioff + c.length) < (int)ioff) return 0;
            if (ioff + c.length > idata_limit) {
               // grow z->idata: start at max(chunk length, 4096) and double until the chunk fits
               stbi__uint32 idata_limit_old = idata_limit;
               stbi_uc *p;
               if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
               while (ioff + c.length > idata_limit)
                  idata_limit *= 2;
               STBI_NOTUSED(idata_limit_old);
               p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
               z->idata = p;
            }
            if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
            ioff += c.length;
            break;
         }

         case STBI__PNG_TYPE('I','E','N','D'): {
            stbi__uint32 raw_len, bpl;
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (scan != STBI__SCAN_load) return 1;
            if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
            // initial guess for decoded data size to avoid unnecessary reallocs
            bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
            raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
            // raw_len is passed in as the size guess and overwritten with the actual decoded length
            z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone);
            if (z->expanded == NULL) return 0; // zlib should set error
            STBI_FREE(z->idata); z->idata = NULL;
            // decode with one extra (alpha) channel if the caller asked for exactly that,
            // or if a tRNS color key has to be applied afterwards
            if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
               s->img_out_n = s->img_n+1;
            else
               s->img_out_n = s->img_n;
            if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
            if (has_trans) {
               if (z->depth == 16) {
                  if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
               } else {
                  if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
               }
            }
            if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
               stbi__de_iphone(z);
            if (pal_img_n) {
               // pal_img_n == 3 or 4
               s->img_n = pal_img_n; // record the actual colors we had
               s->img_out_n = pal_img_n;
               if (req_comp >= 3) s->img_out_n = req_comp;
               if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
                  return 0;
            } else if (has_trans) {
               // non-paletted image with tRNS -> source image has (constant) alpha
               ++s->img_n;
            }
            STBI_FREE(z->expanded); z->expanded = NULL;
            return 1;
         }

         default:
            // if critical, fail
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            // bit 29 of the packed type is bit 5 of the first type character;
            // clear means an uppercase letter, i.e. a chunk we are not allowed to skip
            if ((c.type & (1 << 29)) == 0) {
               #ifndef STBI_NO_FAILURE_STRINGS
               // not threadsafe
               static char invalid_chunk[] = "XXXX PNG chunk not known";
               invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
               invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
               invalid_chunk[2] = STBI__BYTECAST(c.type >>  8);
               invalid_chunk[3] = STBI__BYTECAST(c.type >>  0);
               #endif
               return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
            }
            stbi__skip(s, c.length);
            break;
      }
      // end of PNG chunk, read and skip CRC
      stbi__get32be(s);
   }
}
4902 
// Fully decodes the PNG attached to p and returns the pixel buffer, or NULL on
// failure. On success *x and *y receive the image size and, if n is non-NULL,
// *n receives the component count recorded in img_n. ri->bits_per_channel is
// set to 8 for depths below 8, otherwise to the PNG bit depth. If req_comp is
// nonzero and differs from the decoded channel count, the buffer is converted
// to req_comp channels. All working buffers held by p are released on the
// normal exit path.
static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
{
   void *pixels = NULL;

   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");

   if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
      stbi__context *s = p->s;

      ri->bits_per_channel = (p->depth < 8) ? 8 : p->depth;

      // take ownership of the decoded buffer so the cleanup below doesn't free it
      pixels = p->out;
      p->out = NULL;

      if (req_comp != 0 && s->img_out_n != req_comp) {
         if (ri->bits_per_channel == 8) {
            pixels = stbi__convert_format((unsigned char *) pixels, s->img_out_n, req_comp, s->img_x, s->img_y);
         } else {
            pixels = stbi__convert_format16((stbi__uint16 *) pixels, s->img_out_n, req_comp, s->img_x, s->img_y);
         }
         s->img_out_n = req_comp;
         if (pixels == NULL) return NULL;
      }

      *x = s->img_x;
      *y = s->img_y;
      if (n) *n = s->img_n;
   }

   STBI_FREE(p->out);      p->out      = NULL;
   STBI_FREE(p->expanded); p->expanded = NULL;
   STBI_FREE(p->idata);    p->idata    = NULL;

   return pixels;
}
4932 
// PNG loader entry point: wraps the context in a stbi__png and forwards all
// arguments to stbi__do_png, returning its result.
static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi__png png;
   png.s = s;
   return stbi__do_png(&png, x, y, comp, req_comp, ri);
}
4939 
// Returns the result of stbi__check_png_header for s, then rewinds s so the
// stream can be probed or loaded again.
static int stbi__png_test(stbi__context *s)
{
   int is_png = stbi__check_png_header(s);
   stbi__rewind(s);
   return is_png;
}
4947 
// Runs a header-only scan of the PNG attached to p. On success returns 1 and
// stores width, height and component count through whichever of x, y, comp
// are non-NULL. On failure rewinds p->s and returns 0.
static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
{
   if (stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
      if (x) *x = p->s->img_x;
      if (y) *y = p->s->img_y;
      if (comp) *comp = p->s->img_n;
      return 1;
   }
   stbi__rewind( p->s );
   return 0;
}
4959 
// Context-level wrapper around stbi__png_info_raw: reports width, height and
// component count of a PNG without decoding it.
static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__png png;
   png.s = s;
   return stbi__png_info_raw(&png, x, y, comp);
}
4966 
// Returns 1 if s holds a PNG whose header scan succeeds and whose bit depth is
// 16, otherwise 0. The stream is rewound whenever 0 is returned.
static int stbi__png_is16(stbi__context *s)
{
   stbi__png png;
   png.s = s;
   if (!stbi__png_info_raw(&png, NULL, NULL, NULL))
      return 0; // stbi__png_info_raw has already rewound the stream
   if (png.depth == 16)
      return 1;
   stbi__rewind(png.s);
   return 0;
}
4979 #endif
4980 
4981 // Microsoft/Windows BMP image
4982 
4983 #ifndef STBI_NO_BMP
// Checks whether s starts with something that looks like a BMP: the 'B','M'
// signature followed by an info header whose size field is one of the
// supported values (12, 40, 56, 108, 124). Does not rewind the stream.
static int stbi__bmp_test_raw(stbi__context *s)
{
   int hsz;
   if (stbi__get8(s) != 'B') return 0;
   if (stbi__get8(s) != 'M') return 0;
   stbi__get32le(s); // discard filesize
   stbi__get16le(s); // discard reserved
   stbi__get16le(s); // discard reserved
   stbi__get32le(s); // discard data offset
   hsz = stbi__get32le(s);
   switch (hsz) {
      case 12:
      case 40:
      case 56:
      case 108:
      case 124:
         return 1;
      default:
         return 0;
   }
}
4998 
// Rewinding wrapper around stbi__bmp_test_raw: returns its verdict and leaves
// the stream positioned back at the start.
static int stbi__bmp_test(stbi__context *s)
{
   int is_bmp = stbi__bmp_test_raw(s);
   stbi__rewind(s);
   return is_bmp;
}
5005 
5006 
// returns the index (0..31) of the highest set bit of z, or -1 if z is 0
static int stbi__high_bit(unsigned int z)
{
   int bit = -1;
   // each shift that still leaves something behind moves the answer up by one
   while (z != 0) {
      ++bit;
      z >>= 1;
   }
   return bit;
}
5019 
// returns the number of set bits among the low 32 bits of a (0..32)
static int stbi__bitcount(unsigned int a)
{
   int count = 0;
   // a &= a-1 clears the lowest set bit, so the loop runs once per set bit
   for (a &= 0xffffffffu; a != 0; a &= a - 1)
      ++count;
   return count;
}
5029 
// extract an arbitrarily-aligned N-bit value (N=bits)
// from v, and then make it 8-bits long and fractionally
// extend it to full range.
//
//   v     : pixel value already ANDed with the channel mask
//   shift : right-shift (left-shift if negative) that moves the mask's highest
//           bit to bit 7
//   bits  : number of bits set in the mask; expected to be 0..8
//
// Returns the channel value scaled to 0..255.
static int stbi__shiftsigned(int v, int shift, int bits)
{
   static unsigned int mul_table[9] = {
      0,
      0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/,
      0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/,
   };
   static unsigned int shift_table[9] = {
      0, 0,0,1,0,2,4,6,0,
   };
   // Do the shifting on an unsigned copy: a mask that includes bit 31 makes v
   // negative as an int, and right-shifting a negative int is
   // implementation-defined (typically sign-extending, which corrupts the result).
   unsigned int u = (unsigned int) v;
   if (shift < 0)
      u <<= -shift;
   else
      u >>= shift;
   STBI_ASSERT(u < 256);
   // Validate bits BEFORE it is used as a shift count and table index. The
   // masks come from the file, so in builds without asserts clamp instead of
   // shifting by a negative amount or reading past the 9-entry tables.
   STBI_ASSERT(bits >= 0 && bits <= 8);
   if (bits < 0) bits = 0;
   else if (bits > 8) bits = 8;
   u >>= (8-bits);
   // replicate the N-bit value across 8 bits, then drop the surplus low bits
   return (int) (u * mul_table[bits]) >> shift_table[bits];
}
5052 
// Header information gathered by stbi__bmp_parse_header for stbi__bmp_load.
typedef struct
{
   int bpp;              // bits per pixel, as read from the info header
   int offset;           // data-offset field of the file header
   int hsz;              // info header size (12, 40, 56, 108 or 124)
   unsigned int mr;      // red channel bit mask (0 if not set)
   unsigned int mg;      // green channel bit mask (0 if not set)
   unsigned int mb;      // blue channel bit mask (0 if not set)
   unsigned int ma;      // alpha channel bit mask (0 if not set)
   unsigned int all_a;   // alpha values seen are OR'd into this; 0 at the end means alpha was all 0
} stbi__bmp_data;
5058 
// Reads the BMP file header and info header from s.
// Fills info->offset, info->hsz, info->bpp and the channel masks
// (info->mr/mg/mb/ma, all 0 unless the header supplies or implies them), and
// stores the image dimensions in s->img_x / s->img_y. info->all_a is only
// written (set to 0) for uncompressed 32-bpp images with a 40- or 56-byte
// header. Returns a non-NULL dummy pointer on success, or the result of
// stbi__errpuc on failure.
static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
{
   int hsz, compress, k;

   // file header
   if (stbi__get8(s) != 'B') return stbi__errpuc("not BMP", "Corrupt BMP");
   if (stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
   stbi__get32le(s); // discard filesize
   stbi__get16le(s); // discard reserved
   stbi__get16le(s); // discard reserved
   info->offset = stbi__get32le(s);

   // info header
   hsz = stbi__get32le(s);
   info->hsz = hsz;
   info->mr = 0;
   info->mg = 0;
   info->mb = 0;
   info->ma = 0;

   switch (hsz) {
      case 12:
      case 40:
      case 56:
      case 108:
      case 124:
         break;
      default:
         return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
   }

   // the 12-byte header stores 16-bit dimensions, all others 32-bit
   if (hsz == 12) {
      s->img_x = stbi__get16le(s);
      s->img_y = stbi__get16le(s);
   } else {
      s->img_x = stbi__get32le(s);
      s->img_y = stbi__get32le(s);
   }
   if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
   info->bpp = stbi__get16le(s);

   // the 12-byte header ends here
   if (hsz == 12)
      return (void *) 1;

   compress = stbi__get32le(s);
   if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
   for (k=0; k < 5; ++k)
      stbi__get32le(s); // discard sizeof, hres, vres, colorsused, max important

   if (hsz == 108 || hsz == 124) {
      // these headers always carry explicit channel masks
      info->mr = stbi__get32le(s);
      info->mg = stbi__get32le(s);
      info->mb = stbi__get32le(s);
      info->ma = stbi__get32le(s);
      stbi__get32le(s); // discard color space
      for (k=0; k < 12; ++k)
         stbi__get32le(s); // discard color space parameters
      if (hsz == 124) {
         for (k=0; k < 4; ++k)
            stbi__get32le(s); // discard rendering intent, profile offset, profile size, reserved
      }
      return (void *) 1;
   }

   // hsz is 40 or 56 from here on
   if (hsz == 56) {
      for (k=0; k < 4; ++k)
         stbi__get32le(s);
   }
   if (info->bpp != 16 && info->bpp != 32)
      return (void *) 1;

   switch (compress) {
      case 0:
         // no masks in the file: use the default layouts
         if (info->bpp == 32) {
            info->mr = 0xffu << 16;
            info->mg = 0xffu <<  8;
            info->mb = 0xffu <<  0;
            info->ma = 0xffu << 24;
            info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
         } else {
            info->mr = 31u << 10;
            info->mg = 31u <<  5;
            info->mb = 31u <<  0;
         }
         break;
      case 3:
         info->mr = stbi__get32le(s);
         info->mg = stbi__get32le(s);
         info->mb = stbi__get32le(s);
         // not documented, but generated by photoshop and handled by mspaint
         if (info->mr == info->mg && info->mg == info->mb) {
            // ?!?!?
            return stbi__errpuc("bad BMP", "bad BMP");
         }
         break;
      default:
         return stbi__errpuc("bad BMP", "bad BMP");
   }
   return (void *) 1;
}
5141 
5142 
// Decodes a BMP image from s into a newly allocated 8-bit-per-channel buffer.
//
//   x, y     : receive the image width and height
//   comp     : if non-NULL, receives the channel count of the file (3, or 4
//              when an alpha mask is present)
//   req_comp : requested output channels; 0 keeps the file's count. 3 and 4
//              are decoded directly, 1 and 2 are produced by converting after
//              decoding
//   ri       : unused
//
// Handles palettized 1/4/8-bpp images and 16/24/32-bpp images with either the
// standard layouts or arbitrary channel bit masks. Rows are flipped after
// decoding when the stored height is positive. Returns the pixel buffer, or
// NULL (via stbi__errpuc) on error.
static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *out;
   unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
   stbi_uc pal[256][4];
   int psize=0,i,j,width;
   int flip_vertically, pad, target;
   stbi__bmp_data info;
   STBI_NOTUSED(ri);

   info.all_a = 255;
   if (stbi__bmp_parse_header(s, &info) == NULL)
      return NULL; // error code already set

   flip_vertically = ((int) s->img_y) > 0;
   s->img_y = abs((int) s->img_y);

   mr = info.mr;
   mg = info.mg;
   mb = info.mb;
   ma = info.ma;
   all_a = info.all_a;

   // the palette size is inferred from the gap between the headers and the pixel data
   if (info.hsz == 12) {
      if (info.bpp < 24)
         psize = (info.offset - 14 - 24) / 3;
   } else {
      if (info.bpp < 16)
         psize = (info.offset - 14 - info.hsz) >> 2;
   }

   s->img_n = ma ? 4 : 3;
   if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
      target = req_comp;
   else
      target = s->img_n; // if they want monochrome, we'll post-convert

   // sanity-check size
   if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
      return stbi__errpuc("too large", "Corrupt BMP");

   out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
   if (!out) return stbi__errpuc("outofmem", "Out of memory");
   if (info.bpp < 16) {
      // palettized image
      int z=0;
      if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
      for (i=0; i < psize; ++i) {
         // palette entries are stored b,g,r (plus a pad byte except for 12-byte headers)
         pal[i][2] = stbi__get8(s);
         pal[i][1] = stbi__get8(s);
         pal[i][0] = stbi__get8(s);
         if (info.hsz != 12) stbi__get8(s);
         pal[i][3] = 255;
      }
      stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
      // width = bytes of pixel data per row; rows are padded to a multiple of 4 bytes
      if (info.bpp == 1) width = (s->img_x + 7) >> 3;
      else if (info.bpp == 4) width = (s->img_x + 1) >> 1;
      else if (info.bpp == 8) width = s->img_x;
      else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
      pad = (-width)&3;
      if (info.bpp == 1) {
         for (j=0; j < (int) s->img_y; ++j) {
            int bit_offset = 7, v = stbi__get8(s);
            for (i=0; i < (int) s->img_x; ++i) {
               int color = (v>>bit_offset)&0x1;
               out[z++] = pal[color][0];
               out[z++] = pal[color][1];
               out[z++] = pal[color][2];
               // keep the 4-channel output layout in step with the other paths
               if (target == 4) out[z++] = 255;
               // stop before fetching another byte: when the width is a multiple
               // of 8 the next byte belongs to the padding or the next row
               if (i+1 == (int) s->img_x) break;
               if((--bit_offset) < 0) {
                  bit_offset = 7;
                  v = stbi__get8(s);
               }
            }
            stbi__skip(s, pad);
         }
      } else {
         // 4 bpp: two pixels per byte; 8 bpp: one pixel per byte, handled two at a time
         for (j=0; j < (int) s->img_y; ++j) {
            for (i=0; i < (int) s->img_x; i += 2) {
               int v=stbi__get8(s),v2=0;
               if (info.bpp == 4) {
                  v2 = v & 15;
                  v >>= 4;
               }
               out[z++] = pal[v][0];
               out[z++] = pal[v][1];
               out[z++] = pal[v][2];
               if (target == 4) out[z++] = 255;
               if (i+1 == (int) s->img_x) break;
               v = (info.bpp == 8) ? stbi__get8(s) : v2;
               out[z++] = pal[v][0];
               out[z++] = pal[v][1];
               out[z++] = pal[v][2];
               if (target == 4) out[z++] = 255;
            }
            stbi__skip(s, pad);
         }
      }
   } else {
      int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
      int z = 0;
      // easy: 0 = decode through the bit masks, 1 = plain 24-bit bgr, 2 = plain 32-bit bgra
      int easy=0;
      stbi__skip(s, info.offset - 14 - info.hsz);
      if (info.bpp == 24) width = 3 * s->img_x;
      else if (info.bpp == 16) width = 2*s->img_x;
      else /* bpp = 32 and pad = 0 */ width=0;
      pad = (-width) & 3;
      if (info.bpp == 24) {
         easy = 1;
      } else if (info.bpp == 32) {
         if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
            easy = 2;
      }
      if (!easy) {
         if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
         // right shift amt to put high bit in position #7
         rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
         gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
         bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
         ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
         // stbi__shiftsigned only supports channels of up to 8 bits; the masks
         // come straight from the file, so reject anything wider here
         if (rcount > 8 || gcount > 8 || bcount > 8 || acount > 8) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
      }
      for (j=0; j < (int) s->img_y; ++j) {
         if (easy) {
            for (i=0; i < (int) s->img_x; ++i) {
               unsigned char a;
               out[z+2] = stbi__get8(s);
               out[z+1] = stbi__get8(s);
               out[z+0] = stbi__get8(s);
               z += 3;
               a = (easy == 2 ? stbi__get8(s) : 255);
               all_a |= a;
               if (target == 4) out[z++] = a;
            }
         } else {
            int bpp = info.bpp;
            for (i=0; i < (int) s->img_x; ++i) {
               stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
               unsigned int a;
               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
               a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
               all_a |= a;
               if (target == 4) out[z++] = STBI__BYTECAST(a);
            }
         }
         stbi__skip(s, pad);
      }
   }

   // if alpha channel is all 0s, replace with all 255s
   if (target == 4 && all_a == 0)
      for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
         out[i] = 255;

   if (flip_vertically) {
      // swap row j with its mirror row, byte by byte
      stbi_uc t;
      for (j=0; j < (int) s->img_y>>1; ++j) {
         stbi_uc *p1 = out +      j     *s->img_x*target;
         stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
         for (i=0; i < (int) s->img_x*target; ++i) {
            t = p1[i], p1[i] = p2[i], p2[i] = t;
         }
      }
   }

   if (req_comp && req_comp != target) {
      out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
      if (out == NULL) return out; // stbi__convert_format frees input on failure
   }

   *x = s->img_x;
   *y = s->img_y;
   if (comp) *comp = s->img_n;
   return out;
}
5317 #endif
5318 
5319 // Targa Truevision - TGA
5320 // by Jonathan Dummer
5321 #ifndef STBI_NO_TGA
5322 // returns STBI_rgb or whatever, 0 on error
stbi__tga_get_comp(int bits_per_pixel,int is_grey,int * is_rgb16)5323 static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
5324 {
5325    // only RGB or RGBA (incl. 16bit) or grey allowed
5326    if (is_rgb16) *is_rgb16 = 0;
5327    switch(bits_per_pixel) {
5328       case 8:  return STBI_grey;
5329       case 16: if(is_grey) return STBI_grey_alpha;
5330                // fallthrough
5331       case 15: if(is_rgb16) *is_rgb16 = 1;
5332                return STBI_rgb;
5333       case 24: // fallthrough
5334       case 32: return bits_per_pixel/8;
5335       default: return 0;
5336    }
5337 }
5338 
// Parses just enough of a TGA header to report the image dimensions and
// channel count without decoding pixels.
//   x, y, comp - optional outputs; each is written only if non-NULL
// Returns 1 when the header looks like a supported TGA, otherwise rewinds
// the stream and returns 0.
static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
{
    int width, height, channels;
    int has_colormap, image_type, pixel_bits, palette_entry_bits;

    stbi__get8(s);                        // first header byte is not needed here
    has_colormap = stbi__get8(s);
    if (has_colormap > 1) goto reject;    // only RGB or indexed allowed
    image_type = stbi__get8(s);

    if (has_colormap == 1) {
        // colormapped (paletted) image: type must be 1 (raw) or 9 (RLE)
        if (!(image_type == 1 || image_type == 9)) goto reject;
        stbi__skip(s, 4);                 // first colormap index + number of entries
        palette_entry_bits = stbi__get8(s);
        if (!(palette_entry_bits == 8  || palette_entry_bits == 15 ||
              palette_entry_bits == 16 || palette_entry_bits == 24 ||
              palette_entry_bits == 32))
            goto reject;
        stbi__skip(s, 4);                 // image x and y origin
    } else {
        // no colormap: only RGB or grey, with or without RLE
        if (!(image_type == 2 || image_type == 3 || image_type == 10 || image_type == 11))
            goto reject;
        stbi__skip(s, 9);                 // colormap specification + image x/y origin
        palette_entry_bits = 0;
    }

    width = stbi__get16le(s);
    if (width < 1) goto reject;
    height = stbi__get16le(s);
    if (height < 1) goto reject;

    pixel_bits = stbi__get8(s);
    stbi__get8(s);                        // image descriptor byte is ignored

    if (palette_entry_bits != 0) {
        // with a colormap, pixel_bits is the width of an index; only 8 or 16 accepted
        if (pixel_bits != 8 && pixel_bits != 16) goto reject;
        channels = stbi__tga_get_comp(palette_entry_bits, 0, NULL);
    } else {
        int is_grey = (image_type == 3) || (image_type == 11);
        channels = stbi__tga_get_comp(pixel_bits, is_grey, NULL);
    }
    if (!channels) goto reject;

    if (x) *x = width;
    if (y) *y = height;
    if (comp) *comp = channels;
    return 1;

reject:
    stbi__rewind(s);
    return 0;
}
5403 
// Checks whether the stream starts with a plausible, supported TGA header.
// The stream is always rewound before returning.
// Returns 1 if every header check passes, 0 otherwise.
static int stbi__tga_test(stbi__context *s)
{
   int looks_valid = 0;
   int has_colormap, image_type, entry_bits, pixel_bits;

   do {
      stbi__get8(s);                                // first header byte: ignored
      has_colormap = stbi__get8(s);
      if (has_colormap > 1) break;                  // only RGB or indexed allowed
      image_type = stbi__get8(s);

      if (has_colormap == 1) {
         // a colormapped file must be image type 1 or 9
         if (!(image_type == 1 || image_type == 9)) break;
         stbi__skip(s,4);                           // first colormap index + entry count
         entry_bits = stbi__get8(s);                // bits per palette entry
         if (!(entry_bits == 8 || entry_bits == 15 || entry_bits == 16 ||
               entry_bits == 24 || entry_bits == 32))
            break;
         stbi__skip(s,4);                           // image x and y origin
      } else {
         // only RGB or grey allowed, with or without RLE
         if (!(image_type == 2 || image_type == 3 || image_type == 10 || image_type == 11))
            break;
         stbi__skip(s,9);                           // colormap spec + image x/y origin
      }

      if (stbi__get16le(s) < 1) break;              // width
      if (stbi__get16le(s) < 1) break;              // height

      pixel_bits = stbi__get8(s);
      // for colormapped images the pixel depth is the size of an index
      if (has_colormap == 1 && pixel_bits != 8 && pixel_bits != 16) break;
      if (!(pixel_bits == 8 || pixel_bits == 15 || pixel_bits == 16 ||
            pixel_bits == 24 || pixel_bits == 32))
         break;

      looks_valid = 1;
   } while (0);

   stbi__rewind(s);
   return looks_valid;
}
5434 
5435 // read 16bit value and convert to 24bit RGB
stbi__tga_read_rgb16(stbi__context * s,stbi_uc * out)5436 static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
5437 {
5438    stbi__uint16 px = (stbi__uint16)stbi__get16le(s);
5439    stbi__uint16 fiveBitMask = 31;
5440    // we have 3 channels with 5bits each
5441    int r = (px >> 10) & fiveBitMask;
5442    int g = (px >> 5) & fiveBitMask;
5443    int b = px & fiveBitMask;
5444    // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later
5445    out[0] = (stbi_uc)((r * 255)/31);
5446    out[1] = (stbi_uc)((g * 255)/31);
5447    out[2] = (stbi_uc)((b * 255)/31);
5448 
5449    // some people claim that the most significant bit might be used for alpha
5450    // (possibly if an alpha-bit is set in the "image descriptor byte")
5451    // but that only made 16bit test images completely translucent..
5452    // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
5453 }
5454 
// Decodes a TGA image from `s` into a newly allocated 8-bit-per-channel buffer.
//   x, y     - receive the width and height read from the header
//   comp     - optional; receives the channel count derived from the file
//   req_comp - 0 to keep the file's channel count, otherwise the desired count
//   ri       - unused by this loader
// Handles uncompressed and RLE data, true-colour, greyscale, packed 15/16-bit
// RGB and colormapped images. Rows are flipped unless bit 5 of the image
// descriptor is set, and BGR(A) data is swapped to RGB(A).
// Returns the pixel buffer, or the result of stbi__errpuc() on failure.
static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   //   read in the TGA header stuff
   int tga_offset = stbi__get8(s);
   int tga_indexed = stbi__get8(s);
   int tga_image_type = stbi__get8(s);
   int tga_is_RLE = 0;
   int tga_palette_start = stbi__get16le(s);
   int tga_palette_len = stbi__get16le(s);
   int tga_palette_bits = stbi__get8(s);
   int tga_x_origin = stbi__get16le(s);
   int tga_y_origin = stbi__get16le(s);
   int tga_width = stbi__get16le(s);
   int tga_height = stbi__get16le(s);
   int tga_bits_per_pixel = stbi__get8(s);
   int tga_comp, tga_rgb16=0;
   int tga_inverted = stbi__get8(s);
   // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
   //   image data
   unsigned char *tga_data;
   unsigned char *tga_palette = NULL;
   int i, j;
   unsigned char raw_data[4] = {0};
   int RLE_count = 0;
   int RLE_repeating = 0;
   int read_next_pixel = 1;
   STBI_NOTUSED(ri);

   //   do a tiny bit of preprocessing
   if ( tga_image_type >= 8 )
   {
      // image types 9/10/11 are the RLE variants of 1/2/3
      tga_image_type -= 8;
      tga_is_RLE = 1;
   }
   // bit 5 of the descriptor set means no flip is needed after decoding
   tga_inverted = 1 - ((tga_inverted >> 5) & 1);

   //   If I'm paletted, then I'll use the number of bits from the palette
   if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
   else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);

   if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
      return stbi__errpuc("bad format", "Can't find out TGA pixelformat");

   //   tga info
   *x = tga_width;
   *y = tga_height;
   if (comp) *comp = tga_comp;

   if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
      return stbi__errpuc("too large", "Corrupt TGA");

   tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
   if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");

   // skip to the data's starting position (offset usually = 0)
   stbi__skip(s, tga_offset );

   if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) {
      // fast path: raw, unpaletted 8-bit channels can be read one row at a
      // time, placing each row directly at its final (possibly flipped) position
      for (i=0; i < tga_height; ++i) {
         int row = tga_inverted ? tga_height -i - 1 : i;
         stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
         stbi__getn(s, tga_row, tga_width * tga_comp);
      }
   } else  {
      //   do I need to load a palette?
      if ( tga_indexed)
      {
         // A colormapped image needs at least one palette entry: out-of-range
         // indices are clamped to entry 0 below, so an empty palette would be
         // read past the end of its allocation.
         if (tga_palette_len == 0) {
            STBI_FREE(tga_data);
            return stbi__errpuc("bad palette", "Corrupt TGA");
         }
         //   any data to skip? (offset usually = 0)
         stbi__skip(s, tga_palette_start );
         //   load the palette
         tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
         if (!tga_palette) {
            STBI_FREE(tga_data);
            return stbi__errpuc("outofmem", "Out of memory");
         }
         if (tga_rgb16) {
            // packed 15/16-bit entries are expanded to 3 bytes each as they are read
            stbi_uc *pal_entry = tga_palette;
            STBI_ASSERT(tga_comp == STBI_rgb);
            for (i=0; i < tga_palette_len; ++i) {
               stbi__tga_read_rgb16(s, pal_entry);
               pal_entry += tga_comp;
            }
         } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
               STBI_FREE(tga_data);
               STBI_FREE(tga_palette);
               return stbi__errpuc("bad palette", "Corrupt TGA");
         }
      }
      //   load the data
      for (i=0; i < tga_width * tga_height; ++i)
      {
         //   if I'm in RLE mode, do I need to get a RLE packet header?
         if ( tga_is_RLE )
         {
            if ( RLE_count == 0 )
            {
               //   yep, get the next byte as a RLE command:
               //   low 7 bits = count-1, high bit = "repeat one pixel"
               int RLE_cmd = stbi__get8(s);
               RLE_count = 1 + (RLE_cmd & 127);
               RLE_repeating = RLE_cmd >> 7;
               read_next_pixel = 1;
            } else if ( !RLE_repeating )
            {
               // raw packet: every pixel is stored explicitly
               read_next_pixel = 1;
            }
         } else
         {
            read_next_pixel = 1;
         }
         //   OK, if I need to read a pixel, do it now
         if ( read_next_pixel )
         {
            //   load however much data we did have
            if ( tga_indexed )
            {
               // read in index, then perform the lookup
               int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
               if ( pal_idx >= tga_palette_len ) {
                  // invalid index
                  pal_idx = 0;
               }
               pal_idx *= tga_comp;
               for (j = 0; j < tga_comp; ++j) {
                  raw_data[j] = tga_palette[pal_idx+j];
               }
            } else if(tga_rgb16) {
               STBI_ASSERT(tga_comp == STBI_rgb);
               stbi__tga_read_rgb16(s, raw_data);
            } else {
               //   read in the data raw
               for (j = 0; j < tga_comp; ++j) {
                  raw_data[j] = stbi__get8(s);
               }
            }
            //   clear the reading flag for the next pixel
            read_next_pixel = 0;
         } // end of reading a pixel

         // copy data (for a repeat packet raw_data still holds the repeated pixel)
         for (j = 0; j < tga_comp; ++j)
           tga_data[i*tga_comp+j] = raw_data[j];

         //   in case we're in RLE mode, keep counting down
         --RLE_count;
      }
      //   do I need to invert the image?
      if ( tga_inverted )
      {
         // swap row j with row (height-1-j), byte by byte
         for (j = 0; j*2 < tga_height; ++j)
         {
            int index1 = j * tga_width * tga_comp;
            int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
            for (i = tga_width * tga_comp; i > 0; --i)
            {
               unsigned char temp = tga_data[index1];
               tga_data[index1] = tga_data[index2];
               tga_data[index2] = temp;
               ++index1;
               ++index2;
            }
         }
      }
      //   clear my palette, if I had one
      if ( tga_palette != NULL )
      {
         STBI_FREE( tga_palette );
      }
   }

   // swap RGB - if the source data was RGB16, it already is in the right order
   if (tga_comp >= 3 && !tga_rgb16)
   {
      unsigned char* tga_pixel = tga_data;
      for (i=0; i < tga_width * tga_height; ++i)
      {
         unsigned char temp = tga_pixel[0];
         tga_pixel[0] = tga_pixel[2];
         tga_pixel[2] = temp;
         tga_pixel += tga_comp;
      }
   }

   // convert to target component count
   if (req_comp && req_comp != tga_comp)
      tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);

   //   the things I do to get rid of an error message, and yet keep
   //   Microsoft's C compilers happy... [8^(
   tga_palette_start = tga_palette_len = tga_palette_bits =
         tga_x_origin = tga_y_origin = 0;
   //   OK, done
   return tga_data;
}
5648 #endif
5649 
5650 // *************************************************************************************************
5651 // Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
5652 
5653 #ifndef STBI_NO_PSD
// Returns 1 if the stream begins with the 4-byte PSD signature "8BPS"
// (0x38425053 big-endian), 0 otherwise. The stream is rewound either way.
static int stbi__psd_test(stbi__context *s)
{
   int is_psd = 0;
   if (stbi__get32be(s) == 0x38425053)
      is_psd = 1;
   stbi__rewind(s);
   return is_psd;
}
5660 
// Decodes one RLE-compressed channel into an interleaved 4-channel buffer.
//   p          - address of the first sample of this channel; successive
//                samples are written 4 bytes apart
//   pixelCount - number of samples to produce
// Control byte n: 0..127 copies the next n+1 bytes literally, 129..255
// repeats the next byte 257-n times, 128 does nothing.
// Returns 1 on success, 0 if a run would write past pixelCount samples.
static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
{
   int produced = 0;

   while (produced < pixelCount) {
      int room = pixelCount - produced;
      int code = stbi__get8(s);
      int run, k;

      if (code == 128)
         continue;                        // no-op control byte

      if (code < 128) {
         // literal run
         run = code + 1;
         if (run > room) return 0;        // corrupt data
         produced += run;
         for (k = 0; k < run; ++k, p += 4)
            *p = stbi__get8(s);
      } else {
         // replicated run (control byte read as a negative 8-bit value)
         stbi_uc fill;
         run = 257 - code;
         if (run > room) return 0;        // corrupt data
         fill = stbi__get8(s);
         produced += run;
         for (k = 0; k < run; ++k, p += 4)
            *p = fill;
      }
   }

   return 1;
}
5698 
// Decodes the composited image of an RGB-mode PSD file into an interleaved
// 4-channel buffer.
//   x, y     - receive width and height
//   comp     - optional; always set to 4
//   req_comp - 0 or 4 to keep 4 channels, otherwise the desired channel count
//   ri       - bits_per_channel is set to 16 when 16-bit output is produced
//   bpc      - 16 to request 16-bit output (honoured only for uncompressed
//              16-bit files); otherwise samples are reduced to 8 bits
// Returns the pixel buffer, or the result of stbi__errpuc() on failure.
static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   int total_pixels;
   int num_channels, rle_compressed;
   int ch, px, k;
   int depth;
   int width, height;
   stbi_uc *out;
   STBI_NOTUSED(ri);

   // --- fixed-size header -------------------------------------------------
   if (stbi__get32be(s) != 0x38425053)   // "8BPS"
      return stbi__errpuc("not PSD", "Corrupt PSD image");

   if (stbi__get16be(s) != 1)            // file version
      return stbi__errpuc("wrong version", "Unsupported version of PSD image");

   stbi__skip(s, 6);                     // reserved bytes

   num_channels = stbi__get16be(s);      // R, G, B, A, etc.
   if (!(num_channels >= 0 && num_channels <= 16))
      return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");

   // rows come first in the header, then columns
   height = stbi__get32be(s);
   width  = stbi__get32be(s);

   depth = stbi__get16be(s);
   if (!(depth == 8 || depth == 16))
      return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");

   // Color mode: 0 Bitmap, 1 Grayscale, 2 Indexed, 3 RGB, 4 CMYK,
   // 7 Multichannel, 8 Duotone, 9 Lab. Only RGB is accepted.
   if (stbi__get16be(s) != 3)
      return stbi__errpuc("wrong color format", "PSD is not in RGB color format");

   // --- three length-prefixed sections that are skipped whole ---------------
   stbi__skip(s, stbi__get32be(s));      // mode data (palette for indexed color, etc.)
   stbi__skip(s, stbi__get32be(s));      // image resources (resolution, paths, ...)
   stbi__skip(s, stbi__get32be(s));      // reserved data

   // --- compression flag: 0 = raw, 1 = RLE ---------------------------------
   rle_compressed = stbi__get16be(s);
   if (rle_compressed > 1)
      return stbi__errpuc("bad compression", "PSD has an unknown compression format");

   if (!stbi__mad3sizes_valid(4, width, height, 0))
      return stbi__errpuc("too large", "Corrupt PSD");

   // --- destination buffer --------------------------------------------------
   if (!rle_compressed && depth == 16 && bpc == 16) {
      out = (stbi_uc *) stbi__malloc_mad3(8, width, height, 0);
      ri->bits_per_channel = 16;
   } else {
      out = (stbi_uc *) stbi__malloc(4 * width*height);
   }

   if (!out) return stbi__errpuc("outofmem", "Out of memory");
   total_pixels = width*height;

   // --- pixel data, stored one whole channel after another ------------------
   if (rle_compressed) {
      // The RLE data is preceded by a 2-byte length for every row of every
      // channel; those lengths are not needed, so skip the whole table.
      stbi__skip(s, height * num_channels * 2);

      for (ch = 0; ch < 4; ++ch) {
         if (ch < num_channels) {
            if (!stbi__psd_decode_rle(s, out + ch, total_pixels)) {
               STBI_FREE(out);
               return stbi__errpuc("corrupt", "bad RLE data");
            }
         } else {
            // channel missing from the file: opaque alpha, zero colour
            for (px = 0; px < total_pixels; ++px)
               out[4*px + ch] = (ch == 3 ? 255 : 0);
         }
      }
   } else {
      for (ch = 0; ch < 4; ++ch) {
         if (ch >= num_channels) {
            // channel missing from the file: fill with the default value
            if (depth == 16 && bpc == 16) {
               stbi__uint16 *wide = (stbi__uint16 *) out;
               stbi__uint16 fill16 = ch == 3 ? 65535 : 0;
               for (px = 0; px < total_pixels; ++px)
                  wide[4*px + ch] = fill16;
            } else {
               stbi_uc fill8 = ch == 3 ? 255 : 0;
               for (px = 0; px < total_pixels; ++px)
                  out[4*px + ch] = fill8;
            }
         } else if (ri->bits_per_channel == 16) {
            // 16-bit output: keep full precision
            stbi__uint16 *wide = (stbi__uint16 *) out;
            for (px = 0; px < total_pixels; ++px)
               wide[4*px + ch] = (stbi__uint16) stbi__get16be(s);
         } else if (depth == 16) {
            // 16-bit input, 8-bit output: keep the high byte
            for (px = 0; px < total_pixels; ++px)
               out[4*px + ch] = (stbi_uc) (stbi__get16be(s) >> 8);
         } else {
            for (px = 0; px < total_pixels; ++px)
               out[4*px + ch] = stbi__get8(s);
         }
      }
   }

   // --- remove weird white matte from PSD -----------------------------------
   // applied only to partially transparent pixels
   if (num_channels >= 4) {
      if (ri->bits_per_channel == 16) {
         stbi__uint16 *wide = (stbi__uint16 *) out;
         for (px = 0; px < total_pixels; ++px) {
            stbi__uint16 *pixel = wide + 4*px;
            if (pixel[3] != 0 && pixel[3] != 65535) {
               float a = pixel[3] / 65535.0f;
               float ra = 1.0f / a;
               float inv_a = 65535.0f * (1 - ra);
               for (k = 0; k < 3; ++k)
                  pixel[k] = (stbi__uint16) (pixel[k]*ra + inv_a);
            }
         }
      } else {
         for (px = 0; px < total_pixels; ++px) {
            unsigned char *pixel = out + 4*px;
            if (pixel[3] != 0 && pixel[3] != 255) {
               float a = pixel[3] / 255.0f;
               float ra = 1.0f / a;
               float inv_a = 255.0f * (1 - ra);
               for (k = 0; k < 3; ++k)
                  pixel[k] = (unsigned char) (pixel[k]*ra + inv_a);
            }
         }
      }
   }

   // --- convert to the requested channel count ------------------------------
   if (req_comp && req_comp != 4) {
      if (ri->bits_per_channel == 16)
         out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, width, height);
      else
         out = stbi__convert_format(out, 4, req_comp, width, height);
      if (out == NULL) return out; // stbi__convert_format frees input on failure
   }

   if (comp) *comp = 4;
   *y = height;
   *x = width;

   return out;
}
5896 #endif
5897 
5898 // *************************************************************************************************
5899 // Softimage PIC loader
5900 // by Tom Seddon
5901 //
5902 // See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
5903 // See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
5904 
5905 #ifndef STBI_NO_PIC
// Reads up to four bytes from the stream and compares them with str[0..3].
// Stops at the first mismatch. Returns 1 if all four match, 0 otherwise.
static int stbi__pic_is4(stbi__context *s,const char *str)
{
   const char *stop = str + 4;

   while (str != stop) {
      if (stbi__get8(s) != (stbi_uc)*str)
         return 0;
      ++str;
   }

   return 1;
}
5915 
// Checks for a Softimage PIC header: the 4-byte magic 53 80 F6 34, then 84
// bytes that are skipped, then the tag "PICT".
// Returns 1 on a match, 0 otherwise. Does not rewind the stream.
static int stbi__pic_test_core(stbi__context *s)
{
   int to_skip;

   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
      return 0;

   for (to_skip = 84; to_skip > 0; --to_skip)
      stbi__get8(s);

   return stbi__pic_is4(s,"PICT") ? 1 : 0;
}
5931 
5932 typedef struct
5933 {
5934    stbi_uc size,type,channel;
5935 } stbi__pic_packet;
5936 
// Reads one byte from the stream for every channel selected in `channel`
// and stores it in the matching slot of dest[0..3]. Mask bit 0x80 selects
// slot 0, 0x40 slot 1, 0x20 slot 2 and 0x10 slot 3.
// Returns dest, or the result of stbi__errpuc() if the stream ends early.
static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
{
   int slot;

   for (slot = 0; slot < 4; ++slot) {
      if (!(channel & (0x80 >> slot)))
         continue;
      if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
      dest[slot] = stbi__get8(s);
   }

   return dest;
}
5950 
// Copies src[i] to dest[i] for each of the four slots whose bit is set in
// `channel` (0x80 selects slot 0, 0x40 slot 1, 0x20 slot 2, 0x10 slot 3).
// Slots that are not selected are left untouched.
static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
{
   int slot;

   for (slot = 0; slot < 4; ++slot) {
      int bit = 0x80 >> slot;
      if (channel & bit)
         dest[slot] = src[slot];
   }
}
5959 
// Reads the packet table of a PIC file and then decodes every scanline into
// `result`, an RGBA buffer of width*height*4 bytes supplied by the caller.
//   comp - receives 4 if any packet carries the 0x10 (alpha) channel bit,
//          otherwise 3
// Returns `result` on success, or 0 / the result of stbi__errpuc() on error.
static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
{
   stbi__pic_packet packets[10];
   int channel_union = 0, packet_count = 0, more_packets;
   int row, pk_idx;

   // Packet table: a list of 4-byte descriptors, each starting with a
   // "chained" byte that says whether another descriptor follows.
   // Several packets may carry data for the same channel.
   do {
      stbi__pic_packet *pk;

      if (packet_count == sizeof(packets)/sizeof(packets[0]))
         return stbi__errpuc("bad format","too many packets");

      pk = &packets[packet_count];
      ++packet_count;

      more_packets = stbi__get8(s);
      pk->size     = stbi__get8(s);
      pk->type     = stbi__get8(s);
      pk->channel  = stbi__get8(s);

      channel_union |= pk->channel;

      if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (reading packets)");
      if (pk->size != 8)    return stbi__errpuc("bad format","packet isn't 8bpp");
   } while (more_packets);

   *comp = (channel_union & 0x10) ? 4 : 3; // has alpha channel?

   // Every scanline stores the data of each packet in turn.
   for (row = 0; row < height; ++row) {
      for (pk_idx = 0; pk_idx < packet_count; ++pk_idx) {
         stbi__pic_packet *pk = &packets[pk_idx];
         stbi_uc *dest = result + row*width*4;

         if (pk->type == 0) {
            // uncompressed: one value per pixel
            int col;

            for (col = 0; col < width; ++col, dest += 4)
               if (!stbi__readval(s,pk->channel,dest))
                  return 0;
         } else if (pk->type == 1) {
            // pure RLE: (count, value) pairs; an over-long run is clamped
            int left = width, i;

            while (left > 0) {
               stbi_uc count, value[4];

               count = stbi__get8(s);
               if (stbi__at_eof(s))   return stbi__errpuc("bad file","file too short (pure read count)");

               if (count > left)
                  count = (stbi_uc) left;

               if (!stbi__readval(s,pk->channel,value))  return 0;

               for (i = 0; i < count; ++i, dest += 4)
                  stbi__copyval(pk->channel,dest,value);
               left -= count;
            }
         } else if (pk->type == 2) {
            // mixed RLE: count >= 128 is a repeated value, count < 128 a raw run
            int left = width;

            while (left > 0) {
               int count = stbi__get8(s), i;
               if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (mixed read count)");

               if (count >= 128) {
                  stbi_uc value[4];

                  // 128 means the real count follows as a 16-bit big-endian value
                  if (count == 128)
                     count = stbi__get16be(s);
                  else
                     count -= 127;
                  if (count > left)
                     return stbi__errpuc("bad file","scanline overrun");

                  if (!stbi__readval(s,pk->channel,value))
                     return 0;

                  for (i = 0; i < count; ++i, dest += 4)
                     stbi__copyval(pk->channel,dest,value);
               } else {
                  ++count;
                  if (count > left) return stbi__errpuc("bad file","scanline overrun");

                  for (i = 0; i < count; ++i, dest += 4)
                     if (!stbi__readval(s,pk->channel,dest))
                        return 0;
               }
               left -= count;
            }
         } else {
            return stbi__errpuc("bad format","packet has bad compression type");
         }
      }
   }

   return result;
}
6069 
// Decodes a Softimage PIC image.
//   px, py   - receive the width and height read from the header
//   comp     - optional; receives 3 or 4 depending on whether the file has alpha
//   req_comp - 0 to use the file's channel count, otherwise the desired count
//   ri       - unused by this loader
// The image is decoded into an RGBA buffer pre-filled with 0xff and then
// converted to the requested channel count.
// Returns the pixel buffer, or NULL / the result of stbi__errpuc() on failure.
static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri)
{
   stbi_uc *result;
   int i, x,y, internal_comp;
   STBI_NOTUSED(ri);

   // the core decoder always writes a channel count, so give it somewhere to put it
   if (!comp) comp = &internal_comp;

   // skip the first 92 header bytes
   for (i=0; i<92; ++i)
      stbi__get8(s);

   x = stbi__get16be(s);
   y = stbi__get16be(s);
   if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (pic header)");
   if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");

   stbi__get32be(s); //skip `ratio'
   stbi__get16be(s); //skip `fields'
   stbi__get16be(s); //skip `pad'

   // intermediate buffer is RGBA
   result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
   // the allocation must be checked before memset writes through it
   if (!result) return stbi__errpuc("outofmem", "Out of memory");
   memset(result, 0xff, x*y*4);

   *px = x;
   *py = y;

   // On a decode failure stop here: *comp may not have been written, and
   // there is no buffer left to hand to the format conversion.
   if (!stbi__pic_load_core(s,x,y,comp, result)) {
      STBI_FREE(result);
      return 0;
   }

   if (req_comp == 0) req_comp = *comp;
   result=stbi__convert_format(result,4,req_comp,x,y);

   return result;
}
6105 
// Returns the result of stbi__pic_test_core() on the stream, rewinding the
// stream afterwards whatever the outcome.
static int stbi__pic_test(stbi__context *s)
{
   int is_pic;

   is_pic = stbi__pic_test_core(s);
   stbi__rewind(s);

   return is_pic;
}
6112 #endif
6113 
6114 // *************************************************************************************************
6115 // GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
6116 
6117 #ifndef STBI_NO_GIF
6118 typedef struct
6119 {
6120    stbi__int16 prefix;
6121    stbi_uc first;
6122    stbi_uc suffix;
6123 } stbi__gif_lzw;
6124 
6125 typedef struct
6126 {
6127    int w,h;
6128    stbi_uc *out;                 // output buffer (always 4 components)
6129    stbi_uc *background;          // The current "background" as far as a gif is concerned
6130    stbi_uc *history;
6131    int flags, bgindex, ratio, transparent, eflags;
6132    stbi_uc  pal[256][4];
6133    stbi_uc lpal[256][4];
6134    stbi__gif_lzw codes[8192];
6135    stbi_uc *color_table;
6136    int parse, step;
6137    int lflags;
6138    int start_x, start_y;
6139    int max_x, max_y;
6140    int cur_x, cur_y;
6141    int line_size;
6142    int delay;
6143 } stbi__gif;
6144 
// Checks for the 6-byte GIF signature "GIF87a" or "GIF89a", reading only as
// far as the first mismatch. Returns 1 on a match, 0 otherwise.
// Does not rewind the stream.
static int stbi__gif_test_raw(stbi__context *s)
{
   const char *prefix = "GIF8";
   int k, version_digit;

   for (k = 0; k < 4; ++k)
      if (stbi__get8(s) != prefix[k]) return 0;

   version_digit = stbi__get8(s);
   if (!(version_digit == '9' || version_digit == '7')) return 0;

   return stbi__get8(s) == 'a';
}
6154 
// Returns the result of stbi__gif_test_raw() on the stream, rewinding the
// stream afterwards whatever the outcome.
static int stbi__gif_test(stbi__context *s)
{
   int is_gif;

   is_gif = stbi__gif_test_raw(s);
   stbi__rewind(s);

   return is_gif;
}
6161 
// Reads num_entries three-byte colors from the stream into pal. The three
// bytes of each color are stored in reverse order (first byte read goes to
// index 2), and index 3 is set to 0 for the entry whose index equals transp
// and to 255 for every other entry.
static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
{
   int n;

   for (n = 0; n < num_entries; ++n) {
      stbi_uc *entry = pal[n];

      entry[2] = stbi__get8(s);
      entry[1] = stbi__get8(s);
      entry[0] = stbi__get8(s);

      if (n == transp)
         entry[3] = 0;
      else
         entry[3] = 255;
   }
}
6172 
// Parses the GIF signature and the fixed-size header fields into g.
//
//   comp    - if non-NULL, receives 4
//   is_info - when nonzero, parsing stops after the fixed fields; otherwise
//             the global palette is read as well if the header flags say
//             one is present (bit 0x80)
//
// Returns 1 on success, or the result of stbi__err() when the signature does
// not match.
static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
{
   static const char magic[] = "GIF8";
   int k;
   stbi_uc ver;

   for (k = 0; magic[k] != '\0'; ++k) {
      if (stbi__get8(s) != magic[k])
         return stbi__err("not GIF", "Corrupt GIF");
   }

   ver = stbi__get8(s);
   if (!(ver == '7' || ver == '9'))
      return stbi__err("not GIF", "Corrupt GIF");
   if (stbi__get8(s) != 'a')
      return stbi__err("not GIF", "Corrupt GIF");

   stbi__g_failure_reason = "";

   g->w       = stbi__get16le(s);
   g->h       = stbi__get16le(s);
   g->flags   = stbi__get8(s);
   g->bgindex = stbi__get8(s);
   g->ratio   = stbi__get8(s);
   g->transparent = -1;

   // can't actually tell whether it's 3 or 4 until we parse the comments
   if (comp != 0)
      *comp = 4;

   // the global palette is only read for a full load, and only if present
   if (!is_info && (g->flags & 0x80))
      stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);

   return 1;
}
6200 
// Reads just the GIF header and reports the image dimensions.
//
//   x, y - if non-NULL, receive the width and height from the header
//   comp - forwarded to stbi__gif_header()
//
// Returns 1 on success. Returns 0 if the temporary decoder state cannot be
// allocated or if the header is rejected; in the latter case the stream is
// rewound.
static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
   // stbi__gif_header writes through g, so a failed allocation must stop here
   if (!g) return stbi__err("outofmem", "Out of memory");
   if (!stbi__gif_header(s, g, comp, 1)) {
      STBI_FREE(g);
      stbi__rewind( s );
      return 0;
   }
   if (x) *x = g->w;
   if (y) *y = g->h;
   STBI_FREE(g);
   return 1;
}
6214 
// Writes the pixel sequence that an LZW code expands to into g->out, starting
// at the current write position (g->cur_x, g->cur_y), and advances that
// position. Pixels whose palette alpha is 128 or less leave the canvas
// unchanged, but the position still advances and the history map is marked.
static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
{
   stbi__gif_lzw *entry = &g->codes[code];
   stbi_uc *dst;
   stbi_uc *color;
   int offset;

   // recurse to decode the prefixes, since the linked-list is backwards,
   // and working backwards through an interleaved image would be nasty
   if (entry->prefix >= 0)
      stbi__out_gif_code(g, entry->prefix);

   // past the last row of the image rectangle: drop the pixel
   if (g->cur_y >= g->max_y)
      return;

   offset = g->cur_x + g->cur_y;
   dst = g->out + offset;
   g->history[offset / 4] = 1;

   color = g->color_table + entry->suffix * 4;
   if (color[3] > 128) { // don't render transparent pixels;
      dst[0] = color[2];
      dst[1] = color[1];
      dst[2] = color[0];
      dst[3] = color[3];
   }
   g->cur_x += 4;

   // still inside the current row: nothing more to do
   if (g->cur_x < g->max_x)
      return;

   // wrap to the start of the next row of this pass
   g->cur_x = g->start_x;
   g->cur_y += g->step;

   // ran off the bottom: start the next interlace pass, if any remain
   while (g->cur_y >= g->max_y && g->parse > 0) {
      g->step = (1 << g->parse) * g->line_size;
      g->cur_y = g->start_y + (g->step >> 1);
      --g->parse;
   }
}
6251 
// Decodes the LZW-compressed raster data of one GIF image from the stream,
// emitting pixels into g->out through stbi__out_gif_code().
//
// Returns g->out when a zero-length data block or the end-of-stream code is
// reached. Returns NULL when the leading code-size byte is greater than 12,
// and the result of stbi__errpuc() when a data code appears before any clear
// code, when the code table would exceed 8192 entries, or when a code is not
// valid at that point of the stream.
static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
{
   stbi_uc lzw_cs;
   stbi__int32 len, init_code;
   stbi__uint32 first;
   stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
   stbi__gif_lzw *p;

   lzw_cs = stbi__get8(s);
   if (lzw_cs > 12) return NULL;
   clear = 1 << lzw_cs;              // value of the clear code; clear+1 is the end-of-stream code
   first = 1;                        // stays set until the first clear code has been seen
   codesize = lzw_cs + 1;            // current code width in bits
   codemask = (1 << codesize) - 1;
   bits = 0;                         // bit accumulator, filled from the low end
   valid_bits = 0;                   // number of bits currently held in `bits`
   // codes below `clear` stand for themselves and have no prefix
   for (init_code = 0; init_code < clear; init_code++) {
      g->codes[init_code].prefix = -1;
      g->codes[init_code].first = (stbi_uc) init_code;
      g->codes[init_code].suffix = (stbi_uc) init_code;
   }

   // support no starting clear code
   avail = clear+2;                  // next free slot in the code table
   oldcode = -1;                     // previously decoded code, -1 when there is none

   len = 0;                          // bytes left in the current data sub-block
   for(;;) {
      if (valid_bits < codesize) {
         // not enough bits for a whole code: pull in one more byte
         if (len == 0) {
            len = stbi__get8(s); // start new block
            if (len == 0)
               return g->out;
         }
         --len;
         bits |= (stbi__int32) stbi__get8(s) << valid_bits;
         valid_bits += 8;
      } else {
         // take the next code from the low end of the accumulator
         stbi__int32 code = bits & codemask;
         bits >>= codesize;
         valid_bits -= codesize;
         // @OPTIMIZE: is there some way we can accelerate the non-clear path?
         if (code == clear) {  // clear code
            codesize = lzw_cs + 1;
            codemask = (1 << codesize) - 1;
            avail = clear + 2;
            oldcode = -1;
            first = 0;
         } else if (code == clear + 1) { // end of stream code
            // skip the rest of this sub-block and every following one up to the terminator
            stbi__skip(s, len);
            while ((len = stbi__get8(s)) > 0)
               stbi__skip(s,len);
            return g->out;
         } else if (code <= avail) {
            if (first) {
               return stbi__errpuc("no clear code", "Corrupt GIF");
            }

            if (oldcode >= 0) {
               // add a table entry: the previous code's sequence plus one more byte
               p = &g->codes[avail++];
               if (avail > 8192) {
                  return stbi__errpuc("too many codes", "Corrupt GIF");
               }

               p->prefix = (stbi__int16) oldcode;
               p->first = g->codes[oldcode].first;
               p->suffix = (code == avail) ? p->first : g->codes[code].first;
            } else if (code == avail)
               return stbi__errpuc("illegal code in raster", "Corrupt GIF");

            stbi__out_gif_code(g, (stbi__uint16) code);

            // widen the code size once the table has filled the current width
            if ((avail & codemask) == 0 && avail <= 0x0FFF) {
               codesize++;
               codemask = (1 << codesize) - 1;
            }

            oldcode = code;
         } else {
            return stbi__errpuc("illegal code in raster", "Corrupt GIF");
         }
      }
   }
}
6336 
6337 // this function is designed to support animated gifs, although stb_image doesn't support it
6338 // two back is the image from two frames ago, used for a very specific disposal format
stbi__gif_load_next(stbi__context * s,stbi__gif * g,int * comp,int req_comp,stbi_uc * two_back)6339 static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back)
6340 {
6341    int dispose;
6342    int first_frame;
6343    int pi;
6344    int pcount;
6345 
6346    // on first frame, any non-written pixels get the background colour (non-transparent)
6347    first_frame = 0;
6348    if (g->out == 0) {
6349       if (!stbi__gif_header(s, g, comp,0))     return 0; // stbi__g_failure_reason set by stbi__gif_header
6350       g->out = (stbi_uc *) stbi__malloc(4 * g->w * g->h);
6351       g->background = (stbi_uc *) stbi__malloc(4 * g->w * g->h);
6352       g->history = (stbi_uc *) stbi__malloc(g->w * g->h);
6353       if (g->out == 0)                      return stbi__errpuc("outofmem", "Out of memory");
6354 
6355       // image is treated as "tranparent" at the start - ie, nothing overwrites the current background;
6356       // background colour is only used for pixels that are not rendered first frame, after that "background"
6357       // color refers to teh color that was there the previous frame.
6358       memset( g->out, 0x00, 4 * g->w * g->h );
6359       memset( g->background, 0x00, 4 * g->w * g->h ); // state of the background (starts transparent)
6360       memset( g->history, 0x00, g->w * g->h );        // pixels that were affected previous frame
6361       first_frame = 1;
6362    } else {
6363       // second frame - how do we dispoase of the previous one?
6364       dispose = (g->eflags & 0x1C) >> 2;
6365       pcount = g->w * g->h;
6366 
6367       if ((dispose == 3) && (two_back == 0)) {
6368          dispose = 2; // if I don't have an image to revert back to, default to the old background
6369       }
6370 
6371       if (dispose == 3) { // use previous graphic
6372          for (pi = 0; pi < pcount; ++pi) {
6373             if (g->history[pi]) {
6374                memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 );
6375             }
6376          }
6377       } else if (dispose == 2) {
6378          // restore what was changed last frame to background before that frame;
6379          for (pi = 0; pi < pcount; ++pi) {
6380             if (g->history[pi]) {
6381                memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 );
6382             }
6383          }
6384       } else {
6385          // This is a non-disposal case eithe way, so just
6386          // leave the pixels as is, and they will become the new background
6387          // 1: do not dispose
6388          // 0:  not specified.
6389       }
6390 
6391       // background is what out is after the undoing of the previou frame;
6392       memcpy( g->background, g->out, 4 * g->w * g->h );
6393    }
6394 
6395    // clear my history;
6396    memset( g->history, 0x00, g->w * g->h );        // pixels that were affected previous frame
6397 
6398    for (;;) {
6399       int tag = stbi__get8(s);
6400       switch (tag) {
6401          case 0x2C: /* Image Descriptor */
6402          {
6403             stbi__int32 x, y, w, h;
6404             stbi_uc *o;
6405 
6406             x = stbi__get16le(s);
6407             y = stbi__get16le(s);
6408             w = stbi__get16le(s);
6409             h = stbi__get16le(s);
6410             if (((x + w) > (g->w)) || ((y + h) > (g->h)))
6411                return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
6412 
6413             g->line_size = g->w * 4;
6414             g->start_x = x * 4;
6415             g->start_y = y * g->line_size;
6416             g->max_x   = g->start_x + w * 4;
6417             g->max_y   = g->start_y + h * g->line_size;
6418             g->cur_x   = g->start_x;
6419             g->cur_y   = g->start_y;
6420 
6421             g->lflags = stbi__get8(s);
6422 
6423             if (g->lflags & 0x40) {
6424                g->step = 8 * g->line_size; // first interlaced spacing
6425                g->parse = 3;
6426             } else {
6427                g->step = g->line_size;
6428                g->parse = 0;
6429             }
6430 
6431             if (g->lflags & 0x80) {
6432                stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
6433                g->color_table = (stbi_uc *) g->lpal;
6434             } else if (g->flags & 0x80) {
6435                g->color_table = (stbi_uc *) g->pal;
6436             } else
6437                return stbi__errpuc("missing color table", "Corrupt GIF");
6438 
6439             o = stbi__process_gif_raster(s, g);
6440             if (o == NULL) return NULL;
6441 
6442             // if this was the first frame,
6443             pcount = g->w * g->h;
6444             if (first_frame && (g->bgindex > 0)) {
6445                // if first frame, any pixel not drawn to gets the background color
6446                for (pi = 0; pi < pcount; ++pi) {
6447                   if (g->history[pi] == 0) {
6448                      g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be;
6449                      memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 );
6450                   }
6451                }
6452             }
6453 
6454             return o;
6455          }
6456 
6457          case 0x21: // Comment Extension.
6458          {
6459             int len;
6460             int ext = stbi__get8(s);
6461             if (ext == 0xF9) { // Graphic Control Extension.
6462                len = stbi__get8(s);
6463                if (len == 4) {
6464                   g->eflags = stbi__get8(s);
6465                   g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths.
6466 
6467                   // unset old transparent
6468                   if (g->transparent >= 0) {
6469                      g->pal[g->transparent][3] = 255;
6470                   }
6471                   if (g->eflags & 0x01) {
6472                      g->transparent = stbi__get8(s);
6473                      if (g->transparent >= 0) {
6474                         g->pal[g->transparent][3] = 0;
6475                      }
6476                   } else {
6477                      // don't need transparent
6478                      stbi__skip(s, 1);
6479                      g->transparent = -1;
6480                   }
6481                } else {
6482                   stbi__skip(s, len);
6483                   break;
6484                }
6485             }
6486             while ((len = stbi__get8(s)) != 0) {
6487                stbi__skip(s, len);
6488             }
6489             break;
6490          }
6491 
6492          case 0x3B: // gif stream termination code
6493             return (stbi_uc *) s; // using '1' causes warning on some compilers
6494 
6495          default:
6496             return stbi__errpuc("unknown code", "Corrupt GIF");
6497       }
6498    }
6499 }
6500 
stbi__load_gif_main(stbi__context * s,int ** delays,int * x,int * y,int * z,int * comp,int req_comp)6501 static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
6502 {
6503    if (stbi__gif_test(s)) {
6504       int layers = 0;
6505       stbi_uc *u = 0;
6506       stbi_uc *out = 0;
6507       stbi_uc *two_back = 0;
6508       stbi__gif g;
6509       int stride;
6510       memset(&g, 0, sizeof(g));
6511       if (delays) {
6512          *delays = 0;
6513       }
6514 
6515       do {
6516          u = stbi__gif_load_next(s, &g, comp, req_comp, two_back);
6517          if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker
6518 
6519          if (u) {
6520             *x = g.w;
6521             *y = g.h;
6522             ++layers;
6523             stride = g.w * g.h * 4;
6524 
6525             if (out) {
6526                out = (stbi_uc*) STBI_REALLOC( out, layers * stride );
6527                if (delays) {
6528                   *delays = (int*) STBI_REALLOC( *delays, sizeof(int) * layers );
6529                }
6530             } else {
6531                out = (stbi_uc*)stbi__malloc( layers * stride );
6532                if (delays) {
6533                   *delays = (int*) stbi__malloc( layers * sizeof(int) );
6534                }
6535             }
6536             memcpy( out + ((layers - 1) * stride), u, stride );
6537             if (layers >= 2) {
6538                two_back = out - 2 * stride;
6539             }
6540 
6541             if (delays) {
6542                (*delays)[layers - 1U] = g.delay;
6543             }
6544          }
6545       } while (u != 0);
6546 
6547       // free temp buffer;
6548       STBI_FREE(g.out);
6549       STBI_FREE(g.history);
6550       STBI_FREE(g.background);
6551 
6552       // do the final conversion after loading everything;
6553       if (req_comp && req_comp != 4)
6554          out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h);
6555 
6556       *z = layers;
6557       return out;
6558    } else {
6559       return stbi__errpuc("not GIF", "Image was not as a gif type.");
6560    }
6561 }
6562 
// Loads the first frame of a GIF.
//
//   x, y     - receive the image width and height on success
//   comp     - forwarded to the frame decoder
//   req_comp - if nonzero and not 4, the image is converted to this many
//              components
//   ri       - unused
//
// Returns the decoded image, which the caller owns, or 0 on failure.
static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *u = 0;
   stbi__gif g;
   STBI_NOTUSED(ri);
   memset(&g, 0, sizeof(g));

   u = stbi__gif_load_next(s, &g, comp, req_comp, 0);
   if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker
   if (u) {
      *x = g.w;
      *y = g.h;

      // moved conversion to after successful load so that the same
      // can be done for multiple frames.
      if (req_comp && req_comp != 4)
         u = stbi__convert_format(u, 4, req_comp, g.w, g.h);
   } else if (g.out) {
      // decoding failed (or only a terminator was found) after the canvas
      // had been allocated; nobody else will free it
      STBI_FREE(g.out);
   }

   // free buffers needed for multiple frame loading;
   STBI_FREE(g.history);
   STBI_FREE(g.background);

   return u;
}
6587 
// Thin wrapper: forwards all arguments to stbi__gif_info_raw() and returns
// its result unchanged.
static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
{
   int ok;

   ok = stbi__gif_info_raw(s,x,y,comp);
   return ok;
}
6592 #endif
6593 
6594 // *************************************************************************************************
6595 // Radiance RGBE HDR loader
6596 // originally by Nicolas Schulz
6597 #ifndef STBI_NO_HDR
// Compares the next bytes of the stream with the NUL-terminated signature.
// Returns 0 at the first mismatch (without rewinding); returns 1 after a full
// match, in which case the stream is rewound first.
static int stbi__hdr_test_core(stbi__context *s, const char *signature)
{
   const char *expected;

   for (expected = signature; *expected; ++expected) {
      if (stbi__get8(s) != *expected)
         return 0;
   }

   stbi__rewind(s);
   return 1;
}
6607 
// Returns 1 if the stream starts with "#?RADIANCE\n" or "#?RGBE\n", else 0.
// The stream is rewound after each signature attempt.
static int stbi__hdr_test(stbi__context* s)
{
   int found;

   found = stbi__hdr_test_core(s, "#?RADIANCE\n");
   stbi__rewind(s);
   if (found)
      return found;

   // first signature did not match: try the alternative one
   found = stbi__hdr_test_core(s, "#?RGBE\n");
   stbi__rewind(s);
   return found;
}
6618 
6619 #define STBI__HDR_BUFLEN  1024
// Reads one line from the stream into buffer and NUL-terminates it; the
// newline itself is not stored. At most STBI__HDR_BUFLEN-1 characters are
// kept; if the line is longer, the remainder up to the next newline (or end
// of stream) is read and discarded. Returns buffer.
static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
{
   int used = 0;
   char ch;

   ch = (char) stbi__get8(z);

   for (;;) {
      if (stbi__at_eof(z)) break;
      if (ch == '\n') break;

      buffer[used++] = ch;

      if (used == STBI__HDR_BUFLEN-1) {
         // flush to end of line
         for (;;) {
            if (stbi__at_eof(z)) break;
            if (stbi__get8(z) == '\n') break;
         }
         break;
      }

      ch = (char) stbi__get8(z);
   }

   buffer[used] = 0;
   return buffer;
}
6641 
// Converts one 4-byte RGBE pixel (input[3] is the shared exponent) into
// req_comp floats written to output.
//
// For req_comp <= 2 the three mantissas are averaged into output[0]; for
// req_comp 3 or 4 they are scaled separately into output[0..2]. The second
// channel (req_comp == 2) or fourth channel (req_comp == 4) is set to 1.
// An exponent byte of 0 yields zeros in the color channels.
static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
{
   float scale;

   if (input[3] == 0) {
      // zero exponent: the color channels are all zero
      if (req_comp == 4) {
         output[3] = 1;
         output[0] = output[1] = output[2] = 0;
      } else if (req_comp == 3) {
         output[0] = output[1] = output[2] = 0;
      } else if (req_comp == 2) {
         output[1] = 1;
         output[0] = 0;
      } else if (req_comp == 1) {
         output[0] = 0;
      }
      return;
   }

   // Exponent
   scale = (float) ldexp(1.0f, input[3] - (int)(128 + 8));

   if (req_comp <= 2) {
      output[0] = (input[0] + input[1] + input[2]) * scale / 3;
   } else {
      output[0] = input[0] * scale;
      output[1] = input[1] * scale;
      output[2] = input[2] * scale;
   }

   if (req_comp == 2) output[1] = 1;
   if (req_comp == 4) output[3] = 1;
}
6668 
// Loads a Radiance RGBE (.hdr) image as floats.
//
//   x, y     - receive the width and height parsed from the header
//   comp     - if non-NULL, receives 3
//   req_comp - number of float channels per output pixel; 0 means 3
//   ri       - unused
//
// Returns the allocated float image, or the result of stbi__errpf() when the
// header is not recognised, the dimensions are too large, memory runs out,
// or the run-length data is inconsistent.
static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   char buffer[STBI__HDR_BUFLEN];
   char *token;
   int valid = 0;
   int width, height;
   stbi_uc *scanline;
   float *hdr_data;
   int len;
   unsigned char count, value;
   int i, j, k, c1,c2, z;
   int flat_start;   // index of the first pixel to read as flat RGBE data, -1 if none
   const char *headerToken;
   STBI_NOTUSED(ri);

   // Check identifier
   headerToken = stbi__hdr_gettoken(s,buffer);
   if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
      return stbi__errpf("not HDR", "Corrupt HDR image");

   // Parse header
   for(;;) {
      token = stbi__hdr_gettoken(s,buffer);
      if (token[0] == 0) break;
      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
   }

   if (!valid)    return stbi__errpf("unsupported format", "Unsupported HDR format");

   // Parse width and height
   // can't use sscanf() if we're not using stdio!
   token = stbi__hdr_gettoken(s,buffer);
   if (strncmp(token, "-Y ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
   token += 3;
   height = (int) strtol(token, &token, 10);
   while (*token == ' ') ++token;
   if (strncmp(token, "+X ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
   token += 3;
   width = (int) strtol(token, NULL, 10);

   *x = width;
   *y = height;

   if (comp) *comp = 3;
   if (req_comp == 0) req_comp = 3;

   if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
      return stbi__errpf("too large", "HDR image is too large");

   // Read data
   hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
   if (!hdr_data)
      return stbi__errpf("outofmem", "Out of memory");

   // Load image data
   if ( width < 8 || width >= 32768) {
      // these widths are always stored flat
      flat_start = 0;
   } else {
      // Read RLE-encoded data
      flat_start = -1;
      scanline = NULL;

      for (j = 0; j < height; ++j) {
         c1 = stbi__get8(s);
         c2 = stbi__get8(s);
         len = stbi__get8(s);
         if (c1 != 2 || c2 != 2 || (len & 0x80)) {
            // not run-length encoded, so we have to actually use THIS data as a decoded
            // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
            stbi_uc rgbe[4];
            rgbe[0] = (stbi_uc) c1;
            rgbe[1] = (stbi_uc) c2;
            rgbe[2] = (stbi_uc) len;
            rgbe[3] = (stbi_uc) stbi__get8(s);
            // these four bytes become pixel 0 and the rest of the image is
            // read as flat data from here, whatever rows were decoded before
            stbi__hdr_convert(hdr_data, rgbe, req_comp);
            STBI_FREE(scanline);
            scanline = NULL;
            flat_start = 1;
            break;
         }
         len <<= 8;
         len |= stbi__get8(s);
         if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
         if (scanline == NULL) {
            scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0);
            if (!scanline) {
               STBI_FREE(hdr_data);
               return stbi__errpf("outofmem", "Out of memory");
            }
         }

         // each of the four channels is stored as its own run-length stream
         for (k = 0; k < 4; ++k) {
            int nleft;
            i = 0;
            while ((nleft = width - i) > 0) {
               count = stbi__get8(s);
               if (count > 128) {
                  // Run
                  value = stbi__get8(s);
                  count -= 128;
                  if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
                  for (z = 0; z < count; ++z)
                     scanline[i++ * 4 + k] = value;
               } else {
                  // Dump
                  // a zero count makes no progress; at end of data stbi__get8
                  // keeps returning 0, so without this check the loop never ends
                  if (count == 0 || count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
                  for (z = 0; z < count; ++z)
                     scanline[i++ * 4 + k] = stbi__get8(s);
               }
            }
         }
         for (i=0; i < width; ++i)
            stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
      }
      if (scanline)
         STBI_FREE(scanline);
   }

   if (flat_start >= 0) {
      // Read flat data: one RGBE quadruple per pixel, in row order
      int npixels = width * height;   // cannot overflow: the larger product was validated above
      for (i = flat_start; i < npixels; ++i) {
         stbi_uc rgbe[4];
         stbi__getn(s, rgbe, 4);
         stbi__hdr_convert(hdr_data + i * req_comp, rgbe, req_comp);
      }
   }

   return hdr_data;
}
6796 
// Reads the header of a Radiance HDR stream and reports its dimensions.
//
//   x, y, comp - each may be NULL; otherwise they receive the width, the
//                height and 3
//
// Returns 1 on success. Returns 0 and rewinds the stream when the signature
// test fails, no "FORMAT=32-bit_rle_rgbe" line is found, or the resolution
// line does not have the form "-Y <n> +X <n>".
static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
{
   char line[STBI__HDR_BUFLEN];
   char *cursor;
   int format_seen = 0;
   int sink;

   if (x == NULL)    x = &sink;
   if (y == NULL)    y = &sink;
   if (comp == NULL) comp = &sink;

   if (stbi__hdr_test(s) == 0)
      goto reject;

   // scan header lines up to the first empty one
   cursor = stbi__hdr_gettoken(s,line);
   while (cursor[0] != 0) {
      if (strcmp(cursor, "FORMAT=32-bit_rle_rgbe") == 0)
         format_seen = 1;
      cursor = stbi__hdr_gettoken(s,line);
   }
   if (!format_seen)
      goto reject;

   // resolution line
   cursor = stbi__hdr_gettoken(s,line);
   if (strncmp(cursor, "-Y ", 3) != 0)
      goto reject;
   cursor += 3;
   *y = (int) strtol(cursor, &cursor, 10);

   while (*cursor == ' ')
      ++cursor;
   if (strncmp(cursor, "+X ", 3) != 0)
      goto reject;
   cursor += 3;
   *x = (int) strtol(cursor, NULL, 10);

   *comp = 3;
   return 1;

reject:
   stbi__rewind( s );
   return 0;
}
6840 #endif // STBI_NO_HDR
6841 
6842 #ifndef STBI_NO_BMP
// Parses the BMP header, rewinds the stream, and reports the image size.
//
//   x, y - if non-NULL, receive s->img_x and s->img_y
//   comp - if non-NULL, receives 4 when the parsed header's `ma` field is
//          nonzero, otherwise 3
//
// Returns 1 on success and 0 if stbi__bmp_parse_header() returns NULL.
static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__bmp_data header;
   void *parsed;

   header.all_a = 255;
   parsed = stbi__bmp_parse_header(s, &header);
   stbi__rewind( s );   // rewound on success as well as on failure

   if (!parsed)
      return 0;

   if (x != NULL) *x = s->img_x;
   if (y != NULL) *y = s->img_y;
   if (comp != NULL) {
      if (header.ma)
         *comp = 4;
      else
         *comp = 3;
   }
   return 1;
}
6858 #endif
6859 
6860 #ifndef STBI_NO_PSD
// Reads the fixed-size PSD header and reports the image dimensions.
//
//   x, y, comp - each may be NULL; otherwise they receive the width, the
//                height and 4
//
// Returns 1 on success. Returns 0 and rewinds the stream when any header
// field has an unexpected value. Note that *y and *x are written before the
// last two fields are checked.
static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
{
   int sink;
   int channels;
   int bits;

   if (x == NULL)    x = &sink;
   if (y == NULL)    y = &sink;
   if (comp == NULL) comp = &sink;

   if (stbi__get32be(s) != 0x38425053)   // bytes '8','B','P','S'
      goto reject;
   if (stbi__get16be(s) != 1)
      goto reject;

   stbi__skip(s, 6);

   channels = stbi__get16be(s);
   if (channels < 0 || channels > 16)
      goto reject;

   *y = stbi__get32be(s);
   *x = stbi__get32be(s);

   bits = stbi__get16be(s);
   if (!(bits == 8 || bits == 16))
      goto reject;

   if (stbi__get16be(s) != 3)
      goto reject;

   *comp = 4;
   return 1;

reject:
   stbi__rewind( s );
   return 0;
}
6895 
// Returns 1 if the stream has a PSD header whose depth field is 16.
// Returns 0 and rewinds the stream otherwise.
static int stbi__psd_is16(stbi__context *s)
{
   int channels;
   int bits;

   if (stbi__get32be(s) != 0x38425053)   // bytes '8','B','P','S'
      goto reject;
   if (stbi__get16be(s) != 1)
      goto reject;

   stbi__skip(s, 6);

   channels = stbi__get16be(s);
   if (channels < 0 || channels > 16)
      goto reject;

   // two 32-bit fields are read and ignored here
   (void) stbi__get32be(s);
   (void) stbi__get32be(s);

   bits = stbi__get16be(s);
   if (bits != 16)
      goto reject;

   return 1;

reject:
   stbi__rewind( s );
   return 0;
}
6922 #endif
6923 
6924 #ifndef STBI_NO_PIC
// Reads the header and the packet list of a Softimage PIC stream.
//
//   x, y, comp - each may be NULL; otherwise they receive the width, the
//                height, and 4 if any packet's channel mask has bit 0x10
//                set (3 otherwise)
//
// Returns 1 on success. Every failure path rewinds the stream and returns 0:
// wrong signature, end of data, oversized dimensions, more than 10 packets,
// or a packet whose size field is not 8.
static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
{
   int act_comp=0,num_packets=0,chained,dummy;
   stbi__pic_packet packets[10];

   if (!x) x = &dummy;
   if (!y) y = &dummy;
   if (!comp) comp = &dummy;

   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) {
      stbi__rewind(s);
      return 0;
   }

   stbi__skip(s, 88);

   *x = stbi__get16be(s);
   *y = stbi__get16be(s);
   if (stbi__at_eof(s)) {
      stbi__rewind( s);
      return 0;
   }
   // reject images with more than 2^28 pixels
   if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) {
      stbi__rewind( s );
      return 0;
   }

   stbi__skip(s, 8);

   do {
      stbi__pic_packet *packet;

      // too many packets: fail like every other path, i.e. rewind first so
      // the next format probe starts at the beginning of the stream
      if (num_packets == (int) (sizeof(packets)/sizeof(packets[0]))) {
         stbi__rewind( s );
         return 0;
      }

      packet = &packets[num_packets++];
      chained = stbi__get8(s);
      packet->size    = stbi__get8(s);
      packet->type    = stbi__get8(s);
      packet->channel = stbi__get8(s);
      act_comp |= packet->channel;

      if (stbi__at_eof(s)) {
          stbi__rewind( s );
          return 0;
      }
      if (packet->size != 8) {
          stbi__rewind( s );
          return 0;
      }
   } while (chained);

   *comp = (act_comp & 0x10 ? 4 : 3);

   return 1;
}
6981 #endif
6982 
6983 // *************************************************************************************************
6984 // Portable Gray Map and Portable Pixel Map loader
6985 // by Ken Miller
6986 //
6987 // PGM: http://netpbm.sourceforge.net/doc/pgm.html
6988 // PPM: http://netpbm.sourceforge.net/doc/ppm.html
6989 //
6990 // Known limitations:
6991 //    Does not support comments in the header section
6992 //    Does not support ASCII image data (formats P2 and P3)
6993 //    Does not support 16-bit-per-channel
6994 
6995 #ifndef STBI_NO_PNM
6996 
// Returns 1 if the stream starts with "P5" or "P6". Otherwise the stream is
// rewound and 0 is returned. On success the two bytes stay consumed.
static int      stbi__pnm_test(stbi__context *s)
{
   char first;
   char second;

   first  = (char) stbi__get8(s);
   second = (char) stbi__get8(s);

   if (first == 'P' && (second == '5' || second == '6'))
      return 1;

   stbi__rewind( s );
   return 0;
}
7008 
// Loads a binary PGM/PPM image.
//
//   x, y     - receive the width and height from the header
//   comp     - if non-NULL, receives the number of components in the file
//   req_comp - if nonzero and different from the file's component count,
//              the image is converted to this many components
//   ri       - unused
//
// Returns the pixel buffer, or 0 when the header is rejected, the image is
// too large, memory runs out, the pixel data is shorter than the header
// promises, or the final conversion fails.
static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *out;
   STBI_NOTUSED(ri);

   if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n))
      return 0;

   *x = s->img_x;
   *y = s->img_y;
   if (comp) *comp = s->img_n;

   if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0))
      return stbi__errpuc("too large", "PNM too large");

   out = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0);
   if (!out) return stbi__errpuc("outofmem", "Out of memory");

   // a short read would leave (part of) the buffer uninitialised, so treat
   // it as an error instead of handing that memory to the caller
   if (!stbi__getn(s, out, s->img_n * s->img_x * s->img_y)) {
      STBI_FREE(out);
      return stbi__errpuc("bad PNM", "PNM file truncated");
   }

   if (req_comp && req_comp != s->img_n) {
      // stbi__convert_format frees input on failure
      out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
   }
   return out;
}
7034 
// Returns 1 for space, tab, newline, vertical tab, form feed and carriage
// return; 0 for any other character.
static int      stbi__pnm_isspace(char c)
{
   switch (c) {
      case ' ':
      case '\t':
      case '\n':
      case '\v':
      case '\f':
      case '\r':
         return 1;
      default:
         return 0;
   }
}
7039 
// Advances past whitespace and '#' comments. *c holds the current character
// on entry and the first character that is neither whitespace nor part of a
// comment on return (unless the end of the stream was reached first).
// A comment runs from '#' up to a newline or carriage return.
static void     stbi__pnm_skip_whitespace(stbi__context *s, char *c)
{
   while (1) {
      // consume a run of whitespace
      for (;;) {
         if (stbi__at_eof(s)) break;
         if (!stbi__pnm_isspace(*c)) break;
         *c = (char) stbi__get8(s);
      }

      if (stbi__at_eof(s))
         return;
      if (*c != '#')
         return;

      // consume the comment up to its line end; the line end itself is
      // whitespace and is handled by the next pass
      for (;;) {
         if (stbi__at_eof(s)) break;
         if (*c == '\n' || *c == '\r') break;
         *c = (char) stbi__get8(s);
      }
   }
}
7053 
// Returns 1 if c is an ASCII decimal digit ('0'..'9'), otherwise 0.
static int      stbi__pnm_isdigit(char c)
{
   if (c < '0')
      return 0;
   return c <= '9';
}
7058 
// Parse an unsigned decimal integer from the header.
// *c is the current lookahead character on entry and is advanced past the
// digits that were consumed. Returns the parsed value, or 0 when *c is not
// a digit. Values that do not fit in an int saturate at INT_MAX instead of
// overflowing (signed overflow is undefined behaviour and the digits come
// from untrusted file data); the remaining digits are still consumed so
// the stream position stays consistent.
static int      stbi__pnm_getinteger(stbi__context *s, char *c)
{
   int value = 0;

   while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
      int digit = *c - '0';
      // value*10 + digit <= INT_MAX  <=>  value <= (INT_MAX - digit) / 10
      if (value > (INT_MAX - digit) / 10)
         value = INT_MAX;
      else
         value = value*10 + digit;
      *c = (char) stbi__get8(s);
   }

   return value;
}
7070 
// Parse the header of a binary PGM ("P5") or PPM ("P6") file.
//
//   s            input stream; it is rewound before parsing starts
//   x, y, comp   optional outputs: width, height and component count
//                (1 for P5, 3 for P6); any of them may be NULL
//
// Returns 1 when the header is acceptable, leaving the stream positioned
// after the max-value field and the single character that follows it.
// Returns 0 when the signature does not match, when the width or height is
// not positive (which is also what a truncated header parses to), or when
// the max value exceeds 255. Every failure path rewinds the stream so a
// subsequent format probe does not start in the middle of this header.
// Note that the outputs may already have been written when 0 is returned.
static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
{
   int maxv, dummy;
   char c, p, t;

   if (!x) x = &dummy;
   if (!y) y = &dummy;
   if (!comp) comp = &dummy;

   stbi__rewind(s);

   // Get identifier
   p = (char) stbi__get8(s);
   t = (char) stbi__get8(s);
   if (p != 'P' || (t != '5' && t != '6')) {
       stbi__rewind(s);
       return 0;
   }

   *comp = (t == '6') ? 3 : 1;  // '5' is 1-component .pgm; '6' is 3-component .ppm

   c = (char) stbi__get8(s);
   stbi__pnm_skip_whitespace(s, &c);

   *x = stbi__pnm_getinteger(s, &c); // read width
   if (*x <= 0) {
      stbi__rewind(s);
      return stbi__err("invalid width", "PPM image header had zero or invalid width");
   }
   stbi__pnm_skip_whitespace(s, &c);

   *y = stbi__pnm_getinteger(s, &c); // read height
   if (*y <= 0) {
      stbi__rewind(s);
      return stbi__err("invalid height", "PPM image header had zero or invalid height");
   }
   stbi__pnm_skip_whitespace(s, &c);

   maxv = stbi__pnm_getinteger(s, &c);  // read max value

   if (maxv > 255) {
      stbi__rewind(s);
      return stbi__err("max value > 255", "PPM image not 8-bit");
   }
   return 1;
}
7108 #endif
7109 
// Try each compiled-in format's info routine in a fixed order and stop at
// the first one that returns nonzero. Returns 1 in that case; if none of
// them accepts the stream, records an "unknown image type" error through
// stbi__err and returns its result.
static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
{
   int recognized = 0;

   #ifndef STBI_NO_JPEG
   if (!recognized && stbi__jpeg_info(s, x, y, comp)) recognized = 1;
   #endif

   #ifndef STBI_NO_PNG
   if (!recognized && stbi__png_info(s, x, y, comp)) recognized = 1;
   #endif

   #ifndef STBI_NO_GIF
   if (!recognized && stbi__gif_info(s, x, y, comp)) recognized = 1;
   #endif

   #ifndef STBI_NO_BMP
   if (!recognized && stbi__bmp_info(s, x, y, comp)) recognized = 1;
   #endif

   #ifndef STBI_NO_PSD
   if (!recognized && stbi__psd_info(s, x, y, comp)) recognized = 1;
   #endif

   #ifndef STBI_NO_PIC
   if (!recognized && stbi__pic_info(s, x, y, comp)) recognized = 1;
   #endif

   #ifndef STBI_NO_PNM
   if (!recognized && stbi__pnm_info(s, x, y, comp)) recognized = 1;
   #endif

   #ifndef STBI_NO_HDR
   if (!recognized && stbi__hdr_info(s, x, y, comp)) recognized = 1;
   #endif

   // test tga last because it's a crappy test!
   #ifndef STBI_NO_TGA
   if (!recognized && stbi__tga_info(s, x, y, comp)) recognized = 1;
   #endif

   if (recognized)
      return 1;
   return stbi__err("unknown image type", "Image not of any known type, or corrupt");
}
7151 
// Returns 1 if stbi__png_is16 or stbi__psd_is16 (whichever are compiled
// in, tried in that order) returns nonzero for the stream; otherwise 0.
static int stbi__is_16_main(stbi__context *s)
{
   int is16 = 0;

   #ifndef STBI_NO_PNG
   if (stbi__png_is16(s)) is16 = 1;
   #endif

   #ifndef STBI_NO_PSD
   if (!is16 && stbi__psd_is16(s)) is16 = 1;
   #endif

   return is16;
}
7164 
7165 #ifndef STBI_NO_STDIO
stbi_info(char const * filename,int * x,int * y,int * comp)7166 STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
7167 {
7168     FILE *f = stbi__fopen(filename, "rb");
7169     int result;
7170     if (!f) return stbi__err("can't fopen", "Unable to open file");
7171     result = stbi_info_from_file(f, x, y, comp);
7172     fclose(f);
7173     return result;
7174 }
7175 
stbi_info_from_file(FILE * f,int * x,int * y,int * comp)7176 STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
7177 {
7178    int r;
7179    stbi__context s;
7180    long pos = ftell(f);
7181    stbi__start_file(&s, f);
7182    r = stbi__info_main(&s,x,y,comp);
7183    fseek(f,pos,SEEK_SET);
7184    return r;
7185 }
7186 
stbi_is_16_bit(char const * filename)7187 STBIDEF int stbi_is_16_bit(char const *filename)
7188 {
7189     FILE *f = stbi__fopen(filename, "rb");
7190     int result;
7191     if (!f) return stbi__err("can't fopen", "Unable to open file");
7192     result = stbi_is_16_bit_from_file(f);
7193     fclose(f);
7194     return result;
7195 }
7196 
stbi_is_16_bit_from_file(FILE * f)7197 STBIDEF int stbi_is_16_bit_from_file(FILE *f)
7198 {
7199    int r;
7200    stbi__context s;
7201    long pos = ftell(f);
7202    stbi__start_file(&s, f);
7203    r = stbi__is_16_main(&s);
7204    fseek(f,pos,SEEK_SET);
7205    return r;
7206 }
7207 #endif // !STBI_NO_STDIO
7208 
stbi_info_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * comp)7209 STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
7210 {
7211    stbi__context s;
7212    stbi__start_mem(&s,buffer,len);
7213    return stbi__info_main(&s,x,y,comp);
7214 }
7215 
stbi_info_from_callbacks(stbi_io_callbacks const * c,void * user,int * x,int * y,int * comp)7216 STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
7217 {
7218    stbi__context s;
7219    stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
7220    return stbi__info_main(&s,x,y,comp);
7221 }
7222 
stbi_is_16_bit_from_memory(stbi_uc const * buffer,int len)7223 STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len)
7224 {
7225    stbi__context s;
7226    stbi__start_mem(&s,buffer,len);
7227    return stbi__is_16_main(&s);
7228 }
7229 
stbi_is_16_bit_from_callbacks(stbi_io_callbacks const * c,void * user)7230 STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user)
7231 {
7232    stbi__context s;
7233    stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
7234    return stbi__is_16_main(&s);
7235 }
7236 
7237 #endif // STB_IMAGE_IMPLEMENTATION
7238 
7239 /*
7240    revision history:
7241       2.19  (2018-02-11) fix warning
7242       2.18  (2018-01-30) fix warnings
7243       2.17  (2018-01-29) change stbi__shiftsigned to avoid clang -O2 bug
7244                          1-bit BMP
7245                          *_is_16_bit api
7246                          avoid warnings
7247       2.16  (2017-07-23) all functions have 16-bit variants;
7248                          STBI_NO_STDIO works again;
7249                          compilation fixes;
7250                          fix rounding in unpremultiply;
7251                          optimize vertical flip;
7252                          disable raw_len validation;
7253                          documentation fixes
7254       2.15  (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;
7255                          warning fixes; disable run-time SSE detection on gcc;
7256                          uniform handling of optional "return" values;
7257                          thread-safe initialization of zlib tables
7258       2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
7259       2.13  (2016-11-29) add 16-bit API, only supported for PNG right now
7260       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
7261       2.11  (2016-04-02) allocate large structures on the stack
7262                          remove white matting for transparent PSD
7263                          fix reported channel count for PNG & BMP
7264                          re-enable SSE2 in non-gcc 64-bit
7265                          support RGB-formatted JPEG
7266                          read 16-bit PNGs (only as 8-bit)
7267       2.10  (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED
7268       2.09  (2016-01-16) allow comments in PNM files
7269                          16-bit-per-pixel TGA (not bit-per-component)
7270                          info() for TGA could break due to .hdr handling
7271                          info() for BMP now shares code instead of sloppy parse
7272                          can use STBI_REALLOC_SIZED if allocator doesn't support realloc
7273                          code cleanup
7274       2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
7275       2.07  (2015-09-13) fix compiler warnings
7276                          partial animated GIF support
7277                          limited 16-bpc PSD support
7278                          #ifdef unused functions
7279                          bug with < 92 byte PIC,PNM,HDR,TGA
7280       2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value
7281       2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning
7282       2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit
7283       2.03  (2015-04-12) extra corruption checking (mmozeiko)
7284                          stbi_set_flip_vertically_on_load (nguillemot)
7285                          fix NEON support; fix mingw support
7286       2.02  (2015-01-19) fix incorrect assert, fix warning
7287       2.01  (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
7288       2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
7289       2.00  (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
7290                          progressive JPEG (stb)
7291                          PGM/PPM support (Ken Miller)
7292                          STBI_MALLOC,STBI_REALLOC,STBI_FREE
7293                          GIF bugfix -- seemingly never worked
7294                          STBI_NO_*, STBI_ONLY_*
7295       1.48  (2014-12-14) fix incorrectly-named assert()
7296       1.47  (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
7297                          optimize PNG (ryg)
7298                          fix bug in interlaced PNG with user-specified channel count (stb)
7299       1.46  (2014-08-26)
7300               fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
7301       1.45  (2014-08-16)
7302               fix MSVC-ARM internal compiler error by wrapping malloc
7303       1.44  (2014-08-07)
7304               various warning fixes from Ronny Chevalier
7305       1.43  (2014-07-15)
7306               fix MSVC-only compiler problem in code changed in 1.42
7307       1.42  (2014-07-09)
7308               don't define _CRT_SECURE_NO_WARNINGS (affects user code)
7309               fixes to stbi__cleanup_jpeg path
7310               added STBI_ASSERT to avoid requiring assert.h
7311       1.41  (2014-06-25)
7312               fix search&replace from 1.36 that messed up comments/error messages
7313       1.40  (2014-06-22)
7314               fix gcc struct-initialization warning
7315       1.39  (2014-06-15)
7316               fix to TGA optimization when req_comp != number of components in TGA;
7317               fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
7318               add support for BMP version 5 (more ignored fields)
7319       1.38  (2014-06-06)
7320               suppress MSVC warnings on integer casts truncating values
7321               fix accidental rename of 'skip' field of I/O
7322       1.37  (2014-06-04)
7323               remove duplicate typedef
7324       1.36  (2014-06-03)
7325               convert to header file single-file library
7326               if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
7327       1.35  (2014-05-27)
7328               various warnings
7329               fix broken STBI_SIMD path
7330               fix bug where stbi_load_from_file no longer left file pointer in correct place
7331               fix broken non-easy path for 32-bit BMP (possibly never used)
7332               TGA optimization by Arseny Kapoulkine
7333       1.34  (unknown)
7334               use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
7335       1.33  (2011-07-14)
7336               make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
7337       1.32  (2011-07-13)
7338               support for "info" function for all supported filetypes (SpartanJ)
7339       1.31  (2011-06-20)
7340               a few more leak fixes, bug in PNG handling (SpartanJ)
7341       1.30  (2011-06-11)
7342               added ability to load files via callbacks to accommodate custom input streams (Ben Wenger)
7343               removed deprecated format-specific test/load functions
7344               removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
7345               error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
7346               fix inefficiency in decoding 32-bit BMP (David Woo)
7347       1.29  (2010-08-16)
7348               various warning fixes from Aurelien Pocheville
7349       1.28  (2010-08-01)
7350               fix bug in GIF palette transparency (SpartanJ)
7351       1.27  (2010-08-01)
7352               cast-to-stbi_uc to fix warnings
7353       1.26  (2010-07-24)
7354               fix bug in file buffering for PNG reported by SpartanJ
7355       1.25  (2010-07-17)
7356               refix trans_data warning (Won Chun)
7357       1.24  (2010-07-12)
7358               perf improvements reading from files on platforms with lock-heavy fgetc()
7359               minor perf improvements for jpeg
7360               deprecated type-specific functions so we'll get feedback if they're needed
7361               attempt to fix trans_data warning (Won Chun)
7362       1.23    fixed bug in iPhone support
7363       1.22  (2010-07-10)
7364               removed image *writing* support
7365               stbi_info support from Jetro Lauha
7366               GIF support from Jean-Marc Lienher
7367               iPhone PNG-extensions from James Brown
7368               warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez (U+017D)emva)
7369       1.21    fix use of 'stbi_uc' in header (reported by jon blow)
7370       1.20    added support for Softimage PIC, by Tom Seddon
7371       1.19    bug in interlaced PNG corruption check (found by ryg)
7372       1.18  (2008-08-02)
7373               fix a threading bug (local mutable static)
7374       1.17    support interlaced PNG
7375       1.16    major bugfix - stbi__convert_format converted one too many pixels
7376       1.15    initialize some fields for thread safety
7377       1.14    fix threadsafe conversion bug
7378               header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
7379       1.13    threadsafe
7380       1.12    const qualifiers in the API
7381       1.11    Support installable IDCT, colorspace conversion routines
7382       1.10    Fixes for 64-bit (don't use "unsigned long")
7383               optimized upsampling by Fabian "ryg" Giesen
7384       1.09    Fix format-conversion for PSD code (bad global variables!)
7385       1.08    Thatcher Ulrich's PSD code integrated by Nicolas Schulz
7386       1.07    attempt to fix C++ warning/errors again
7387       1.06    attempt to fix C++ warning/errors again
7388       1.05    fix TGA loading to return correct *comp and use good luminance calc
7389       1.04    default float alpha is 1, not 255; use 'void *' for stbi_image_free
7390       1.03    bugfixes to STBI_NO_STDIO, STBI_NO_HDR
7391       1.02    support for (subset of) HDR files, float interface for preferred access to them
7392       1.01    fix bug: possible bug in handling right-side up bmps... not sure
7393               fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
7394       1.00    interface to zlib that skips zlib header
7395       0.99    correct handling of alpha in palette
7396       0.98    TGA loader by lonesock; dynamically add loaders (untested)
7397       0.97    jpeg errors on too large a file; also catch another malloc failure
7398       0.96    fix detection of invalid v value - particleman@mollyrocket forum
7399       0.95    during header scan, seek to markers in case of padding
7400       0.94    STBI_NO_STDIO to disable stdio usage; rename all #defines the same
7401       0.93    handle jpegtran output; verbose errors
7402       0.92    read 4,8,16,24,32-bit BMP files of several formats
7403       0.91    output 24-bit Windows 3.0 BMP files
7404       0.90    fix a few more warnings; bump version number to approach 1.0
7405       0.61    bugfixes due to Marc LeBlanc, Christopher Lloyd
7406       0.60    fix compiling as c++
7407       0.59    fix warnings: merge Dave Moore's -Wall fixes
7408       0.58    fix bug: zlib uncompressed mode len/nlen was wrong endian
7409       0.57    fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
7410       0.56    fix bug: zlib uncompressed mode len vs. nlen
7411       0.55    fix bug: restart_interval not initialized to 0
7412       0.54    allow NULL for 'int *comp'
7413       0.53    fix bug in png 3->4; speedup png decoding
7414       0.52    png handles req_comp=3,4 directly; minor cleanup; jpeg comments
7415       0.51    obey req_comp requests, 1-component jpegs return as 1-component,
7416               on 'test' only check type, not whether we support this variant
7417       0.50  (2006-11-19)
7418               first released version
7419 */
7420 
7421 
7422 /*
7423 ------------------------------------------------------------------------------
7424 This software is available under 2 licenses -- choose whichever you prefer.
7425 ------------------------------------------------------------------------------
7426 ALTERNATIVE A - MIT License
7427 Copyright (c) 2017 Sean Barrett
7428 Permission is hereby granted, free of charge, to any person obtaining a copy of
7429 this software and associated documentation files (the "Software"), to deal in
7430 the Software without restriction, including without limitation the rights to
7431 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7432 of the Software, and to permit persons to whom the Software is furnished to do
7433 so, subject to the following conditions:
7434 The above copyright notice and this permission notice shall be included in all
7435 copies or substantial portions of the Software.
7436 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7437 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7438 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
7439 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
7440 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
7441 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
7442 SOFTWARE.
7443 ------------------------------------------------------------------------------
7444 ALTERNATIVE B - Public Domain (www.unlicense.org)
7445 This is free and unencumbered software released into the public domain.
7446 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
7447 software, either in source code form or as a compiled binary, for any purpose,
7448 commercial or non-commercial, and by any means.
7449 In jurisdictions that recognize copyright laws, the author or authors of this
7450 software dedicate any and all copyright interest in the software to the public
7451 domain. We make this dedication for the benefit of the public at large and to
7452 the detriment of our heirs and successors. We intend this dedication to be an
7453 overt act of relinquishment in perpetuity of all present and future rights to
7454 this software under copyright law.
7455 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7456 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7457 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
7458 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
7459 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
7460 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
7461 ------------------------------------------------------------------------------
7462 */