1 /* stb_image - v2.18 - public domain image loader - http://nothings.org/stb
2                                   no warranty implied; use at your own risk
3 
4    Do this:
5       #define STB_IMAGE_IMPLEMENTATION
6    before you include this file in *one* C or C++ file to create the implementation.
7 
8    // i.e. it should look like this:
9    #include ...
10    #include ...
11    #include ...
12    #define STB_IMAGE_IMPLEMENTATION
13    #include "stb_image.h"
14 
15    You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
16    And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
17 
18 
19    QUICK NOTES:
20       Primarily of interest to game developers and other people who can
21           avoid problematic images and only need the trivial interface
22 
23       JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
24       PNG 1/2/4/8/16-bit-per-channel
25 
26       TGA (not sure what subset, if a subset)
27       BMP non-1bpp, non-RLE
28       PSD (composited view only, no extra channels, 8/16 bit-per-channel)
29 
30       GIF (*comp always reports as 4-channel)
31       HDR (radiance rgbE format)
32       PIC (Softimage PIC)
33       PNM (PPM and PGM binary only)
34 
35       Animated GIF still needs a proper API, but here's one way to do it:
36           http://gist.github.com/urraka/685d9a6340b26b830d49
37 
38       - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
39       - decode from arbitrary I/O callbacks
40       - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
41 
42    Full documentation under "DOCUMENTATION" below.
43 
44 
45 LICENSE
46 
47   See end of file for license information.
48 
49 RECENT REVISION HISTORY:
50 
51       2.18  (2018-01-30) fix warnings
52       2.17  (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings
53       2.16  (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes
54       2.15  (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
55       2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
56       2.13  (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
57       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
58       2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
59                          RGB-format JPEG; remove white matting in PSD;
60                          allocate large structures on the stack;
61                          correct channel count for PNG & BMP
62       2.10  (2016-01-22) avoid warning introduced in 2.09
63       2.09  (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
64 
65    See end of file for full revision history.
66 
67 
68  ============================    Contributors    =========================
69 
70  Image formats                          Extensions, features
71     Sean Barrett (jpeg, png, bmp)          Jetro Lauha (stbi_info)
72     Nicolas Schulz (hdr, psd)              Martin "SpartanJ" Golini (stbi_info)
73     Jonathan Dummer (tga)                  James "moose2000" Brown (iPhone PNG)
74     Jean-Marc Lienher (gif)                Ben "Disch" Wenger (io callbacks)
75     Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG)
76     Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip)
77     Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD)
78     github:urraka (animated gif)           Junggon Kim (PNM comments)
79     Christopher Forseth (animated gif)     Daniel Gibson (16-bit TGA)
80                                            socks-the-fox (16-bit PNG)
81                                            Jeremy Sawicki (handle all ImageNet JPGs)
82  Optimizations & bugfixes                  Mikhail Morozov (1-bit BMP)
83     Fabian "ryg" Giesen                    Anael Seghezzi (is-16-bit query)
84     Arseny Kapoulkine
85     John-Mark Allen
86 
87  Bug & warning fixes
88     Marc LeBlanc            David Woo          Guillaume George   Martins Mozeiko
89     Christpher Lloyd        Jerry Jansson      Joseph Thomson     Phil Jordan
90     Dave Moore              Roy Eltham         Hayaki Saito       Nathan Reed
91     Won Chun                Luke Graham        Johan Duparc       Nick Verigakis
92     the Horde3D community   Thomas Ruf         Ronny Chevalier    github:rlyeh
93     Janez Zemva             John Bartholomew   Michal Cichon      github:romigrou
94     Jonathan Blow           Ken Hamada         Tero Hanninen      github:svdijk
95     Laurent Gomila          Cort Stratton      Sergio Gonzalez    github:snagar
96     Aruelien Pocheville     Thibault Reuille   Cass Everitt       github:Zelex
97     Ryamond Barbiero        Paul Du Bois       Engin Manap        github:grim210
98     Aldo Culquicondor       Philipp Wiesemann  Dale Weiler        github:sammyhw
99     Oriol Ferrer Mesia      Josh Tobin         Matthew Gregan     github:phprus
100     Julian Raschke          Gregory Mullen     Baldur Karlsson    github:poppolopoppo
101     Christian Floisand      Kevin Schmidt                         github:darealshinji
102     Blazej Dariusz Roszkowski                                     github:Michaelangel007
103 */
104 
105 #ifndef STBI_INCLUDE_STB_IMAGE_H
106 #define STBI_INCLUDE_STB_IMAGE_H
107 
108 // DOCUMENTATION
109 //
110 // Limitations:
111 //    - no 12-bit-per-channel JPEG
112 //    - no JPEGs with arithmetic coding
113 //    - GIF always returns *comp=4
114 //
115 // Basic usage (see HDR discussion below for HDR usage):
116 //    int x,y,n;
117 //    unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
118 //    // ... process data if not NULL ...
119 //    // ... x = width, y = height, n = # 8-bit components per pixel ...
120 //    // ... replace '0' with '1'..'4' to force that many components per pixel
121 //    // ... but 'n' will always be the number that it would have been if you said 0
122 //    stbi_image_free(data)
123 //
124 // Standard parameters:
125 //    int *x                 -- outputs image width in pixels
126 //    int *y                 -- outputs image height in pixels
127 //    int *channels_in_file  -- outputs # of image components in image file
128 //    int desired_channels   -- if non-zero, # of image components requested in result
129 //
130 // The return value from an image loader is an 'unsigned char *' which points
131 // to the pixel data, or NULL on an allocation failure or if the image is
132 // corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
133 // with each pixel consisting of N interleaved 8-bit components; the first
134 // pixel pointed to is top-left-most in the image. There is no padding between
135 // image scanlines or between pixels, regardless of format. The number of
136 // components N is 'desired_channels' if desired_channels is non-zero, or
137 // *channels_in_file otherwise. If desired_channels is non-zero,
138 // *channels_in_file has the number of components that _would_ have been
139 // output otherwise. E.g. if you set desired_channels to 4, you will always
140 // get RGBA output, but you can check *channels_in_file to see if it's trivially
141 // opaque because e.g. there were only 3 channels in the source image.
142 //
143 // An output image with N components has the following components interleaved
144 // in this order in each pixel:
145 //
146 //     N=#comp     components
147 //       1           grey
148 //       2           grey, alpha
149 //       3           red, green, blue
150 //       4           red, green, blue, alpha
151 //
152 // If image loading fails for any reason, the return value will be NULL,
153 // and *x, *y, *channels_in_file will be unchanged. The function
154 // stbi_failure_reason() can be queried for an extremely brief, end-user
155 // unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS
156 // to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
157 // more user-friendly ones.
158 //
159 // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
160 //
161 // ===========================================================================
162 //
163 // Philosophy
164 //
165 // stb libraries are designed with the following priorities:
166 //
167 //    1. easy to use
168 //    2. easy to maintain
169 //    3. good performance
170 //
171 // Sometimes I let "good performance" creep up in priority over "easy to maintain",
172 // and for best performance I may provide less-easy-to-use APIs that give higher
173 // performance, in addition to the easy to use ones. Nevertheless, it's important
174 // to keep in mind that from the standpoint of you, a client of this library,
175 // all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
176 //
177 // Some secondary priorities arise directly from the first two, some of which
178 // make more explicit reasons why performance can't be emphasized.
179 //
180 //    - Portable ("ease of use")
181 //    - Small source code footprint ("easy to maintain")
182 //    - No dependencies ("ease of use")
183 //
184 // ===========================================================================
185 //
186 // I/O callbacks
187 //
188 // I/O callbacks allow you to read from arbitrary sources, like packaged
189 // files or some other source. Data read from callbacks are processed
190 // through a small internal buffer (currently 128 bytes) to try to reduce
191 // overhead.
192 //
193 // The three functions you must define are "read" (reads some bytes of data),
194 // "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
195 //
196 // ===========================================================================
197 //
198 // SIMD support
199 //
200 // The JPEG decoder will try to automatically use SIMD kernels on x86 when
201 // supported by the compiler. For ARM Neon support, you must explicitly
202 // request it.
203 //
204 // (The old do-it-yourself SIMD API is no longer supported in the current
205 // code.)
206 //
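// On x86, SSE2 will automatically be used when available. With MSVC this is
// decided by a run-time test; with GCC/Clang there is no run-time test, and
// SSE2 is used only if the compiler itself is allowed to emit it (e.g. -msse2).
// Otherwise the generic C versions are used as a fall-back. On ARM targets,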
209 // the typical path is to have separate builds for NEON and non-NEON devices
210 // (at least this is true for iOS and Android). Therefore, the NEON support is
211 // toggled by a build flag: define STBI_NEON to get NEON loops.
212 //
213 // If for some reason you do not want to use any of SIMD code, or if
214 // you have issues compiling it, you can disable it entirely by
215 // defining STBI_NO_SIMD.
216 //
217 // ===========================================================================
218 //
219 // HDR image support   (disable by defining STBI_NO_HDR)
220 //
221 // stb_image now supports loading HDR images in general, and currently
222 // the Radiance .HDR file format, although the support is provided
223 // generically. You can still load any file through the existing interface;
224 // if you attempt to load an HDR file, it will be automatically remapped to
225 // LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
226 // both of these constants can be reconfigured through this interface:
227 //
228 //     stbi_hdr_to_ldr_gamma(2.2f);
229 //     stbi_hdr_to_ldr_scale(1.0f);
230 //
// (note, do not use _inverse_ constants; stb_image will invert them
232 // appropriately).
233 //
234 // Additionally, there is a new, parallel interface for loading files as
235 // (linear) floats to preserve the full dynamic range:
236 //
237 //    float *data = stbi_loadf(filename, &x, &y, &n, 0);
238 //
239 // If you load LDR images through this interface, those images will
240 // be promoted to floating point values, run through the inverse of
241 // constants corresponding to the above:
242 //
243 //     stbi_ldr_to_hdr_scale(1.0f);
244 //     stbi_ldr_to_hdr_gamma(2.2f);
245 //
246 // Finally, given a filename (or an open file or memory block--see header
247 // file for details) containing image data, you can query for the "most
248 // appropriate" interface to use (that is, whether the image is HDR or
249 // not), using:
250 //
251 //     stbi_is_hdr(char *filename);
252 //
253 // ===========================================================================
254 //
255 // iPhone PNG support:
256 //
257 // By default we convert iphone-formatted PNGs back to RGB, even though
258 // they are internally encoded differently. You can disable this conversion
// by calling stbi_convert_iphone_png_to_rgb(0), in which case
260 // you will always just get the native iphone "format" through (which
261 // is BGR stored in RGB).
262 //
263 // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
264 // pixel to remove any premultiplied alpha *only* if the image file explicitly
265 // says there's premultiplied data (currently only happens in iPhone images,
266 // and only if iPhone convert-to-rgb processing is on).
267 //
268 // ===========================================================================
269 //
270 // ADDITIONAL CONFIGURATION
271 //
272 //  - You can suppress implementation of any of the decoders to reduce
273 //    your code footprint by #defining one or more of the following
274 //    symbols before creating the implementation.
275 //
276 //        STBI_NO_JPEG
277 //        STBI_NO_PNG
278 //        STBI_NO_BMP
279 //        STBI_NO_PSD
280 //        STBI_NO_TGA
281 //        STBI_NO_GIF
282 //        STBI_NO_HDR
283 //        STBI_NO_PIC
284 //        STBI_NO_PNM   (.ppm and .pgm)
285 //
286 //  - You can request *only* certain decoders and suppress all other ones
287 //    (this will be more forward-compatible, as addition of new decoders
288 //    doesn't require you to disable them explicitly):
289 //
290 //        STBI_ONLY_JPEG
291 //        STBI_ONLY_PNG
292 //        STBI_ONLY_BMP
293 //        STBI_ONLY_PSD
294 //        STBI_ONLY_TGA
295 //        STBI_ONLY_GIF
296 //        STBI_ONLY_HDR
297 //        STBI_ONLY_PIC
298 //        STBI_ONLY_PNM   (.ppm and .pgm)
299 //
300 //   - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
301 //     want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
302 //
303 
304 
305 #ifndef STBI_NO_STDIO
306 #include <stdio.h>
307 #endif // STBI_NO_STDIO
308 
309 #define STBI_VERSION 1
310 
// Channel-count constants for the 'desired_channels' argument of the loaders.
// The numeric value of each constant is the number of interleaved components
// per pixel, in the order given in the component table in the documentation.
enum
{
   STBI_default = 0, // only used for desired_channels

   STBI_grey       = 1, // grey
   STBI_grey_alpha = 2, // grey, alpha
   STBI_rgb        = 3, // red, green, blue
   STBI_rgb_alpha  = 4  // red, green, blue, alpha
};
320 
// stbi_uc: element type of the pixel data returned by the 8-bit loaders and of
// the input buffers handed to the *_from_memory functions.
typedef unsigned char stbi_uc;
// stbi_us: element type of the pixel data returned by the 16-bit loaders.
typedef unsigned short stbi_us;
323 
324 #ifdef __cplusplus
325 extern "C" {
326 #endif
327 
328 #ifdef STB_IMAGE_STATIC
329 #define STBIDEF static
330 #else
331 #define STBIDEF extern
332 #endif
333 
334 //////////////////////////////////////////////////////////////////////////////
335 //
336 // PRIMARY API - works on images of any type
337 //
338 
339 //
340 // load image by filename, open file, or memory buffer
341 //
342 
// Table of user-supplied I/O functions consumed by the *_from_callbacks entry
// points. Each callback takes a 'user' pointer as its first argument
// (presumably the 'user' value given to the *_from_callbacks call -- the code
// that invokes the callbacks is not in this part of the file).
typedef struct
{
   int      (*read)  (void *user,char *data,int size);   // fill 'data' with 'size' bytes.  return number of bytes actually read
   void     (*skip)  (void *user,int n);                 // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
   int      (*eof)   (void *user);                       // returns nonzero if we are at end of file/data
} stbi_io_callbacks;
349 
350 ////////////////////////////////////
351 //
352 // 8-bits-per-channel interface
353 //
354 
// All loaders below use the "Standard parameters" described in the
// documentation (x, y, channels_in_file, desired_channels) and return
// 8-bit-per-component pixel data, or NULL on failure.

// decode an image stored in memory: 'len' bytes starting at 'buffer'
STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *channels_in_file, int desired_channels);
// decode an image whose bytes are delivered through the callbacks in 'clbk'
STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *channels_in_file, int desired_channels);
#ifndef STBI_NO_GIF
// GIF-only loader reading from memory; declared only when GIF support is enabled.
// NOTE(review): presumably loads every frame of an animation, with *z receiving
// the frame count and *delays the per-frame delays -- confirm against the implementation.
STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
#endif


#ifndef STBI_NO_STDIO
// decode the image file named 'filename'
STBIDEF stbi_uc *stbi_load            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
// decode from an already-open FILE
STBIDEF stbi_uc *stbi_load_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
// for stbi_load_from_file, file pointer is left pointing immediately after image
#endif
367 
368 ////////////////////////////////////
369 //
370 // 16-bits-per-channel interface
371 //
372 
// These take the same parameters as the 8-bit loaders but return stbi_us
// (unsigned short) components instead of stbi_uc.
STBIDEF stbi_us *stbi_load_16_from_memory   (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);

#ifndef STBI_NO_STDIO
// filename / FILE variants; only declared when stdio support is enabled
STBIDEF stbi_us *stbi_load_16          (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
#endif
380 
381 ////////////////////////////////////
382 //
383 // float-per-channel interface
384 //
// Loaders returning float components (see "HDR image support" in the
// documentation for how LDR images are promoted to float). The whole group is
// omitted when STBI_NO_LINEAR is defined.
#ifndef STBI_NO_LINEAR
   STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
   STBIDEF float *stbi_loadf_from_callbacks  (stbi_io_callbacks const *clbk, void *user, int *x, int *y,  int *channels_in_file, int desired_channels);

   #ifndef STBI_NO_STDIO
   // filename / FILE variants; only declared when stdio support is enabled
   STBIDEF float *stbi_loadf            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
   STBIDEF float *stbi_loadf_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
   #endif
#endif
394 
// Set the gamma and scale constants applied when an HDR image is remapped to
// LDR through the regular (non-float) loaders. Per the documentation the
// defaults are gamma 2.2 and scale 1; pass the plain, non-inverted values.
#ifndef STBI_NO_HDR
   STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma);
   STBIDEF void   stbi_hdr_to_ldr_scale(float scale);
#endif // STBI_NO_HDR

// Set the gamma and scale constants used when an LDR image is promoted to
// float by the stbi_loadf family.
#ifndef STBI_NO_LINEAR
   STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma);
   STBIDEF void   stbi_ldr_to_hdr_scale(float scale);
#endif // STBI_NO_LINEAR
404 
// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
// Each variant answers, for a different kind of source (callbacks, memory
// block, filename, open FILE), whether the image is an HDR image, i.e. whether
// the float interface is the "most appropriate" one to load it with.
STBIDEF int    stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
STBIDEF int    stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
#ifndef STBI_NO_STDIO
STBIDEF int      stbi_is_hdr          (char const *filename);
STBIDEF int      stbi_is_hdr_from_file(FILE *f);
#endif // STBI_NO_STDIO
412 
413 
414 // get a VERY brief reason for failure
415 // NOT THREADSAFE
416 STBIDEF const char *stbi_failure_reason  (void);
417 
418 // free the loaded image -- this is just free()
419 STBIDEF void     stbi_image_free      (void *retval_from_stbi_load);
420 
// get image dimensions & components without fully decoding
// (*x, *y and *comp are the outputs; the stbi_is_16_bit* functions are the
// "is-16-bit" queries and take only the image source)
// NOTE(review): the meaning of the int return value of stbi_info* is not
// visible in this part of the file -- confirm against the implementation.
STBIDEF int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
STBIDEF int      stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);
STBIDEF int      stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len);
STBIDEF int      stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user);

#ifndef STBI_NO_STDIO
// filename / FILE variants; only declared when stdio support is enabled
STBIDEF int      stbi_info               (char const *filename,     int *x, int *y, int *comp);
STBIDEF int      stbi_info_from_file     (FILE *f,                  int *x, int *y, int *comp);
STBIDEF int      stbi_is_16_bit          (char const *filename);
STBIDEF int      stbi_is_16_bit_from_file(FILE *f);
#endif
433 
434 
435 
436 // for image formats that explicitly notate that they have premultiplied alpha,
437 // we just return the colors as stored in the file. set this flag to force
// unpremultiplication. results are undefined if the unpremultiply overflows.
439 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
440 
441 // indicate whether we should process iphone images back to canonical format,
442 // or just pass them through "as-is"
443 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
444 
445 // flip the image vertically, so the first pixel in the output array is the bottom left
446 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
447 
448 // ZLIB client - used by PNG, available for other purposes
449 
// In all of these, 'buffer'/'ibuffer' with 'len'/'ilen' describe the compressed input.
// NOTE(review): the implementations are not in this part of the file; the
// notes below are read off the names and signatures only -- confirm.
//   *_malloc*   : presumably return a newly allocated output buffer and store
//                 its length in *outlen
//   *_buffer    : presumably decode into the caller's 'obuffer' of 'olen' bytes
//   *_noheader_*: presumably expect a raw stream without the zlib header
//                 (compare the 'parse_header' flag of the _headerflag variant)
STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
STBIDEF int   stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);

STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
457 
458 
459 #ifdef __cplusplus
460 }
461 #endif
462 
463 //
464 //
465 ////   end header file   /////////////////////////////////////////////////////
466 #endif // STBI_INCLUDE_STB_IMAGE_H
467 
468 #ifdef STB_IMAGE_IMPLEMENTATION
469 
// Translate the "only these decoders" request into per-format exclusions:
// as soon as any STBI_ONLY_xxx symbol is defined, every image format whose
// own STBI_ONLY_xxx is NOT defined gets its STBI_NO_xxx symbol defined.
// STBI_ONLY_ZLIB also triggers this, and since it names no image format it
// results in all nine image decoders being excluded.
#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
  || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
  || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
  || defined(STBI_ONLY_ZLIB)
   #ifndef STBI_ONLY_JPEG
   #define STBI_NO_JPEG
   #endif
   #ifndef STBI_ONLY_PNG
   #define STBI_NO_PNG
   #endif
   #ifndef STBI_ONLY_BMP
   #define STBI_NO_BMP
   #endif
   #ifndef STBI_ONLY_PSD
   #define STBI_NO_PSD
   #endif
   #ifndef STBI_ONLY_TGA
   #define STBI_NO_TGA
   #endif
   #ifndef STBI_ONLY_GIF
   #define STBI_NO_GIF
   #endif
   #ifndef STBI_ONLY_HDR
   #define STBI_NO_HDR
   #endif
   #ifndef STBI_ONLY_PIC
   #define STBI_NO_PIC
   #endif
   #ifndef STBI_ONLY_PNM
   #define STBI_NO_PNM
   #endif
#endif
502 
503 #if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
504 #define STBI_NO_ZLIB
505 #endif
506 
507 
508 #include <stdarg.h>
509 #include <stddef.h> // ptrdiff_t on osx
510 #include <stdlib.h>
511 #include <string.h>
512 #include <limits.h>
513 
514 #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
515 #include <math.h>  // ldexp, pow
516 #endif
517 
518 #ifndef STBI_NO_STDIO
519 #include <stdio.h>
520 #endif
521 
522 #ifndef STBI_ASSERT
523 #include <assert.h>
524 #define STBI_ASSERT(x) assert(x)
525 #endif
526 
527 
// stbi_inline: inlining qualifier used on internal helpers.
//   - MSVC:              __forceinline
//   - other compilers:   'inline' when compiled as C++, nothing at all when
//                        compiled as C
#ifndef _MSC_VER
   #ifdef __cplusplus
   #define stbi_inline inline
   #else
   #define stbi_inline
   #endif
#else
   #define stbi_inline __forceinline
#endif
537 
538 
// Internal 16- and 32-bit integer aliases. The MSVC branch spells them with
// built-in types and does not include <stdint.h>; every other compiler takes
// them from <stdint.h>.
#ifdef _MSC_VER
typedef unsigned short stbi__uint16;
typedef   signed short stbi__int16;
typedef unsigned int   stbi__uint32;
typedef   signed int   stbi__int32;
#else
#include <stdint.h>
typedef uint16_t stbi__uint16;
typedef int16_t  stbi__int16;
typedef uint32_t stbi__uint32;
typedef int32_t  stbi__int32;
#endif
551 
552 // should produce compiler error if size is wrong
553 typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
554 
555 #ifdef _MSC_VER
556 #define STBI_NOTUSED(v)  (void)(v)
557 #else
558 #define STBI_NOTUSED(v)  (void)sizeof(v)
559 #endif
560 
561 #ifdef _MSC_VER
562 #define STBI_HAS_LROTL
563 #endif
564 
// stbi_lrot(x,y): rotate the 32-bit value x left by y bits, for y in 0..31.
// Uses the _lrotl intrinsic when STBI_HAS_LROTL is defined, otherwise a
// portable shift/or expression (which evaluates both arguments twice).
#ifdef STBI_HAS_LROTL
   #define stbi_lrot(x,y)  _lrotl(x,y)
#else
   // The right-shift count is masked to 0..31 so that y == 0 yields x instead
   // of a shift by 32, which is undefined behavior for a 32-bit operand.
   #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> ((32 - (y)) & 31)))
#endif
570 
// Allocator configuration.
// The user must override either the complete allocator set (STBI_MALLOC,
// STBI_FREE, and one of STBI_REALLOC / STBI_REALLOC_SIZED) or none of it;
// any partial combination is rejected at compile time.
#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
// ok
#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
// ok
#else
#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
#endif

// No user allocator given: fall back to the C library.
#ifndef STBI_MALLOC
#define STBI_MALLOC(sz)           malloc(sz)
#define STBI_REALLOC(p,newsz)     realloc(p,newsz)
#define STBI_FREE(p)              free(p)
#endif

// If only the plain realloc form exists, the sized form forwards to it and
// simply drops the old size.
#ifndef STBI_REALLOC_SIZED
#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
#endif
588 
589 // x86/x64 detection
590 #if defined(__x86_64__) || defined(_M_X64)
591 #define STBI__X64_TARGET
592 #elif defined(__i386) || defined(_M_IX86)
593 #define STBI__X86_TARGET
594 #endif
595 
596 #if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
597 // gcc doesn't support sse2 intrinsics unless you compile with -msse2,
598 // which in turn means it gets to use SSE2 everywhere. This is unfortunate,
599 // but previous attempts to provide the SSE2 functions with runtime
600 // detection caused numerous issues. The way architecture extensions are
601 // exposed in GCC/Clang is, sadly, not really suited for one-file libs.
602 // New behavior: if compiled with -msse2, we use SSE2 without any
603 // detection; if not, we don't use it at all.
604 #define STBI_NO_SIMD
605 #endif
606 
607 #if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
608 // Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
609 //
610 // 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
611 // Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
612 // As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
613 // simultaneously enabling "-mstackrealign".
614 //
615 // See https://github.com/nothings/stb/issues/81 for more information.
616 //
617 // So default to no SSE2 on 32-bit MinGW. If you've read this far and added
618 // -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
619 #define STBI_NO_SIMD
620 #endif
621 
622 #if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
623 #define STBI_SSE2
624 #include <emmintrin.h>
625 
626 #ifdef _MSC_VER
627 
628 #if _MSC_VER >= 1400  // not VC6
629 #include <intrin.h> // __cpuid
// Query CPUID function 1 through the __cpuid intrinsic and return the last of
// the four result words (index 3).
static int stbi__cpuid3(void)
{
   int regs[4];

   __cpuid(regs, 1);
   return regs[3];
}
636 #else
// Variant used when the enclosing #if finds _MSC_VER < 1400, where the
// __cpuid intrinsic path is not taken: executes the CPUID instruction with
// EAX = 1 via inline assembly and returns the value left in EDX.
static int stbi__cpuid3(void)
{
   int res;
   __asm {
      mov  eax,1
      cpuid
      mov  res,edx
   }
   return res;
}
647 #endif
648 
649 #define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
650 
// Run-time SSE2 check for the MSVC build: returns 1 when bit 26 of the word
// reported by stbi__cpuid3() is set, 0 otherwise.
static int stbi__sse2_available(void)
{
   return (stbi__cpuid3() >> 26) & 1;
}
656 #else // assume GCC-style if not VC++
657 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
658 
// GCC/Clang flavor of the SSE2 check: nothing is probed at run time.
static int stbi__sse2_available(void)
{
   // Getting as far as compiling this on GCC/Clang implies -msse2 is in
   // effect, so the compiler is already free to emit SSE2 instructions
   // anywhere; using them ourselves is therefore always fine.
   enum { sse2_always_ok = 1 };
   return sse2_always_ok;
}
666 #endif
667 #endif
668 
669 // ARM NEON
670 #if defined(STBI_NO_SIMD) && defined(STBI_NEON)
671 #undef STBI_NEON
672 #endif
673 
674 #ifdef STBI_NEON
675 #include <arm_neon.h>
676 // assume GCC or Clang on ARM targets
677 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
678 #endif
679 
680 #ifndef STBI_SIMD_ALIGN
681 #define STBI_SIMD_ALIGN(type, name) type name
682 #endif
683 
684 ///////////////////////////////////////////////
685 //
686 //  stbi__context struct and start_xxx functions
687 
// stbi__context structure is our basic context used by all images, so it
// contains all the IO context, plus some basic image information
typedef struct
{
   // basic image information (presumably width/height and component counts
   // of the file and of the output -- the code filling these is not in view)
   stbi__uint32 img_x, img_y;
   int img_n, img_out_n;

   // user I/O callbacks and the pointer handed to them; io.read is set to
   // NULL when decoding from a memory block
   stbi_io_callbacks io;
   void *io_user_data;

   int read_from_callbacks;      // 1 when set up by stbi__start_callbacks, 0 by stbi__start_mem
   int buflen;                   // set to sizeof(buffer_start) for callback-based contexts
   stbi_uc buffer_start[128];    // internal buffer that callback data passes through

   // current data window: with stbi__start_mem this is the caller's memory
   // block itself (start, and start+len)
   stbi_uc *img_buffer, *img_buffer_end;
   // the window as it was right after initialization; stbi__rewind copies
   // these back into img_buffer / img_buffer_end
   stbi_uc *img_buffer_original, *img_buffer_original_end;
} stbi__context;
705 
706 
707 static void stbi__refill_buffer(stbi__context *s);
708 
709 // initialize a memory-decode context
stbi__start_mem(stbi__context * s,stbi_uc const * buffer,int len)710 static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
711 {
712    s->io.read = NULL;
713    s->read_from_callbacks = 0;
714    s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
715    s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
716 }
717 
718 // initialize a callback-based context
stbi__start_callbacks(stbi__context * s,stbi_io_callbacks * c,void * user)719 static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
720 {
721    s->io = *c;
722    s->io_user_data = user;
723    s->buflen = sizeof(s->buffer_start);
724    s->read_from_callbacks = 1;
725    s->img_buffer_original = s->buffer_start;
726    stbi__refill_buffer(s);
727    s->img_buffer_original_end = s->img_buffer_end;
728 }
729 
730 #ifndef STBI_NO_STDIO
731 
// stdio 'read' callback: 'user' is the FILE to read from. Tries to read
// 'size' bytes into 'data' and returns how many bytes fread() delivered.
static int stbi__stdio_read(void *user, char *data, int size)
{
   FILE *stream = (FILE *) user;
   size_t got = fread(data, 1, size, stream);

   return (int) got;
}
736 
// stdio 'skip' callback: 'user' is the FILE; moves the file position by 'n'
// bytes relative to the current position (negative 'n' moves backwards).
static void stbi__stdio_skip(void *user, int n)
{
   FILE *f = (FILE *) user;
   int ch;

   fseek(f, n, SEEK_CUR);

   // A successful fseek() clears the stream's end-of-file indicator, so after
   // skipping past the end feof() would still report "not at end". Probe one
   // byte to bring the indicator up to date, and push it back if it was real
   // data so the file position is unchanged.
   ch = fgetc(f);
   if (ch != EOF) {
      ungetc(ch, f);
   }
}
741 
// stdio 'eof' callback: 'user' is the FILE. Returns nonzero when no more data
// can be read from it -- either the end-of-file indicator is set or the stream
// is in an error state. Treating a read error as end-of-data keeps a failing
// stream from looking like it still has bytes to deliver.
static int stbi__stdio_eof(void *user)
{
   FILE *f = (FILE *) user;

   return feof(f) || ferror(f);
}
746 
// Callback table that implements stbi_io_callbacks on top of stdio; the
// 'user' pointer given to these callbacks is the FILE to operate on.
// Entries are positional: read, skip, eof.
static stbi_io_callbacks stbi__stdio_callbacks =
{
   stbi__stdio_read,
   stbi__stdio_skip,
   stbi__stdio_eof,
};
753 
// Prepare 's' for decoding from the open stdio stream 'f': a callback-based
// context whose callbacks are the stdio adapters and whose user pointer is 'f'.
static void stbi__start_file(stbi__context *s, FILE *f)
{
   void *stream = f;

   stbi__start_callbacks(s, &stbi__stdio_callbacks, stream);
}
758 
759 //static void stop_file(stbi__context *s) { }
760 
761 #endif // !STBI_NO_STDIO
762 
// Restore the context's data window to what it was right after initialization.
static void stbi__rewind(stbi__context *s)
{
   // This is not a true rewind of the underlying stream: only the initial
   // buffer window is restored. That is enough because rewinding is only
   // done after a format 'test', and a test never looks at more than
   // 92 bytes.
   s->img_buffer_end = s->img_buffer_original_end;
   s->img_buffer     = s->img_buffer_original;
}
771 
// Channel-order constants (STBI_ORDER_RGB == 0, STBI_ORDER_BGR == 1).
// NOTE(review): presumably the values stored in stbi__result_info.channel_order
// -- the code using them is not in this part of the file.
enum
{
   STBI_ORDER_RGB,
   STBI_ORDER_BGR
};
777 
// Extra information about a decoded image; a pointer to one of these is
// passed as the 'ri' argument of the per-format stbi__xxx_load functions.
// NOTE(review): field meanings below are read off the names only -- confirm
// against the loaders.
typedef struct
{
   int bits_per_channel;   // presumably 8 or 16
   int num_channels;
   int channel_order;      // presumably one of STBI_ORDER_RGB / STBI_ORDER_BGR
} stbi__result_info;
784 
// Forward declarations of the per-format decoder entry points. Each group is
// compiled out when the matching STBI_NO_xxx symbol is defined. Every format
// declares the same trio:
//    stbi__xxx_test  -- takes only the context
//    stbi__xxx_load  -- takes the context, the x/y/comp outputs, req_comp and
//                       a stbi__result_info to fill in
//    stbi__xxx_info  -- takes the context and the x/y/comp outputs
// PNG and PSD additionally declare stbi__xxx_is16, the PSD loader takes an
// extra 'bpc' argument, the HDR loader returns float* instead of void*, and
// GIF has an additional multi-frame entry point (stbi__load_gif_main).
#ifndef STBI_NO_JPEG
static int      stbi__jpeg_test(stbi__context *s);
static void    *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PNG
static int      stbi__png_test(stbi__context *s);
static void    *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
static int      stbi__png_is16(stbi__context *s);
#endif

#ifndef STBI_NO_BMP
static int      stbi__bmp_test(stbi__context *s);
static void    *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_TGA
static int      stbi__tga_test(stbi__context *s);
static void    *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PSD
static int      stbi__psd_test(stbi__context *s);
static void    *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
static int      stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
static int      stbi__psd_is16(stbi__context *s);
#endif

#ifndef STBI_NO_HDR
static int      stbi__hdr_test(stbi__context *s);
static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PIC
static int      stbi__pic_test(stbi__context *s);
static void    *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_GIF
static int      stbi__gif_test(stbi__context *s);
static void    *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static void    *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
static int      stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
#endif
835 
836 #ifndef STBI_NO_PNM
837 static int      stbi__pnm_test(stbi__context *s);
838 static void    *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
839 static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
840 #endif
841 
842 // this is not threadsafe
843 static const char *stbi__g_failure_reason;
844 
stbi_failure_reason(void)845 STBIDEF const char *stbi_failure_reason(void)
846 {
847    return stbi__g_failure_reason;
848 }
849 
stbi__err(const char * str)850 static int stbi__err(const char *str)
851 {
852    stbi__g_failure_reason = str;
853    return 0;
854 }
855 
// internal allocation entry point: requests 'size' bytes from the STBI_MALLOC hook
static void *stbi__malloc(size_t size)
{
   void *block = STBI_MALLOC(size);
   return block;
}
860 
861 // stb_image uses ints pervasively, including for offset calculations.
862 // therefore the largest decoded image size we can support with the
863 // current code, even on 64-bit targets, is INT_MAX. this is not a
864 // significant limitation for the intended use case.
865 //
866 // we do, however, need to make sure our size calculations don't
867 // overflow. hence a few helper functions for size calculations that
868 // multiply integers together, making sure that they're non-negative
869 // and no overflow occurs.
870 
// return 1 if the sum a+b is valid, 0 on overflow.
// negative terms are considered invalid.
static int stbi__addsizes_valid(int a, int b)
{
   // both terms must be non-negative; a negative 'a' would otherwise pass
   // the comparison below and be reported as a valid size
   if (a < 0 || b < 0) return 0;
   // now 0 <= b <= INT_MAX, hence also
   // 0 <= INT_MAX - b <= INT_MAX.
   // And "a + b <= INT_MAX" (which might overflow) is the
   // same as a <= INT_MAX - b (no overflow)
   return a <= INT_MAX - b;
}
882 
// returns 1 if the product a*b is valid, 0 on overflow.
// negative factors are considered invalid.
static int stbi__mul2sizes_valid(int a, int b)
{
   if (a >= 0 && b >= 0) {
      // multiplying by zero can never overflow; handling it first also
      // keeps the division below away from a zero divisor
      if (b == 0)
         return 1;
      // overflow-free way of asking whether a*b <= INT_MAX
      return (INT_MAX/b) >= a;
   }
   return 0;
}
892 
// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow
static int stbi__mad2sizes_valid(int a, int b, int add)
{
   // the product is only formed after it is known not to overflow
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   return stbi__addsizes_valid(a*b, add);
}
898 
// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow
static int stbi__mad3sizes_valid(int a, int b, int c, int add)
{
   // each partial product is validated before it is actually computed
   if (!stbi__mul2sizes_valid(a, b))   return 0;
   if (!stbi__mul2sizes_valid(a*b, c)) return 0;
   return stbi__addsizes_valid(a*b*c, add);
}
905 
// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
{
   // each partial product is validated before it is actually computed
   if (!stbi__mul2sizes_valid(a, b))     return 0;
   if (!stbi__mul2sizes_valid(a*b, c))   return 0;
   if (!stbi__mul2sizes_valid(a*b*c, d)) return 0;
   return stbi__addsizes_valid(a*b*c*d, add);
}
#endif
914 
// mallocs with size overflow checking

// allocate a*b + add bytes; NULL if that size fails the validity check
static void *stbi__malloc_mad2(int a, int b, int add)
{
   if (stbi__mad2sizes_valid(a, b, add))
      return stbi__malloc(a*b + add);
   return NULL;
}
921 
// allocate a*b*c + add bytes; NULL if that size fails the validity check
static void *stbi__malloc_mad3(int a, int b, int c, int add)
{
   if (stbi__mad3sizes_valid(a, b, c, add))
      return stbi__malloc(a*b*c + add);
   return NULL;
}
927 
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
// allocate a*b*c*d + add bytes; NULL if that size fails the validity check
static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
{
   if (stbi__mad4sizes_valid(a, b, c, d, add))
      return stbi__malloc(a*b*c*d + add);
   return NULL;
}
#endif
935 
// stbi__err - error
// stbi__errpf - error returning pointer to float
// stbi__errpuc - error returning pointer to unsigned char

// the two-argument stbi__err(x,y) macro selects which message is recorded:
//   STBI_NO_FAILURE_STRINGS - neither; the macro is just the constant 0
//   STBI_FAILURE_USERMSG    - the second argument, y
//   otherwise               - the first argument, x
#ifdef STBI_NO_FAILURE_STRINGS
   #define stbi__err(x,y)  0
#elif defined(STBI_FAILURE_USERMSG)
   #define stbi__err(x,y)  stbi__err(y)
#else
   #define stbi__err(x,y)  stbi__err(x)
#endif

// both arms of the ?: are NULL, so these always yield a null pointer of the
// named type; the conditional is only there so stbi__err(x,y) is evaluated
#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))
950 
stbi_image_free(void * retval_from_stbi_load)951 STBIDEF void stbi_image_free(void *retval_from_stbi_load)
952 {
953    STBI_FREE(retval_from_stbi_load);
954 }
955 
// forward declarations of the 8-bit <-> float converters, needed because the
// loading code below calls them before their definitions

#ifndef STBI_NO_LINEAR
static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
#endif

#ifndef STBI_NO_HDR
static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp);
#endif
963 
964 static int stbi__vertically_flip_on_load = 0;
965 
stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)966 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
967 {
968     stbi__vertically_flip_on_load = flag_true_if_should_flip;
969 }
970 
// run each enabled format's test in turn and hand the stream to the loader of
// the first format that claims it.
//   ri  - reset here to its defaults (8 bits per channel, RGB order,
//         0 channels) before any loader sees it
//   bpc - only forwarded to the PSD loader
// returns the chosen loader's result; if no test matches, records the
// "unknown image type" error and returns NULL.
static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
   ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
   ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
   ri->num_channels = 0;

   #ifndef STBI_NO_JPEG
   if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PNG
   if (stbi__png_test(s))  return stbi__png_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_BMP
   if (stbi__bmp_test(s))  return stbi__bmp_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_GIF
   if (stbi__gif_test(s))  return stbi__gif_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PSD
   if (stbi__psd_test(s))  return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc);
   #endif
   #ifndef STBI_NO_PIC
   if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PNM
   if (stbi__pnm_test(s))  return stbi__pnm_load(s,x,y,comp,req_comp, ri);
   #endif

   #ifndef STBI_NO_HDR
   if (stbi__hdr_test(s)) {
      float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri);
      // if the HDR load failed there is nothing to convert, and *x, *y and
      // *comp may never have been written, so stop before reading them
      if (hdr == NULL) return NULL;
      return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
   }
   #endif

   #ifndef STBI_NO_TGA
   // test tga last because it's a crappy test!
   if (stbi__tga_test(s))
      return stbi__tga_load(s,x,y,comp,req_comp, ri);
   #endif

   return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
}
1015 
stbi__convert_16_to_8(stbi__uint16 * orig,int w,int h,int channels)1016 static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
1017 {
1018    int i;
1019    int img_len = w * h * channels;
1020    stbi_uc *reduced;
1021 
1022    reduced = (stbi_uc *) stbi__malloc(img_len);
1023    if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory");
1024 
1025    for (i = 0; i < img_len; ++i)
1026       reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling
1027 
1028    STBI_FREE(orig);
1029    return reduced;
1030 }
1031 
stbi__convert_8_to_16(stbi_uc * orig,int w,int h,int channels)1032 static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
1033 {
1034    int i;
1035    int img_len = w * h * channels;
1036    stbi__uint16 *enlarged;
1037 
1038    enlarged = (stbi__uint16 *) stbi__malloc(img_len*2);
1039    if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
1040 
1041    for (i = 0; i < img_len; ++i)
1042       enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
1043 
1044    STBI_FREE(orig);
1045    return enlarged;
1046 }
1047 
stbi__vertical_flip(void * image,int w,int h,int bytes_per_pixel)1048 static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel)
1049 {
1050    int row;
1051    size_t bytes_per_row = (size_t)w * bytes_per_pixel;
1052    stbi_uc temp[2048];
1053    stbi_uc *bytes = (stbi_uc *)image;
1054 
1055    for (row = 0; row < (h>>1); row++) {
1056       stbi_uc *row0 = bytes + row*bytes_per_row;
1057       stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row;
1058       // swap row0 with row1
1059       size_t bytes_left = bytes_per_row;
1060       while (bytes_left) {
1061          size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);
1062          memcpy(temp, row0, bytes_copy);
1063          memcpy(row0, row1, bytes_copy);
1064          memcpy(row1, temp, bytes_copy);
1065          row0 += bytes_copy;
1066          row1 += bytes_copy;
1067          bytes_left -= bytes_copy;
1068       }
1069    }
1070 }
1071 
stbi__vertical_flip_slices(void * image,int w,int h,int z,int bytes_per_pixel)1072 static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel)
1073 {
1074    int slice;
1075    int slice_size = w * h * bytes_per_pixel;
1076 
1077    stbi_uc *bytes = (stbi_uc *)image;
1078    for (slice = 0; slice < z; ++slice) {
1079       stbi__vertical_flip(bytes, w, h, bytes_per_pixel);
1080       bytes += slice_size;
1081    }
1082 }
1083 
// load an image through stbi__load_main and make sure the result has 8 bits
// per channel: 16-bit output is narrowed with stbi__convert_16_to_8, and the
// image is flipped vertically when flip-on-load is enabled.
// returns NULL if loading or the depth conversion fails.
static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   stbi__result_info ri;
   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);

   if (result == NULL)
      return NULL;

   if (ri.bits_per_channel != 8) {
      STBI_ASSERT(ri.bits_per_channel == 16);
      result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
      // the conversion allocates a new buffer and can fail; without this
      // check the flip below would run on a NULL image
      if (result == NULL)
         return NULL;
      ri.bits_per_channel = 8;
   }

   // @TODO: move stbi__convert_format to here

   if (stbi__vertically_flip_on_load) {
      int channels = req_comp ? req_comp : *comp;
      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
   }

   return (unsigned char *) result;
}
1107 
stbi__load_and_postprocess_16bit(stbi__context * s,int * x,int * y,int * comp,int req_comp)1108 static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1109 {
1110    stbi__result_info ri;
1111    void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
1112 
1113    if (result == NULL)
1114       return NULL;
1115 
1116    if (ri.bits_per_channel != 16) {
1117       STBI_ASSERT(ri.bits_per_channel == 8);
1118       result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
1119       ri.bits_per_channel = 16;
1120    }
1121 
1122    // @TODO: move stbi__convert_format16 to here
1123    // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
1124 
1125    if (stbi__vertically_flip_on_load) {
1126       int channels = req_comp ? req_comp : *comp;
1127       stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
1128    }
1129 
1130    return (stbi__uint16 *) result;
1131 }
1132 
1133 #if !defined(STBI_NO_HDR) || !defined(STBI_NO_LINEAR)
stbi__float_postprocess(float * result,int * x,int * y,int * comp,int req_comp)1134 static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
1135 {
1136    if (stbi__vertically_flip_on_load && result != NULL) {
1137       int channels = req_comp ? req_comp : *comp;
1138       stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
1139    }
1140 }
1141 #endif
1142 
1143 #ifndef STBI_NO_STDIO
1144 
// open 'filename' with an fopen-style 'mode'; the result is null on failure.
// when _MSC_VER >= 1400 the file is opened with fopen_s rather than fopen.
static FILE *stbi__fopen(char const *filename, char const *mode)
{
#if defined(_MSC_VER) && _MSC_VER >= 1400
   FILE *handle;
   if (fopen_s(&handle, filename, mode) != 0)
      handle = 0;
   return handle;
#else
   return fopen(filename, mode);
#endif
}
1156 
1157 
stbi_load(char const * filename,int * x,int * y,int * comp,int req_comp)1158 STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
1159 {
1160    FILE *f = stbi__fopen(filename, "rb");
1161    unsigned char *result;
1162    if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
1163    result = stbi_load_from_file(f,x,y,comp,req_comp);
1164    fclose(f);
1165    return result;
1166 }
1167 
stbi_load_from_file(FILE * f,int * x,int * y,int * comp,int req_comp)1168 STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1169 {
1170    unsigned char *result;
1171    stbi__context s;
1172    stbi__start_file(&s,f);
1173    result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1174    if (result) {
1175       // need to 'unget' all the characters in the IO buffer
1176       fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1177    }
1178    return result;
1179 }
1180 
stbi_load_from_file_16(FILE * f,int * x,int * y,int * comp,int req_comp)1181 STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
1182 {
1183    stbi__uint16 *result;
1184    stbi__context s;
1185    stbi__start_file(&s,f);
1186    result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp);
1187    if (result) {
1188       // need to 'unget' all the characters in the IO buffer
1189       fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1190    }
1191    return result;
1192 }
1193 
stbi_load_16(char const * filename,int * x,int * y,int * comp,int req_comp)1194 STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
1195 {
1196    FILE *f = stbi__fopen(filename, "rb");
1197    stbi__uint16 *result;
1198    if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
1199    result = stbi_load_from_file_16(f,x,y,comp,req_comp);
1200    fclose(f);
1201    return result;
1202 }
1203 
1204 
1205 #endif //!STBI_NO_STDIO
1206 
stbi_load_16_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * channels_in_file,int desired_channels)1207 STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
1208 {
1209    stbi__context s;
1210    stbi__start_mem(&s,buffer,len);
1211    return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
1212 }
1213 
stbi_load_16_from_callbacks(stbi_io_callbacks const * clbk,void * user,int * x,int * y,int * channels_in_file,int desired_channels)1214 STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels)
1215 {
1216    stbi__context s;
1217    stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
1218    return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
1219 }
1220 
stbi_load_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * comp,int req_comp)1221 STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1222 {
1223    stbi__context s;
1224    stbi__start_mem(&s,buffer,len);
1225    return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1226 }
1227 
stbi_load_from_callbacks(stbi_io_callbacks const * clbk,void * user,int * x,int * y,int * comp,int req_comp)1228 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1229 {
1230    stbi__context s;
1231    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1232    return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1233 }
1234 
1235 #ifndef STBI_NO_GIF
stbi_load_gif_from_memory(stbi_uc const * buffer,int len,int ** delays,int * x,int * y,int * z,int * comp,int req_comp)1236 STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
1237 {
1238    unsigned char *result;
1239    stbi__context s;
1240    stbi__start_mem(&s,buffer,len);
1241 
1242    result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp);
1243    if (stbi__vertically_flip_on_load) {
1244       stbi__vertical_flip_slices( result, *x, *y, *z, *comp );
1245    }
1246 
1247    return result;
1248 }
1249 #endif
1250 
1251 #ifndef STBI_NO_LINEAR
// load an image as floats. when HDR support is compiled in and the stream
// passes the HDR test, it is decoded directly (and flipped if requested);
// otherwise it is loaded as 8-bit and converted with stbi__ldr_to_hdr.
// note: if the 8-bit load fails, the "unknown image type" error is reported
// here regardless of what the 8-bit path reported.
static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *ldr;
   #ifndef STBI_NO_HDR
   if (stbi__hdr_test(s)) {
      stbi__result_info ri;
      float *hdr = stbi__hdr_load(s,x,y,comp,req_comp, &ri);
      if (hdr != NULL)
         stbi__float_postprocess(hdr,x,y,comp,req_comp);
      return hdr;
   }
   #endif
   ldr = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
   if (ldr == NULL)
      return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
   return stbi__ldr_to_hdr(ldr, *x, *y, req_comp ? req_comp : *comp);
}
1269 
stbi_loadf_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * comp,int req_comp)1270 STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1271 {
1272    stbi__context s;
1273    stbi__start_mem(&s,buffer,len);
1274    return stbi__loadf_main(&s,x,y,comp,req_comp);
1275 }
1276 
stbi_loadf_from_callbacks(stbi_io_callbacks const * clbk,void * user,int * x,int * y,int * comp,int req_comp)1277 STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1278 {
1279    stbi__context s;
1280    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1281    return stbi__loadf_main(&s,x,y,comp,req_comp);
1282 }
1283 
1284 #ifndef STBI_NO_STDIO
stbi_loadf(char const * filename,int * x,int * y,int * comp,int req_comp)1285 STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
1286 {
1287    float *result;
1288    FILE *f = stbi__fopen(filename, "rb");
1289    if (!f) return stbi__errpf("can't fopen", "Unable to open file");
1290    result = stbi_loadf_from_file(f,x,y,comp,req_comp);
1291    fclose(f);
1292    return result;
1293 }
1294 
stbi_loadf_from_file(FILE * f,int * x,int * y,int * comp,int req_comp)1295 STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1296 {
1297    stbi__context s;
1298    stbi__start_file(&s,f);
1299    return stbi__loadf_main(&s,x,y,comp,req_comp);
1300 }
1301 #endif // !STBI_NO_STDIO
1302 
1303 #endif // !STBI_NO_LINEAR
1304 
// these is-hdr-or-not functions are defined regardless of whether HDR support
// is compiled in, for API simplicity; if STBI_NO_HDR is defined, they always
// report false!
1308 
stbi_is_hdr_from_memory(stbi_uc const * buffer,int len)1309 STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
1310 {
1311    #ifndef STBI_NO_HDR
1312    stbi__context s;
1313    stbi__start_mem(&s,buffer,len);
1314    return stbi__hdr_test(&s);
1315    #else
1316    STBI_NOTUSED(buffer);
1317    STBI_NOTUSED(len);
1318    return 0;
1319    #endif
1320 }
1321 
1322 #ifndef STBI_NO_STDIO
stbi_is_hdr(char const * filename)1323 STBIDEF int      stbi_is_hdr          (char const *filename)
1324 {
1325    FILE *f = stbi__fopen(filename, "rb");
1326    int result=0;
1327    if (f) {
1328       result = stbi_is_hdr_from_file(f);
1329       fclose(f);
1330    }
1331    return result;
1332 }
1333 
stbi_is_hdr_from_file(FILE * f)1334 STBIDEF int stbi_is_hdr_from_file(FILE *f)
1335 {
1336    #ifndef STBI_NO_HDR
1337    long pos = ftell(f);
1338    int res;
1339    stbi__context s;
1340    stbi__start_file(&s,f);
1341    res = stbi__hdr_test(&s);
1342    fseek(f, pos, SEEK_SET);
1343    return res;
1344    #else
1345    STBI_NOTUSED(f);
1346    return 0;
1347    #endif
1348 }
1349 #endif // !STBI_NO_STDIO
1350 
stbi_is_hdr_from_callbacks(stbi_io_callbacks const * clbk,void * user)1351 STBIDEF int      stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
1352 {
1353    #ifndef STBI_NO_HDR
1354    stbi__context s;
1355    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1356    return stbi__hdr_test(&s);
1357    #else
1358    STBI_NOTUSED(clbk);
1359    STBI_NOTUSED(user);
1360    return 0;
1361    #endif
1362 }
1363 
1364 #ifndef STBI_NO_LINEAR
1365 static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;
1366 
stbi_ldr_to_hdr_gamma(float gamma)1367 STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
stbi_ldr_to_hdr_scale(float scale)1368 STBIDEF void   stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
1369 #endif
1370 
1371 static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;
1372 
stbi_hdr_to_ldr_gamma(float gamma)1373 STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
stbi_hdr_to_ldr_scale(float scale)1374 STBIDEF void   stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }
1375 
1376 
1377 //////////////////////////////////////////////////////////////////////////////
1378 //
1379 // Common code used by all image loaders
1380 //
1381 
// scan modes shared by the image loaders.
// NOTE(review): presumably these tell a format parser how far to go (full
// load, type check only, header only) - confirm against the parsers that use them
enum
{
   STBI__SCAN_load=0,
   STBI__SCAN_type,
   STBI__SCAN_header
};
1388 
// ask the read callback for up to s->buflen bytes into the context's own
// buffer (s->buffer_start) and point the read cursor at what arrived.
// when nothing arrives, the context is switched out of callback mode and
// left with a single zero byte to read.
static void stbi__refill_buffer(stbi__context *s)
{
   int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);
   // a negative count (e.g. an error code returned by the callback) is
   // treated like end of file; using it as a length would place
   // img_buffer_end in front of buffer_start
   if (n <= 0) {
      // at end of file, treat same as if from memory, but need to handle case
      // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
      s->read_from_callbacks = 0;
      s->img_buffer = s->buffer_start;
      s->img_buffer_end = s->buffer_start+1;
      *s->img_buffer = 0;
   } else {
      s->img_buffer = s->buffer_start;
      s->img_buffer_end = s->buffer_start + n;
   }
}
1404 
stbi__get8(stbi__context * s)1405 stbi_inline static stbi_uc stbi__get8(stbi__context *s)
1406 {
1407    if (s->img_buffer < s->img_buffer_end)
1408       return *s->img_buffer++;
1409    if (s->read_from_callbacks) {
1410       stbi__refill_buffer(s);
1411       return *s->img_buffer++;
1412    }
1413    return 0;
1414 }
1415 
stbi__at_eof(stbi__context * s)1416 stbi_inline static int stbi__at_eof(stbi__context *s)
1417 {
1418    if (s->io.read) {
1419       if (!(s->io.eof)(s->io_user_data)) return 0;
1420       // if feof() is true, check if buffer = end
1421       // special case: we've only got the special 0 character at the end
1422       if (s->read_from_callbacks == 0) return 1;
1423    }
1424 
1425    return s->img_buffer >= s->img_buffer_end;
1426 }
1427 
// advance the read position by n bytes. a negative n moves the cursor to the
// end of the current buffer. with callbacks installed, whatever part of the
// skip the buffer can't cover is passed to the skip callback.
static void stbi__skip(stbi__context *s, int n)
{
   if (n < 0) {
      s->img_buffer = s->img_buffer_end;
   } else {
      if (s->io.read) {
         int buffered = (int) (s->img_buffer_end - s->img_buffer);
         if (n > buffered) {
            // use up the buffer, then let the source skip the remainder
            s->img_buffer = s->img_buffer_end;
            (s->io.skip)(s->io_user_data, n - buffered);
            return;
         }
      }
      s->img_buffer += n;
   }
}
1444 
// copy n bytes from the context into 'buffer'.
// with callbacks installed and fewer than n bytes buffered, the buffered bytes
// are copied first and the remainder is read straight from the callback.
// returns 1 if all n bytes were obtained, 0 otherwise.
static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
{
   // bytes that can be served from the current buffer
   int blen = (int) (s->img_buffer_end - s->img_buffer);

   if (s->io.read && blen < n) {
      int count;

      memcpy(buffer, s->img_buffer, blen);

      count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen);
      s->img_buffer = s->img_buffer_end;
      return count == (n-blen);
   }

   // compare lengths instead of forming s->img_buffer+n: that pointer is
   // undefined whenever it would lie more than one past the end of the buffer
   if (n <= blen) {
      memcpy(buffer, s->img_buffer, n);
      s->img_buffer += n;
      return 1;
   }
   return 0;
}
1468 
// read two bytes and combine them with the first byte as the high byte
static int stbi__get16be(stbi__context *s)
{
   int hi = stbi__get8(s);
   int lo = stbi__get8(s);
   return (hi << 8) + lo;
}
1474 
stbi__get32be(stbi__context * s)1475 static stbi__uint32 stbi__get32be(stbi__context *s)
1476 {
1477    stbi__uint32 z = stbi__get16be(s);
1478    return (z << 16) + stbi__get16be(s);
1479 }
1480 
1481 #if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
1482 // nothing
1483 #else
stbi__get16le(stbi__context * s)1484 static int stbi__get16le(stbi__context *s)
1485 {
1486    int z = stbi__get8(s);
1487    return z + (stbi__get8(s) << 8);
1488 }
1489 #endif
1490 
1491 #ifndef STBI_NO_BMP
stbi__get32le(stbi__context * s)1492 static stbi__uint32 stbi__get32le(stbi__context *s)
1493 {
1494    stbi__uint32 z = stbi__get16le(s);
1495    return z + (stbi__get16le(s) << 16);
1496 }
1497 #endif
1498 
// keep only the low 8 bits of x and cast the result to stbi_uc
#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 255))  // truncate int to byte without warnings
1500 
1501 
1502 //////////////////////////////////////////////////////////////////////////////
1503 //
1504 //  generic converter from built-in img_n to req_comp
1505 //    individual types do this automatically as much as possible (e.g. jpeg
1506 //    does all cases internally since it needs to colorspace convert anyway,
1507 //    and it never has alpha, so very few cases ). png can automatically
1508 //    interleave an alpha=255 channel, but falls back to this for other cases
1509 //
1510 //  assume data buffer is malloced, so malloc a new one and free that one
1511 //  only failure mode is malloc failing
1512 
stbi__compute_y(int r,int g,int b)1513 static stbi_uc stbi__compute_y(int r, int g, int b)
1514 {
1515    return (stbi_uc) (((r*77) + (g*150) +  (29*b)) >> 8);
1516 }
1517 
// convert an x-by-y 8-bit image from img_n components per pixel to req_comp.
//   - when req_comp == img_n, 'data' is returned untouched
//   - otherwise a new buffer of req_comp*x*y bytes is allocated, filled, and
//     returned, and 'data' is freed
//   - if the allocation fails, 'data' is freed and NULL is returned after
//     reporting out-of-memory through stbi__errpuc
// 1- and 2-component sources are treated as gray (+alpha) and 3- and
// 4-component sources as r,g,b (+alpha); an alpha that has to be invented is 255.
static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int i,j;
   unsigned char *good;

   if (req_comp == img_n) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   // overflow-checked allocation of the destination image
   good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0);
   if (good == NULL) {
      STBI_FREE(data);
      return stbi__errpuc("outofmem", "Out of memory");
   }

   for (j=0; j < (int) y; ++j) {
      // start of scanline j in the source and in the destination
      unsigned char *src  = data + j * x * img_n   ;
      unsigned char *dest = good + j * x * req_comp;

      // STBI__COMBO packs the (source, destination) component counts into one
      // switch value; STBI__CASE opens the matching case label together with
      // a loop that runs x times, stepping src and dest by one pixel each
      #define STBI__COMBO(a,b)  ((a)*8+(b))
      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
      // convert source image with img_n components to one with req_comp components;
      // avoid switch per pixel, so use switch per scanline and massive macros
      switch (STBI__COMBO(img_n, req_comp)) {
         STBI__CASE(1,2) { dest[0]=src[0], dest[1]=255;                                     } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=255;                     } break;
         STBI__CASE(2,1) { dest[0]=src[0];                                                  } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1];                  } break;
         STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255;        } break;
         STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255;    } break;
         STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2];                    } break;
         // any other pairing is only caught by the assert; the scanline is left unwritten
         default: STBI_ASSERT(0);
      }
      #undef STBI__CASE
   }

   STBI_FREE(data);
   return good;
}
1561 
stbi__compute_y_16(int r,int g,int b)1562 static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
1563 {
1564    return (stbi__uint16) (((r*77) + (g*150) +  (29*b)) >> 8);
1565 }
1566 
// convert an x-by-y 16-bit image from img_n components per pixel to req_comp.
//   - when req_comp == img_n, 'data' is returned untouched
//   - otherwise a new buffer of req_comp*x*y 16-bit samples is allocated,
//     filled, and returned, and 'data' is freed
//   - if the allocation fails (or its size would overflow), 'data' is freed
//     and NULL is returned after reporting out-of-memory through stbi__errpuc
// 1- and 2-component sources are treated as gray (+alpha) and 3- and
// 4-component sources as r,g,b (+alpha); an alpha that has to be invented is 0xffff.
static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int i,j;
   stbi__uint16 *good;

   if (req_comp == img_n) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   // overflow-checked allocation, like the 8-bit converter; the 2 bytes per
   // sample are folded into the first factor (req_comp is at most 4)
   good = (stbi__uint16 *) stbi__malloc_mad3(req_comp * 2, x, y, 0);
   if (good == NULL) {
      STBI_FREE(data);
      return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
   }

   for (j=0; j < (int) y; ++j) {
      // start of scanline j in the source and in the destination
      stbi__uint16 *src  = data + j * x * img_n   ;
      stbi__uint16 *dest = good + j * x * req_comp;

      // STBI__COMBO packs the (source, destination) component counts into one
      // switch value; STBI__CASE opens the matching case label together with
      // a loop that runs x times, stepping src and dest by one pixel each
      #define STBI__COMBO(a,b)  ((a)*8+(b))
      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
      // convert source image with img_n components to one with req_comp components;
      // avoid switch per pixel, so use switch per scanline and massive macros
      switch (STBI__COMBO(img_n, req_comp)) {
         STBI__CASE(1,2) { dest[0]=src[0], dest[1]=0xffff;                                     } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=0xffff;                     } break;
         STBI__CASE(2,1) { dest[0]=src[0];                                                     } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1];                     } break;
         STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=0xffff;        } break;
         STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = 0xffff; } break;
         STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2];                       } break;
         // any other pairing is only caught by the assert; the scanline is left unwritten
         default: STBI_ASSERT(0);
      }
      #undef STBI__CASE
   }

   STBI_FREE(data);
   return good;
}
1610 
1611 #ifndef STBI_NO_LINEAR
// Expand an 8-bit image into a newly allocated float image with the same
// dimensions and channel count.
//
// Non-alpha channels become pow(v/255, stbi__l2h_gamma) * stbi__l2h_scale.
// When comp is even the last channel is treated as alpha and is only divided
// by 255. 'data' is freed whenever it is non-NULL. Returns NULL when 'data' is
// NULL and the value of stbi__errpf() when the allocation fails.
static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
{
   int pixel, ch, color_channels;
   float *hdr;

   if (data == NULL) return NULL;

   hdr = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
   if (hdr == NULL) {
      STBI_FREE(data);
      return stbi__errpf("outofmem", "Out of memory");
   }

   // an odd channel count has no alpha; an even one ends with alpha
   color_channels = (comp & 1) ? comp : comp-1;

   for (pixel=0; pixel < x*y; ++pixel) {
      int base = pixel*comp;
      for (ch=0; ch < color_channels; ++ch)
         hdr[base + ch] = (float) (pow(data[base + ch]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
      // ch now indexes the alpha channel, if there is one
      if (ch < comp)
         hdr[base + ch] = data[base + ch]/255.0f;
   }

   STBI_FREE(data);
   return hdr;
}
1630 #endif
1631 
1632 #ifndef STBI_NO_HDR
1633 #define stbi__float2int(x)   ((int) (x))
stbi__hdr_to_ldr(float * data,int x,int y,int comp)1634 static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp)
1635 {
1636    int i,k,n;
1637    stbi_uc *output;
1638    if (!data) return NULL;
1639    output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
1640    if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
1641    // compute number of non-alpha components
1642    if (comp & 1) n = comp; else n = comp-1;
1643    for (i=0; i < x*y; ++i) {
1644       for (k=0; k < n; ++k) {
1645          float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
1646          if (z < 0) z = 0;
1647          if (z > 255) z = 255;
1648          output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1649       }
1650       if (k < comp) {
1651          float z = data[i*comp+k] * 255 + 0.5f;
1652          if (z < 0) z = 0;
1653          if (z > 255) z = 255;
1654          output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1655       }
1656    }
1657    STBI_FREE(data);
1658    return output;
1659 }
1660 #endif
1661 
1662 //////////////////////////////////////////////////////////////////////////////
1663 //
1664 //  "baseline" JPEG/JFIF decoder
1665 //
1666 //    simple implementation
1667 //      - doesn't support delayed output of y-dimension
1668 //      - simple interface (only one output format: 8-bit interleaved RGB)
1669 //      - doesn't try to recover corrupt jpegs
1670 //      - doesn't allow partial loading, loading multiple at once
1671 //      - still fast on x86 (copying globals into locals doesn't help x86)
1672 //      - allocates lots of intermediate memory (full size of all components)
1673 //        - non-interleaved case requires this anyway
1674 //        - allows good upsampling (see next)
1675 //    high-quality
1676 //      - upsampled channels are bilinearly interpolated, even across blocks
1677 //      - quality integer IDCT derived from IJG's 'slow'
1678 //    performance
1679 //      - fast huffman; reasonable integer IDCT
1680 //      - some SIMD kernels for common paths on targets with SSE2/NEON
1681 //      - uses a lot of intermediate memory, could cache poorly
1682 
1683 #ifndef STBI_NO_JPEG
1684 
1685 // huffman decoding acceleration
1686 #define FAST_BITS   9  // larger handles more cases; smaller stomps less cache
1687 
1688 typedef struct
1689 {
1690    stbi_uc  fast[1 << FAST_BITS];
1691    // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
1692    stbi__uint16 code[256];
1693    stbi_uc  values[256];
1694    stbi_uc  size[257];
1695    unsigned int maxcode[18];
1696    int    delta[17];   // old 'firstsymbol' - old 'firstcode'
1697 } stbi__huffman;
1698 
1699 typedef struct
1700 {
1701    stbi__context *s;
1702    stbi__huffman huff_dc[4];
1703    stbi__huffman huff_ac[4];
1704    stbi__uint16 dequant[4][64];
1705    stbi__int16 fast_ac[4][1 << FAST_BITS];
1706 
1707 // sizes for components, interleaved MCUs
1708    int img_h_max, img_v_max;
1709    int img_mcu_x, img_mcu_y;
1710    int img_mcu_w, img_mcu_h;
1711 
1712 // definition of jpeg image component
1713    struct
1714    {
1715       int id;
1716       int h,v;
1717       int tq;
1718       int hd,ha;
1719       int dc_pred;
1720 
1721       int x,y,w2,h2;
1722       stbi_uc *data;
1723       void *raw_data, *raw_coeff;
1724       stbi_uc *linebuf;
1725       short   *coeff;   // progressive only
1726       int      coeff_w, coeff_h; // number of 8x8 coefficient blocks
1727    } img_comp[4];
1728 
1729    stbi__uint32   code_buffer; // jpeg entropy-coded buffer
1730    int            code_bits;   // number of valid bits
1731    unsigned char  marker;      // marker seen while filling entropy buffer
1732    int            nomore;      // flag if we saw a marker so must stop
1733 
1734    int            progressive;
1735    int            spec_start;
1736    int            spec_end;
1737    int            succ_high;
1738    int            succ_low;
1739    int            eob_run;
1740    int            jfif;
1741    int            app14_color_transform; // Adobe APP14 tag
1742    int            rgb;
1743 
1744    int scan_n, order[4];
1745    int restart_interval, todo;
1746 
1747 // kernels
1748    void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
1749    void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
1750    stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
1751 } stbi__jpeg;
1752 
// Fill the decoding tables of 'h' (size, code, maxcode, delta, fast) from
// count[0..15], where count[i] is the number of codes that are i+1 bits long.
// h->values is not touched here.
//
// Returns 1 on success. Returns the value of stbi__err() when the counts
// describe more than 256 symbols or when the codes of some length do not fit
// in that many bits.
static int stbi__build_huffman(stbi__huffman *h, int *count)
{
   int i,j,k=0;
   unsigned int code;
   // build size list for each symbol (from JPEG spec)
   for (i=0; i < 16; ++i) {
      for (j=0; j < count[i]; ++j) {
         // size[] has room for 256 lengths plus the terminating 0 and code[]
         // for 256 codes; the counts come from the file, so refuse anything
         // larger instead of writing past the arrays
         if (k >= 256) return stbi__err("bad size list","Corrupt JPEG");
         h->size[k++] = (stbi_uc) (i+1);
      }
   }
   h->size[k] = 0;

   // compute actual symbols (from jpeg spec)
   code = 0;
   k = 0;
   for(j=1; j <= 16; ++j) {
      // compute delta to add to code to compute symbol id
      h->delta[j] = k - code;
      if (h->size[k] == j) {
         while (h->size[k] == j)
            h->code[k++] = (stbi__uint16) (code++);
         if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG");
      }
      // compute largest code + 1 for this size, preshifted as needed later
      h->maxcode[j] = code << (16-j);
      code <<= 1;
   }
   // sentinel: every 16-bit value compares below it, which ends the length
   // search in the decoder
   h->maxcode[j] = 0xffffffff;

   // build non-spec acceleration table; 255 is flag for not-accelerated
   memset(h->fast, 255, 1 << FAST_BITS);
   for (i=0; i < k; ++i) {
      int s = h->size[i];
      if (s <= FAST_BITS) {
         // every FAST_BITS-wide bit pattern that starts with this code maps to symbol i
         int c = h->code[i] << (FAST_BITS-s);
         int m = 1 << (FAST_BITS-s);
         for (j=0; j < m; ++j) {
            h->fast[c+j] = (stbi_uc) i;
         }
      }
   }
   return 1;
}
1794 
1795 // build a table that decodes both magnitude and value of small ACs in
1796 // one go.
stbi__build_fast_ac(stbi__int16 * fast_ac,stbi__huffman * h)1797 static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
1798 {
1799    int i;
1800    for (i=0; i < (1 << FAST_BITS); ++i) {
1801       stbi_uc fast = h->fast[i];
1802       fast_ac[i] = 0;
1803       if (fast < 255) {
1804          int rs = h->values[fast];
1805          int run = (rs >> 4) & 15;
1806          int magbits = rs & 15;
1807          int len = h->size[fast];
1808 
1809          if (magbits && len + magbits <= FAST_BITS) {
1810             // magnitude code followed by receive_extend code
1811             int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
1812             int m = 1 << (magbits - 1);
1813             if (k < m) k += (~0U << magbits) + 1;
1814             // if the result is small enough, we can fit it in fast_ac table
1815             if (k >= -128 && k <= 127)
1816                fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits));
1817          }
1818       }
1819    }
1820 }
1821 
// Refill the bit buffer one byte at a time until it holds more than 24 bits.
//
// Once a marker has been seen (j->nomore), zero bytes are fed instead of
// reading further input. A 0xff byte followed by 0 is kept as a data byte
// 0xff; 0xff followed by anything else records that byte as j->marker, sets
// j->nomore and returns immediately without adding bits.
static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
{
   for (;;) {
      unsigned int byte = 0;
      if (!j->nomore) byte = stbi__get8(j->s);

      if (byte == 0xff) {
         int next;
         do {
            next = stbi__get8(j->s);   // consume fill bytes
         } while (next == 0xff);
         if (next != 0) {
            j->marker = (unsigned char) next;
            j->nomore = 1;
            return;
         }
      }

      j->code_buffer |= byte << (24 - j->code_bits);
      j->code_bits += 8;
      if (j->code_bits > 24) break;
   }
}
1839 
1840 // (1 << n) - 1
1841 static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
1842 
1843 // decode a jpeg huffman value from the bitstream
stbi__jpeg_huff_decode(stbi__jpeg * j,stbi__huffman * h)1844 stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
1845 {
1846    unsigned int temp;
1847    int c,k;
1848 
1849    if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1850 
1851    // look at the top FAST_BITS and determine what symbol ID it is,
1852    // if the code is <= FAST_BITS
1853    c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1854    k = h->fast[c];
1855    if (k < 255) {
1856       int s = h->size[k];
1857       if (s > j->code_bits)
1858          return -1;
1859       j->code_buffer <<= s;
1860       j->code_bits -= s;
1861       return h->values[k];
1862    }
1863 
1864    // naive test is to shift the code_buffer down so k bits are
1865    // valid, then test against maxcode. To speed this up, we've
1866    // preshifted maxcode left so that it has (16-k) 0s at the
1867    // end; in other words, regardless of the number of bits, it
1868    // wants to be compared against something shifted to have 16;
1869    // that way we don't need to shift inside the loop.
1870    temp = j->code_buffer >> 16;
1871    for (k=FAST_BITS+1 ; ; ++k)
1872       if (temp < h->maxcode[k])
1873          break;
1874    if (k == 17) {
1875       // error! code not found
1876       j->code_bits -= 16;
1877       return -1;
1878    }
1879 
1880    if (k > j->code_bits)
1881       return -1;
1882 
1883    // convert the huffman code to the symbol id
1884    c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
1885    STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
1886 
1887    // convert the id to a symbol
1888    j->code_bits -= k;
1889    j->code_buffer <<= k;
1890    return h->values[c];
1891 }
1892 
// stbi__jbias[n] = (-1<<n) + 1: the offset that stbi__extend_receive adds to an
// n-bit field whose leading bit is clear, giving the negative half of the range
static const int stbi__jbias[16] =
{
       0,     -1,     -3,     -7,
     -15,    -31,    -63,   -127,
    -255,   -511,  -1023,  -2047,
   -4095,  -8191, -16383, -32767
};
1895 
1896 // combined JPEG 'receive' and JPEG 'extend', since baseline
1897 // always extends everything it receives.
stbi__extend_receive(stbi__jpeg * j,int n)1898 stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
1899 {
1900    unsigned int k;
1901    int sgn;
1902    if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1903 
1904    sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
1905    k = stbi_lrot(j->code_buffer, n);
1906    STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask)));
1907    j->code_buffer = k & ~stbi__bmask[n];
1908    k &= stbi__bmask[n];
1909    j->code_bits -= n;
1910    return k + (stbi__jbias[n] & ~sgn);
1911 }
1912 
1913 // get some unsigned bits
stbi__jpeg_get_bits(stbi__jpeg * j,int n)1914 stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
1915 {
1916    unsigned int k;
1917    if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1918    k = stbi_lrot(j->code_buffer, n);
1919    j->code_buffer = k & ~stbi__bmask[n];
1920    k &= stbi__bmask[n];
1921    j->code_bits -= n;
1922    return k;
1923 }
1924 
stbi__jpeg_get_bit(stbi__jpeg * j)1925 stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
1926 {
1927    unsigned int k;
1928    if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
1929    k = j->code_buffer;
1930    j->code_buffer <<= 1;
1931    --j->code_bits;
1932    return k & 0x80000000;
1933 }
1934 
1935 // given a value that's at position X in the zigzag stream,
1936 // where does it appear in the 8x8 matrix coded as row-major?
1937 static const stbi_uc stbi__jpeg_dezigzag[64+15] =
1938 {
1939     0,  1,  8, 16,  9,  2,  3, 10,
1940    17, 24, 32, 25, 18, 11,  4,  5,
1941    12, 19, 26, 33, 40, 48, 41, 34,
1942    27, 20, 13,  6,  7, 14, 21, 28,
1943    35, 42, 49, 56, 57, 50, 43, 36,
1944    29, 22, 15, 23, 30, 37, 44, 51,
1945    58, 59, 52, 45, 38, 31, 39, 46,
1946    53, 60, 61, 54, 47, 55, 62, 63,
1947    // let corrupt input sample past end
1948    63, 63, 63, 63, 63, 63, 63, 63,
1949    63, 63, 63, 63, 63, 63, 63
1950 };
1951 
1952 // decode one 64-entry block--
stbi__jpeg_decode_block(stbi__jpeg * j,short data[64],stbi__huffman * hdc,stbi__huffman * hac,stbi__int16 * fac,int b,stbi__uint16 * dequant)1953 static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
1954 {
1955    int diff,dc,k;
1956    int t;
1957 
1958    if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1959    t = stbi__jpeg_huff_decode(j, hdc);
1960    if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1961 
1962    // 0 all the ac values now so we can do it 32-bits at a time
1963    memset(data,0,64*sizeof(data[0]));
1964 
1965    diff = t ? stbi__extend_receive(j, t) : 0;
1966    dc = j->img_comp[b].dc_pred + diff;
1967    j->img_comp[b].dc_pred = dc;
1968    data[0] = (short) (dc * dequant[0]);
1969 
1970    // decode AC components, see JPEG spec
1971    k = 1;
1972    do {
1973       unsigned int zig;
1974       int c,r,s;
1975       if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1976       c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1977       r = fac[c];
1978       if (r) { // fast-AC path
1979          k += (r >> 4) & 15; // run
1980          s = r & 15; // combined length
1981          j->code_buffer <<= s;
1982          j->code_bits -= s;
1983          // decode into unzigzag'd location
1984          zig = stbi__jpeg_dezigzag[k++];
1985          data[zig] = (short) ((r >> 8) * dequant[zig]);
1986       } else {
1987          int rs = stbi__jpeg_huff_decode(j, hac);
1988          if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1989          s = rs & 15;
1990          r = rs >> 4;
1991          if (s == 0) {
1992             if (rs != 0xf0) break; // end block
1993             k += 16;
1994          } else {
1995             k += r;
1996             // decode into unzigzag'd location
1997             zig = stbi__jpeg_dezigzag[k++];
1998             data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
1999          }
2000       }
2001    } while (k < 64);
2002    return 1;
2003 }
2004 
// Decode the DC coefficient of one block in a progressive scan.
//
// When j->succ_high is 0 this is the first DC pass: data[] is cleared, the DC
// difference for component 'b' is decoded with hdc and the new DC value,
// scaled by 2^succ_low, is stored in data[0]. Otherwise it is a refinement
// pass that reads one bit and, if set, adds 2^succ_low to data[0].
// Returns 1 on success and the value of stbi__err() when the scan also covers
// AC coefficients (spec_end != 0) or the huffman code is bad.
static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
{
   int diff,dc;
   int t;
   if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");

   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);

   if (j->succ_high == 0) {
      // first scan for DC coefficient, must be first
      memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
      t = stbi__jpeg_huff_decode(j, hdc);
      // t is -1 on a decode error and otherwise any byte stored in the table,
      // but only 0..15 is a usable bit count for stbi__extend_receive
      if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG");
      diff = t ? stbi__extend_receive(j, t) : 0;

      dc = j->img_comp[b].dc_pred + diff;
      j->img_comp[b].dc_pred = dc;
      // multiply rather than shift: dc may be negative
      data[0] = (short) (dc * (1 << j->succ_low));
   } else {
      // refinement scan for DC coefficient
      if (stbi__jpeg_get_bit(j))
         data[0] += (short) (1 << j->succ_low);
   }
   return 1;
}
2029 
2030 // @OPTIMIZE: store non-zigzagged during the decode passes,
2031 // and only de-zigzag when dequantizing
stbi__jpeg_decode_block_prog_ac(stbi__jpeg * j,short data[64],stbi__huffman * hac,stbi__int16 * fac)2032 static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
2033 {
2034    int k;
2035    if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
2036 
2037    if (j->succ_high == 0) {
2038       int shift = j->succ_low;
2039 
2040       if (j->eob_run) {
2041          --j->eob_run;
2042          return 1;
2043       }
2044 
2045       k = j->spec_start;
2046       do {
2047          unsigned int zig;
2048          int c,r,s;
2049          if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2050          c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
2051          r = fac[c];
2052          if (r) { // fast-AC path
2053             k += (r >> 4) & 15; // run
2054             s = r & 15; // combined length
2055             j->code_buffer <<= s;
2056             j->code_bits -= s;
2057             zig = stbi__jpeg_dezigzag[k++];
2058             data[zig] = (short) ((r >> 8) << shift);
2059          } else {
2060             int rs = stbi__jpeg_huff_decode(j, hac);
2061             if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2062             s = rs & 15;
2063             r = rs >> 4;
2064             if (s == 0) {
2065                if (r < 15) {
2066                   j->eob_run = (1 << r);
2067                   if (r)
2068                      j->eob_run += stbi__jpeg_get_bits(j, r);
2069                   --j->eob_run;
2070                   break;
2071                }
2072                k += 16;
2073             } else {
2074                k += r;
2075                zig = stbi__jpeg_dezigzag[k++];
2076                data[zig] = (short) (stbi__extend_receive(j,s) << shift);
2077             }
2078          }
2079       } while (k <= j->spec_end);
2080    } else {
2081       // refinement scan for these AC coefficients
2082 
2083       short bit = (short) (1 << j->succ_low);
2084 
2085       if (j->eob_run) {
2086          --j->eob_run;
2087          for (k = j->spec_start; k <= j->spec_end; ++k) {
2088             short *p = &data[stbi__jpeg_dezigzag[k]];
2089             if (*p != 0)
2090                if (stbi__jpeg_get_bit(j))
2091                   if ((*p & bit)==0) {
2092                      if (*p > 0)
2093                         *p += bit;
2094                      else
2095                         *p -= bit;
2096                   }
2097          }
2098       } else {
2099          k = j->spec_start;
2100          do {
2101             int r,s;
2102             int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
2103             if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2104             s = rs & 15;
2105             r = rs >> 4;
2106             if (s == 0) {
2107                if (r < 15) {
2108                   j->eob_run = (1 << r) - 1;
2109                   if (r)
2110                      j->eob_run += stbi__jpeg_get_bits(j, r);
2111                   r = 64; // force end of block
2112                } else {
2113                   // r=15 s=0 should write 16 0s, so we just do
2114                   // a run of 15 0s and then write s (which is 0),
2115                   // so we don't have to do anything special here
2116                }
2117             } else {
2118                if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
2119                // sign bit
2120                if (stbi__jpeg_get_bit(j))
2121                   s = bit;
2122                else
2123                   s = -bit;
2124             }
2125 
2126             // advance by r
2127             while (k <= j->spec_end) {
2128                short *p = &data[stbi__jpeg_dezigzag[k++]];
2129                if (*p != 0) {
2130                   if (stbi__jpeg_get_bit(j))
2131                      if ((*p & bit)==0) {
2132                         if (*p > 0)
2133                            *p += bit;
2134                         else
2135                            *p -= bit;
2136                      }
2137                } else {
2138                   if (r == 0) {
2139                      *p = (short) s;
2140                      break;
2141                   }
2142                   --r;
2143                }
2144             }
2145          } while (k <= j->spec_end);
2146       }
2147    }
2148    return 1;
2149 }
2150 
2151 // take a -128..127 value and stbi__clamp it and convert to 0..255
stbi__clamp(int x)2152 stbi_inline static stbi_uc stbi__clamp(int x)
2153 {
2154    // trick to use a single test to catch both cases
2155    if ((unsigned int) x > 255) {
2156       if (x < 0) return 0;
2157       if (x > 255) return 255;
2158    }
2159    return (stbi_uc) x;
2160 }
2161 
2162 #define stbi__f2f(x)  ((int) (((x) * 4096 + 0.5)))
2163 #define stbi__fsh(x)  ((x) * 4096)
2164 
2165 // derived from jidctint -- DCT_ISLOW
2166 #define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
2167    int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
2168    p2 = s2;                                    \
2169    p3 = s6;                                    \
2170    p1 = (p2+p3) * stbi__f2f(0.5411961f);       \
2171    t2 = p1 + p3*stbi__f2f(-1.847759065f);      \
2172    t3 = p1 + p2*stbi__f2f( 0.765366865f);      \
2173    p2 = s0;                                    \
2174    p3 = s4;                                    \
2175    t0 = stbi__fsh(p2+p3);                      \
2176    t1 = stbi__fsh(p2-p3);                      \
2177    x0 = t0+t3;                                 \
2178    x3 = t0-t3;                                 \
2179    x1 = t1+t2;                                 \
2180    x2 = t1-t2;                                 \
2181    t0 = s7;                                    \
2182    t1 = s5;                                    \
2183    t2 = s3;                                    \
2184    t3 = s1;                                    \
2185    p3 = t0+t2;                                 \
2186    p4 = t1+t3;                                 \
2187    p1 = t0+t3;                                 \
2188    p2 = t1+t2;                                 \
2189    p5 = (p3+p4)*stbi__f2f( 1.175875602f);      \
2190    t0 = t0*stbi__f2f( 0.298631336f);           \
2191    t1 = t1*stbi__f2f( 2.053119869f);           \
2192    t2 = t2*stbi__f2f( 3.072711026f);           \
2193    t3 = t3*stbi__f2f( 1.501321110f);           \
2194    p1 = p5 + p1*stbi__f2f(-0.899976223f);      \
2195    p2 = p5 + p2*stbi__f2f(-2.562915447f);      \
2196    p3 = p3*stbi__f2f(-1.961570560f);           \
2197    p4 = p4*stbi__f2f(-0.390180644f);           \
2198    t3 += p1+p4;                                \
2199    t2 += p2+p3;                                \
2200    t1 += p2+p4;                                \
2201    t0 += p1+p3;
2202 
stbi__idct_block(stbi_uc * out,int out_stride,short data[64])2203 static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
2204 {
2205    int i,val[64],*v=val;
2206    stbi_uc *o;
2207    short *d = data;
2208 
2209    // columns
2210    for (i=0; i < 8; ++i,++d, ++v) {
2211       // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
2212       if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
2213            && d[40]==0 && d[48]==0 && d[56]==0) {
2214          //    no shortcut                 0     seconds
2215          //    (1|2|3|4|5|6|7)==0          0     seconds
2216          //    all separate               -0.047 seconds
2217          //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
2218          int dcterm = d[0]*4;
2219          v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
2220       } else {
2221          STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
2222          // constants scaled things up by 1<<12; let's bring them back
2223          // down, but keep 2 extra bits of precision
2224          x0 += 512; x1 += 512; x2 += 512; x3 += 512;
2225          v[ 0] = (x0+t3) >> 10;
2226          v[56] = (x0-t3) >> 10;
2227          v[ 8] = (x1+t2) >> 10;
2228          v[48] = (x1-t2) >> 10;
2229          v[16] = (x2+t1) >> 10;
2230          v[40] = (x2-t1) >> 10;
2231          v[24] = (x3+t0) >> 10;
2232          v[32] = (x3-t0) >> 10;
2233       }
2234    }
2235 
2236    for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
2237       // no fast case since the first 1D IDCT spread components out
2238       STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
2239       // constants scaled things up by 1<<12, plus we had 1<<2 from first
2240       // loop, plus horizontal and vertical each scale by sqrt(8) so together
2241       // we've got an extra 1<<3, so 1<<17 total we need to remove.
2242       // so we want to round that, which means adding 0.5 * 1<<17,
2243       // aka 65536. Also, we'll end up with -128 to 127 that we want
2244       // to encode as 0..255 by adding 128, so we'll add that before the shift
2245       x0 += 65536 + (128<<17);
2246       x1 += 65536 + (128<<17);
2247       x2 += 65536 + (128<<17);
2248       x3 += 65536 + (128<<17);
2249       // tried computing the shifts into temps, or'ing the temps to see
2250       // if any were out of range, but that was slower
2251       o[0] = stbi__clamp((x0+t3) >> 17);
2252       o[7] = stbi__clamp((x0-t3) >> 17);
2253       o[1] = stbi__clamp((x1+t2) >> 17);
2254       o[6] = stbi__clamp((x1-t2) >> 17);
2255       o[2] = stbi__clamp((x2+t1) >> 17);
2256       o[5] = stbi__clamp((x2-t1) >> 17);
2257       o[3] = stbi__clamp((x3+t0) >> 17);
2258       o[4] = stbi__clamp((x3-t0) >> 17);
2259    }
2260 }
2261 
2262 #ifdef STBI_SSE2
2263 // sse2 integer IDCT. not the fastest possible implementation but it
2264 // produces bit-identical results to the generic C version so it's
2265 // fully "transparent".
stbi__idct_simd(stbi_uc * out,int out_stride,short data[64])2266 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2267 {
2268    // This is constructed to match our regular (generic) integer IDCT exactly.
2269    __m128i row0, row1, row2, row3, row4, row5, row6, row7;
2270    __m128i tmp;
2271 
2272    // dot product constant: even elems=x, odd elems=y
2273    #define dct_const(x,y)  _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))
2274 
2275    // out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)
2276    // out(1) = c1[even]*x + c1[odd]*y
2277    #define dct_rot(out0,out1, x,y,c0,c1) \
2278       __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
2279       __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
2280       __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
2281       __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
2282       __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
2283       __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
2284 
2285    // out = in << 12  (in 16-bit, out 32-bit)
2286    #define dct_widen(out, in) \
2287       __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
2288       __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
2289 
2290    // wide add
2291    #define dct_wadd(out, a, b) \
2292       __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
2293       __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
2294 
2295    // wide sub
2296    #define dct_wsub(out, a, b) \
2297       __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
2298       __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
2299 
2300    // butterfly a/b, add bias, then shift by "s" and pack
2301    #define dct_bfly32o(out0, out1, a,b,bias,s) \
2302       { \
2303          __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
2304          __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
2305          dct_wadd(sum, abiased, b); \
2306          dct_wsub(dif, abiased, b); \
2307          out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
2308          out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
2309       }
2310 
2311    // 8-bit interleave step (for transposes)
2312    #define dct_interleave8(a, b) \
2313       tmp = a; \
2314       a = _mm_unpacklo_epi8(a, b); \
2315       b = _mm_unpackhi_epi8(tmp, b)
2316 
2317    // 16-bit interleave step (for transposes)
2318    #define dct_interleave16(a, b) \
2319       tmp = a; \
2320       a = _mm_unpacklo_epi16(a, b); \
2321       b = _mm_unpackhi_epi16(tmp, b)
2322 
2323    #define dct_pass(bias,shift) \
2324       { \
2325          /* even part */ \
2326          dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
2327          __m128i sum04 = _mm_add_epi16(row0, row4); \
2328          __m128i dif04 = _mm_sub_epi16(row0, row4); \
2329          dct_widen(t0e, sum04); \
2330          dct_widen(t1e, dif04); \
2331          dct_wadd(x0, t0e, t3e); \
2332          dct_wsub(x3, t0e, t3e); \
2333          dct_wadd(x1, t1e, t2e); \
2334          dct_wsub(x2, t1e, t2e); \
2335          /* odd part */ \
2336          dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
2337          dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
2338          __m128i sum17 = _mm_add_epi16(row1, row7); \
2339          __m128i sum35 = _mm_add_epi16(row3, row5); \
2340          dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
2341          dct_wadd(x4, y0o, y4o); \
2342          dct_wadd(x5, y1o, y5o); \
2343          dct_wadd(x6, y2o, y5o); \
2344          dct_wadd(x7, y3o, y4o); \
2345          dct_bfly32o(row0,row7, x0,x7,bias,shift); \
2346          dct_bfly32o(row1,row6, x1,x6,bias,shift); \
2347          dct_bfly32o(row2,row5, x2,x5,bias,shift); \
2348          dct_bfly32o(row3,row4, x3,x4,bias,shift); \
2349       }
2350 
2351    __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
2352    __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
2353    __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
2354    __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
2355    __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
2356    __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
2357    __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
2358    __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));
2359 
2360    // rounding biases in column/row passes, see stbi__idct_block for explanation.
2361    __m128i bias_0 = _mm_set1_epi32(512);
2362    __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));
2363 
2364    // load
2365    row0 = _mm_load_si128((const __m128i *) (data + 0*8));
2366    row1 = _mm_load_si128((const __m128i *) (data + 1*8));
2367    row2 = _mm_load_si128((const __m128i *) (data + 2*8));
2368    row3 = _mm_load_si128((const __m128i *) (data + 3*8));
2369    row4 = _mm_load_si128((const __m128i *) (data + 4*8));
2370    row5 = _mm_load_si128((const __m128i *) (data + 5*8));
2371    row6 = _mm_load_si128((const __m128i *) (data + 6*8));
2372    row7 = _mm_load_si128((const __m128i *) (data + 7*8));
2373 
2374    // column pass
2375    dct_pass(bias_0, 10);
2376 
2377    {
2378       // 16bit 8x8 transpose pass 1
2379       dct_interleave16(row0, row4);
2380       dct_interleave16(row1, row5);
2381       dct_interleave16(row2, row6);
2382       dct_interleave16(row3, row7);
2383 
2384       // transpose pass 2
2385       dct_interleave16(row0, row2);
2386       dct_interleave16(row1, row3);
2387       dct_interleave16(row4, row6);
2388       dct_interleave16(row5, row7);
2389 
2390       // transpose pass 3
2391       dct_interleave16(row0, row1);
2392       dct_interleave16(row2, row3);
2393       dct_interleave16(row4, row5);
2394       dct_interleave16(row6, row7);
2395    }
2396 
2397    // row pass
2398    dct_pass(bias_1, 17);
2399 
2400    {
2401       // pack
2402       __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
2403       __m128i p1 = _mm_packus_epi16(row2, row3);
2404       __m128i p2 = _mm_packus_epi16(row4, row5);
2405       __m128i p3 = _mm_packus_epi16(row6, row7);
2406 
2407       // 8bit 8x8 transpose pass 1
2408       dct_interleave8(p0, p2); // a0e0a1e1...
2409       dct_interleave8(p1, p3); // c0g0c1g1...
2410 
2411       // transpose pass 2
2412       dct_interleave8(p0, p1); // a0c0e0g0...
2413       dct_interleave8(p2, p3); // b0d0f0h0...
2414 
2415       // transpose pass 3
2416       dct_interleave8(p0, p2); // a0b0c0d0...
2417       dct_interleave8(p1, p3); // a4b4c4d4...
2418 
2419       // store
2420       _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
2421       _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
2422       _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
2423       _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
2424       _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
2425       _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
2426       _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
2427       _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
2428    }
2429 
2430 #undef dct_const
2431 #undef dct_rot
2432 #undef dct_widen
2433 #undef dct_wadd
2434 #undef dct_wsub
2435 #undef dct_bfly32o
2436 #undef dct_interleave8
2437 #undef dct_interleave16
2438 #undef dct_pass
2439 }
2440 
2441 #endif // STBI_SSE2
2442 
2443 #ifdef STBI_NEON
2444 
// NEON integer IDCT. should produce bit-identical
// results to the generic C version.
//
// Reads one 8x8 block of 16-bit coefficients from data[] (eight rows of
// eight shorts) and writes eight rows of eight bytes to out, stepping
// out_stride bytes between rows.
//
// Pipeline: load rows -> add DC bias -> column pass -> 16-bit transpose ->
// row pass -> round/saturate to 8 bits -> 8-bit transpose -> store.
static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
{
   int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;

   // fixed-point multiplier constants, one value broadcast to all four lanes.
   // NOTE(review): stbi__f2f is defined outside this chunk; dct_widen below
   // scales by 1<<12, so presumably these carry 12 fractional bits -- confirm.
   int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
   int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
   int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
   int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
   int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
   int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
   int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
   int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
   int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
   int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
   int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
   int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));

// All "wide" helpers below DECLARE a pair of 32-bit vectors named out_l / out_h
// (low and high four lanes of an 8-lane row), so every output name must be
// unique within the scope where the macro is expanded.

// out = inq * coeff, widening 16x16 -> 32 bits
#define dct_long_mul(out, inq, coeff) \
   int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
   int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)

// out = acc + inq * coeff, widening multiply-accumulate
#define dct_long_mac(out, acc, inq, coeff) \
   int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
   int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)

// out = inq << 12, widened to 32 bits (same scale as the multiplied terms)
#define dct_widen(out, inq) \
   int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
   int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)

// wide add
#define dct_wadd(out, a, b) \
   int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
   int32x4_t out##_h = vaddq_s32(a##_h, b##_h)

// wide sub
#define dct_wsub(out, a, b) \
   int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
   int32x4_t out##_h = vsubq_s32(a##_h, b##_h)

// butterfly a/b, then shift using "shiftop" by "s" and pack
// (out0 = narrowed a+b, out1 = narrowed a-b; the braces keep the
// sum/dif temporaries local so the macro can be expanded repeatedly)
#define dct_bfly32o(out0,out1, a,b,shiftop,s) \
   { \
      dct_wadd(sum, a, b); \
      dct_wsub(dif, a, b); \
      out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
      out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
   }

// One 1-D IDCT pass applied lane-wise across row0..row7 (each lane is an
// independent 8-point transform). Reads row0..row7 and overwrites them with
// the results, narrowed back to 16 bits via shiftop(value, shift).
#define dct_pass(shiftop, shift) \
   { \
      /* even part */ \
      int16x8_t sum26 = vaddq_s16(row2, row6); \
      dct_long_mul(p1e, sum26, rot0_0); \
      dct_long_mac(t2e, p1e, row6, rot0_1); \
      dct_long_mac(t3e, p1e, row2, rot0_2); \
      int16x8_t sum04 = vaddq_s16(row0, row4); \
      int16x8_t dif04 = vsubq_s16(row0, row4); \
      dct_widen(t0e, sum04); \
      dct_widen(t1e, dif04); \
      dct_wadd(x0, t0e, t3e); \
      dct_wsub(x3, t0e, t3e); \
      dct_wadd(x1, t1e, t2e); \
      dct_wsub(x2, t1e, t2e); \
      /* odd part */ \
      int16x8_t sum15 = vaddq_s16(row1, row5); \
      int16x8_t sum17 = vaddq_s16(row1, row7); \
      int16x8_t sum35 = vaddq_s16(row3, row5); \
      int16x8_t sum37 = vaddq_s16(row3, row7); \
      int16x8_t sumodd = vaddq_s16(sum17, sum35); \
      dct_long_mul(p5o, sumodd, rot1_0); \
      dct_long_mac(p1o, p5o, sum17, rot1_1); \
      dct_long_mac(p2o, p5o, sum35, rot1_2); \
      dct_long_mul(p3o, sum37, rot2_0); \
      dct_long_mul(p4o, sum15, rot2_1); \
      dct_wadd(sump13o, p1o, p3o); \
      dct_wadd(sump24o, p2o, p4o); \
      dct_wadd(sump23o, p2o, p3o); \
      dct_wadd(sump14o, p1o, p4o); \
      dct_long_mac(x4, sump13o, row7, rot3_0); \
      dct_long_mac(x5, sump24o, row5, rot3_1); \
      dct_long_mac(x6, sump23o, row3, rot3_2); \
      dct_long_mac(x7, sump14o, row1, rot3_3); \
      dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
      dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
      dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
      dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
   }

   // load
   row0 = vld1q_s16(data + 0*8);
   row1 = vld1q_s16(data + 1*8);
   row2 = vld1q_s16(data + 2*8);
   row3 = vld1q_s16(data + 3*8);
   row4 = vld1q_s16(data + 4*8);
   row5 = vld1q_s16(data + 5*8);
   row6 = vld1q_s16(data + 6*8);
   row7 = vld1q_s16(data + 7*8);

   // add DC bias
   // (only lane 0 of row 0, i.e. the DC coefficient, gets +1024; after the
   // two passes' net scaling of <<12 >>10 and <<12 >>17 that becomes +128
   // on every output sample)
   row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));

   // column pass
   // (vrshrn_n_s32 = rounding shift right + narrow to 16 bits)
   dct_pass(vrshrn_n_s32, 10);

   // 16bit 8x8 transpose
   {
// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
// whether compilers actually get this is another story, sadly.
#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }

      // pass 1
      dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
      dct_trn16(row2, row3);
      dct_trn16(row4, row5);
      dct_trn16(row6, row7);

      // pass 2
      dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
      dct_trn32(row1, row3);
      dct_trn32(row4, row6);
      dct_trn32(row5, row7);

      // pass 3
      dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
      dct_trn64(row1, row5);
      dct_trn64(row2, row6);
      dct_trn64(row3, row7);

#undef dct_trn16
#undef dct_trn32
#undef dct_trn64
   }

   // row pass
   // vrshrn_n_s32 only supports shifts up to 16, we need
   // 17. so do a non-rounding shift of 16 first then follow
   // up with a rounding shift by 1.
   dct_pass(vshrn_n_s32, 16);

   {
      // pack and round
      // (vqrshrun_n_s16: rounding shift right by 1 -- the 17th bit of the
      // row-pass shift -- then saturate to unsigned 8 bits)
      uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
      uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
      uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
      uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
      uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
      uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
      uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
      uint8x8_t p7 = vqrshrun_n_s16(row7, 1);

      // again, these can translate into one instruction, but often don't.
#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }

      // sadly can't use interleaved stores here since we only write
      // 8 bytes to each scan line!

      // 8x8 8-bit transpose pass 1
      // (the row pass ran on transposed data, so transpose back before storing)
      dct_trn8_8(p0, p1);
      dct_trn8_8(p2, p3);
      dct_trn8_8(p4, p5);
      dct_trn8_8(p6, p7);

      // pass 2
      dct_trn8_16(p0, p2);
      dct_trn8_16(p1, p3);
      dct_trn8_16(p4, p6);
      dct_trn8_16(p5, p7);

      // pass 3
      dct_trn8_32(p0, p4);
      dct_trn8_32(p1, p5);
      dct_trn8_32(p2, p6);
      dct_trn8_32(p3, p7);

      // store
      // (one 8-byte output row per store; out is advanced by the caller's stride)
      vst1_u8(out, p0); out += out_stride;
      vst1_u8(out, p1); out += out_stride;
      vst1_u8(out, p2); out += out_stride;
      vst1_u8(out, p3); out += out_stride;
      vst1_u8(out, p4); out += out_stride;
      vst1_u8(out, p5); out += out_stride;
      vst1_u8(out, p6); out += out_stride;
      vst1_u8(out, p7);

#undef dct_trn8_8
#undef dct_trn8_16
#undef dct_trn8_32
   }

#undef dct_long_mul
#undef dct_long_mac
#undef dct_widen
#undef dct_wadd
#undef dct_wsub
#undef dct_bfly32o
#undef dct_pass
}
2648 
2649 #endif // STBI_NEON
2650 
2651 #define STBI__MARKER_none  0xff
2652 // if there's a pending marker from the entropy stream, return that
2653 // otherwise, fetch from the stream and get a marker. if there's no
2654 // marker, return 0xff, which is never a valid marker value
stbi__get_marker(stbi__jpeg * j)2655 static stbi_uc stbi__get_marker(stbi__jpeg *j)
2656 {
2657    stbi_uc x;
2658    if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
2659    x = stbi__get8(j->s);
2660    if (x != 0xff) return STBI__MARKER_none;
2661    while (x == 0xff)
2662       x = stbi__get8(j->s); // consume repeated 0xff fill bytes
2663    return x;
2664 }
2665 
2666 // in each scan, we'll have scan_n components, and the order
2667 // of the components is specified by order[]
2668 #define STBI__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)
2669 
2670 // after a restart interval, stbi__jpeg_reset the entropy decoder and
2671 // the dc prediction
stbi__jpeg_reset(stbi__jpeg * j)2672 static void stbi__jpeg_reset(stbi__jpeg *j)
2673 {
2674    j->code_bits = 0;
2675    j->code_buffer = 0;
2676    j->nomore = 0;
2677    j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
2678    j->marker = STBI__MARKER_none;
2679    j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
2680    j->eob_run = 0;
2681    // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
2682    // since we don't even allow 1<<30 pixels
2683 }
2684 
// Decode the entropy-coded data of one scan (the scan header has already
// been parsed into z->scan_n, z->order[], z->spec_start, ...).
//
// Baseline (non-progressive): every block is decoded into a temporary
// 64-entry buffer and immediately run through z->idct_block_kernel into the
// component's sample plane.
// Progressive: blocks are decoded in place into the component's coeff plane;
// no IDCT happens here (stbi__jpeg_finish does that once all scans are in).
//
// Each of those has two layouts: a single-component scan walks that
// component's own blocks in scanline order, while a multi-component scan
// walks interleaved MCUs (h*v blocks per component per MCU).
//
// Returns 0 if a block fails to decode. Returns 1 on success, and also when
// the restart countdown expires but no restart marker follows (the scan is
// abandoned early so the caller gets partial/corrupt data rather than none).
static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
{
   stbi__jpeg_reset(z);
   if (!z->progressive) {
      if (z->scan_n == 1) {
         int i,j;
         STBI_SIMD_ALIGN(short, data[64]);
         int n = z->order[0];
         // non-interleaved data, we just need to process one block at a time,
         // in trivial scanline order
         // number of blocks to do just depends on how many actual "pixels" this
         // component has, independent of interleaved MCU blocking and such
         int w = (z->img_comp[n].x+7) >> 3;
         int h = (z->img_comp[n].y+7) >> 3;
         for (j=0; j < h; ++j) {
            for (i=0; i < w; ++i) {
               int ha = z->img_comp[n].ha;
               if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
               // output goes to the 8x8 pixel tile at block (i,j); w2 is the plane's row stride
               z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
               // every data block is an MCU, so countdown the restart interval
               if (--z->todo <= 0) {
                  // top up the bit buffer so that a marker following the data gets noticed
                  // NOTE(review): assumes stbi__grow_buffer_unsafe records it in z->marker -- defined outside this chunk
                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
                  // if it's NOT a restart, then just bail, so we get corrupt data
                  // rather than no data
                  if (!STBI__RESTART(z->marker)) return 1;
                  stbi__jpeg_reset(z);
               }
            }
         }
         return 1;
      } else { // interleaved
         int i,j,k,x,y;
         STBI_SIMD_ALIGN(short, data[64]);
         for (j=0; j < z->img_mcu_y; ++j) {
            for (i=0; i < z->img_mcu_x; ++i) {
               // scan an interleaved mcu... process scan_n components in order
               for (k=0; k < z->scan_n; ++k) {
                  int n = z->order[k];
                  // scan out an mcu's worth of this component; that's just determined
                  // by the basic H and V specified for the component
                  for (y=0; y < z->img_comp[n].v; ++y) {
                     for (x=0; x < z->img_comp[n].h; ++x) {
                        // pixel coordinates of this block's top-left corner in the component plane
                        int x2 = (i*z->img_comp[n].h + x)*8;
                        int y2 = (j*z->img_comp[n].v + y)*8;
                        int ha = z->img_comp[n].ha;
                        if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
                        z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data);
                     }
                  }
               }
               // after all interleaved components, that's an interleaved MCU,
               // so now count down the restart interval
               if (--z->todo <= 0) {
                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
                  // no restart marker: stop here but still report success (see above)
                  if (!STBI__RESTART(z->marker)) return 1;
                  stbi__jpeg_reset(z);
               }
            }
         }
         return 1;
      }
   } else {
      if (z->scan_n == 1) {
         int i,j;
         int n = z->order[0];
         // non-interleaved data, we just need to process one block at a time,
         // in trivial scanline order
         // number of blocks to do just depends on how many actual "pixels" this
         // component has, independent of interleaved MCU blocking and such
         int w = (z->img_comp[n].x+7) >> 3;
         int h = (z->img_comp[n].y+7) >> 3;
         for (j=0; j < h; ++j) {
            for (i=0; i < w; ++i) {
               // coefficients for block (i,j): 64 shorts per block, coeff_w blocks per row
               short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
               // spec_start == 0 selects the DC decoder; any other start uses the AC decoder
               if (z->spec_start == 0) {
                  if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
                     return 0;
               } else {
                  int ha = z->img_comp[n].ha;
                  if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
                     return 0;
               }
               // every data block is an MCU, so countdown the restart interval
               if (--z->todo <= 0) {
                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
                  if (!STBI__RESTART(z->marker)) return 1;
                  stbi__jpeg_reset(z);
               }
            }
         }
         return 1;
      } else { // interleaved
         int i,j,k,x,y;
         for (j=0; j < z->img_mcu_y; ++j) {
            for (i=0; i < z->img_mcu_x; ++i) {
               // scan an interleaved mcu... process scan_n components in order
               for (k=0; k < z->scan_n; ++k) {
                  int n = z->order[k];
                  // scan out an mcu's worth of this component; that's just determined
                  // by the basic H and V specified for the component
                  for (y=0; y < z->img_comp[n].v; ++y) {
                     for (x=0; x < z->img_comp[n].h; ++x) {
                        // block (not pixel) coordinates within the component's coefficient plane
                        int x2 = (i*z->img_comp[n].h + x);
                        int y2 = (j*z->img_comp[n].v + y);
                        short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
                        // this path only ever invokes the DC decoder
                        if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
                           return 0;
                     }
                  }
               }
               // after all interleaved components, that's an interleaved MCU,
               // so now count down the restart interval
               if (--z->todo <= 0) {
                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
                  if (!STBI__RESTART(z->marker)) return 1;
                  stbi__jpeg_reset(z);
               }
            }
         }
         return 1;
      }
   }
}
2808 
// Multiply each of the 64 coefficients in data[] by the matching entry of
// the dequant[] table, in place.
static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
{
   short *coef = data;
   short *stop = data + 64;
   stbi__uint16 *scale = dequant;

   while (coef != stop) {
      *coef *= *scale;
      ++coef;
      ++scale;
   }
}
2815 
// Final step for progressive images: once all scans have filled in the
// coefficient planes, dequantize every block and run the IDCT to produce
// the component's samples. Does nothing for non-progressive images.
static void stbi__jpeg_finish(stbi__jpeg *z)
{
   int comp, bx, by;

   if (!z->progressive)
      return;

   for (comp=0; comp < z->s->img_n; ++comp) {
      // number of 8x8 blocks covering the component's real pixel area
      int blocks_w = (z->img_comp[comp].x+7) >> 3;
      int blocks_h = (z->img_comp[comp].y+7) >> 3;

      for (by=0; by < blocks_h; ++by) {
         for (bx=0; bx < blocks_w; ++bx) {
            // 64 coefficients per block, coeff_w blocks per row
            short *block = z->img_comp[comp].coeff + 64 * (bx + by * z->img_comp[comp].coeff_w);
            // destination 8x8 tile in the sample plane (row stride w2)
            stbi_uc *dest = z->img_comp[comp].data+z->img_comp[comp].w2*by*8+bx*8;

            stbi__jpeg_dequantize(block, z->dequant[z->img_comp[comp].tq]);
            z->idct_block_kernel(dest, z->img_comp[comp].w2, block);
         }
      }
   }
}
2834 
// Handle one marker segment that may appear outside of entropy-coded data.
//
//   m   marker code as returned by stbi__get_marker
//
// Supported segments:
//   DRI  (0xDD)        restart interval  -> z->restart_interval
//   DQT  (0xDB)        quantization tables -> z->dequant[t][], stored de-zigzagged
//   DHT  (0xC4)        huffman tables -> z->huff_dc[] / z->huff_ac[] (+ z->fast_ac[])
//   APPn (0xE0..0xEF)  skipped, except the JFIF tag (APP0) and the Adobe
//                      color-transform byte (APP14) are recorded
//   COM  (0xFE)        skipped
//
// Returns nonzero on success. STBI__MARKER_none, unknown markers and
// malformed segments fail via stbi__err; a DQT/DHT segment whose declared
// length does not match what was consumed fails by returning 0 directly.
static int stbi__process_marker(stbi__jpeg *z, int m)
{
   int L;
   switch (m) {
      case STBI__MARKER_none: // no marker found
         return stbi__err("expected marker","Corrupt JPEG");

      case 0xDD: // DRI - specify restart interval
         if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
         z->restart_interval = stbi__get16be(z->s);
         return 1;

      case 0xDB: // DQT - define quantization table
         L = stbi__get16be(z->s)-2;
         // a single segment may hold several tables back to back
         while (L > 0) {
            int q = stbi__get8(z->s);
            int p = q >> 4, sixteen = (p != 0); // high nibble: 0 = 8-bit entries, 1 = 16-bit entries
            int t = q & 15,i;                   // low nibble: destination table index
            if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
            if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");

            // entries arrive in zigzag order; store them in natural order
            for (i=0; i < 64; ++i)
               z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
            // 1 header byte + 64 entries of 1 or 2 bytes each
            L -= (sixteen ? 129 : 65);
         }
         return L==0;

      case 0xC4: // DHT - define huffman table
         L = stbi__get16be(z->s)-2;
         // a single segment may hold several tables back to back
         while (L > 0) {
            stbi_uc *v;
            int sizes[16],i,n=0;
            int q = stbi__get8(z->s);
            int tc = q >> 4; // table class: 0 = DC, 1 = AC
            int th = q & 15; // destination table index
            if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
            // sizes[i] = number of codes of length i+1; n = total number of codes
            for (i=0; i < 16; ++i) {
               sizes[i] = stbi__get8(z->s);
               n += sizes[i];
            }
            // Each code maps to one 8-bit symbol, so a valid table has at most
            // 256 codes. The sixteen count bytes come straight from the file
            // and can sum to 16*255; without this check the loop over i < n
            // below (and the table build) would be driven past the end of the
            // table's storage by a crafted file.
            // NOTE(review): the huffman struct's array sizes are declared
            // outside this chunk -- presumably values[256]; confirm.
            if (n > 256) return stbi__err("bad DHT header","Corrupt JPEG");
            L -= 17; // class/index byte + 16 count bytes
            if (tc == 0) {
               if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
               v = z->huff_dc[th].values;
            } else {
               if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
               v = z->huff_ac[th].values;
            }
            // the n symbol values follow the counts
            for (i=0; i < n; ++i)
               v[i] = stbi__get8(z->s);
            // AC tables additionally get a fast-path lookup table
            if (tc != 0)
               stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
            L -= n;
         }
         return L==0;
   }

   // check for comment block or APP blocks
   if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
      L = stbi__get16be(z->s);
      // the length field counts its own two bytes
      if (L < 2) {
         if (m == 0xFE)
            return stbi__err("bad COM len","Corrupt JPEG");
         else
            return stbi__err("bad APP len","Corrupt JPEG");
      }
      L -= 2;

      if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
         static const unsigned char tag[5] = {'J','F','I','F','\0'};
         int ok = 1;
         int i;
         // always consume all 5 bytes so L stays in sync even on a mismatch
         for (i=0; i < 5; ++i)
            if (stbi__get8(z->s) != tag[i])
               ok = 0;
         L -= 5;
         if (ok)
            z->jfif = 1;
      } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
         static const unsigned char tag[6] = {'A','d','o','b','e','\0'};
         int ok = 1;
         int i;
         // always consume all 6 bytes so L stays in sync even on a mismatch
         for (i=0; i < 6; ++i)
            if (stbi__get8(z->s) != tag[i])
               ok = 0;
         L -= 6;
         if (ok) {
            stbi__get8(z->s); // version
            stbi__get16be(z->s); // flags0
            stbi__get16be(z->s); // flags1
            z->app14_color_transform = stbi__get8(z->s); // color transform
            L -= 6;
         }
      }

      // whatever remains of the segment is ignored
      stbi__skip(z->s, L);
      return 1;
   }

   return stbi__err("unknown marker","Corrupt JPEG");
}
2936 
2937 // after we see SOS
stbi__process_scan_header(stbi__jpeg * z)2938 static int stbi__process_scan_header(stbi__jpeg *z)
2939 {
2940    int i;
2941    int Ls = stbi__get16be(z->s);
2942    z->scan_n = stbi__get8(z->s);
2943    if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
2944    if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
2945    for (i=0; i < z->scan_n; ++i) {
2946       int id = stbi__get8(z->s), which;
2947       int q = stbi__get8(z->s);
2948       for (which = 0; which < z->s->img_n; ++which)
2949          if (z->img_comp[which].id == id)
2950             break;
2951       if (which == z->s->img_n) return 0; // no match
2952       z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
2953       z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
2954       z->order[i] = which;
2955    }
2956 
2957    {
2958       int aa;
2959       z->spec_start = stbi__get8(z->s);
2960       z->spec_end   = stbi__get8(z->s); // should be 63, but might be 0
2961       aa = stbi__get8(z->s);
2962       z->succ_high = (aa >> 4);
2963       z->succ_low  = (aa & 15);
2964       if (z->progressive) {
2965          if (z->spec_start > 63 || z->spec_end > 63  || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
2966             return stbi__err("bad SOS", "Corrupt JPEG");
2967       } else {
2968          if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
2969          if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
2970          z->spec_end = 63;
2971       }
2972    }
2973 
2974    return 1;
2975 }
2976 
// Release the buffers owned by the first ncomp components (sample plane,
// coefficient plane, line buffer) and null the pointers so a second call is
// harmless. Returns `why` unchanged, so callers can write
// `return stbi__free_jpeg_components(z, n, result);`.
static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
{
   int c = 0;
   while (c < ncomp) {
      if (z->img_comp[c].raw_data != NULL) {
         STBI_FREE(z->img_comp[c].raw_data);
         // data is an aligned pointer into raw_data, so it dies with it
         z->img_comp[c].data = NULL;
         z->img_comp[c].raw_data = NULL;
      }
      if (z->img_comp[c].raw_coeff != NULL) {
         STBI_FREE(z->img_comp[c].raw_coeff);
         // likewise coeff points into raw_coeff
         z->img_comp[c].coeff = 0;
         z->img_comp[c].raw_coeff = 0;
      }
      if (z->img_comp[c].linebuf != NULL) {
         STBI_FREE(z->img_comp[c].linebuf);
         z->img_comp[c].linebuf = NULL;
      }
      ++c;
   }
   return why;
}
2998 
// Parse a SOF frame header: sample precision, image dimensions, component
// count, and per component its id, sampling factors and quant table index.
//
// If scan != STBI__SCAN_load the function stops after parsing. Otherwise it
// also derives the MCU geometry and allocates, per component, a sample plane
// (and for progressive images a coefficient plane), both sized to whole MCUs
// and over-allocated by 15 bytes so a 16-byte aligned pointer can be carved out.
//
// Returns 1 on success; every failure goes through stbi__err, and allocation
// failures first free whatever was already allocated.
static int stbi__process_frame_header(stbi__jpeg *z, int scan)
{
   static const unsigned char rgb_ids[3] = { 'R', 'G', 'B' };
   stbi__context *s = z->s;
   int frame_len, precision, ncomp, sampling, k;
   int h_max = 1, v_max = 1;

   frame_len = stbi__get16be(s);
   if (frame_len < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG

   precision = stbi__get8(s);
   if (precision != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline

   s->img_y = stbi__get16be(s);
   // Legal, but we don't handle it--but neither does IJG
   if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height");

   s->img_x = stbi__get16be(s);
   if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires

   ncomp = stbi__get8(s);
   if (!(ncomp == 1 || ncomp == 3 || ncomp == 4)) return stbi__err("bad component count","Corrupt JPEG");
   s->img_n = ncomp;
   for (k=0; k < ncomp; ++k) {
      z->img_comp[k].data = NULL;
      z->img_comp[k].linebuf = NULL;
   }

   if (frame_len != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");

   // z->rgb counts how many of a 3-component image's ids spell 'R','G','B' in order
   z->rgb = 0;
   for (k=0; k < s->img_n; ++k) {
      z->img_comp[k].id = stbi__get8(s);
      if (s->img_n == 3 && z->img_comp[k].id == rgb_ids[k])
         ++z->rgb;

      // sampling factors: horizontal in the high nibble, vertical in the low
      sampling = stbi__get8(s);
      z->img_comp[k].h = (sampling >> 4);
      if (!z->img_comp[k].h || z->img_comp[k].h > 4) return stbi__err("bad H","Corrupt JPEG");
      z->img_comp[k].v = sampling & 15;
      if (!z->img_comp[k].v || z->img_comp[k].v > 4) return stbi__err("bad V","Corrupt JPEG");

      z->img_comp[k].tq = stbi__get8(s);
      if (z->img_comp[k].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
   }

   // header-only callers are done here
   if (scan != STBI__SCAN_load) return 1;

   if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");

   // largest sampling factors across all components
   for (k=0; k < s->img_n; ++k) {
      if (z->img_comp[k].h > h_max) h_max = z->img_comp[k].h;
      if (z->img_comp[k].v > v_max) v_max = z->img_comp[k].v;
   }

   // compute interleaved mcu info
   z->img_h_max = h_max;
   z->img_v_max = v_max;
   z->img_mcu_w = h_max * 8;
   z->img_mcu_h = v_max * 8;
   // these sizes can't be more than 17 bits
   z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
   z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;

   for (k=0; k < s->img_n; ++k) {
      // number of effective pixels (e.g. for non-interleaved MCU)
      z->img_comp[k].x = (s->img_x * z->img_comp[k].h + h_max-1) / h_max;
      z->img_comp[k].y = (s->img_y * z->img_comp[k].v + v_max-1) / v_max;

      // The planes are sized in whole interleaved MCUs, which can be larger
      // than the image (e.g. a 16x16 iMCU on an image of width 33); the
      // surplus is kept until colorspace conversion.
      //
      // img_mcu_x, img_mcu_y: <=17 bits; comp[k].h and .v are <=4 (checked earlier)
      // so these muls can't overflow with 32-bit ints (which we require)
      z->img_comp[k].w2 = z->img_mcu_x * z->img_comp[k].h * 8;
      z->img_comp[k].h2 = z->img_mcu_y * z->img_comp[k].v * 8;

      z->img_comp[k].coeff = 0;
      z->img_comp[k].raw_coeff = 0;
      z->img_comp[k].linebuf = NULL;

      z->img_comp[k].raw_data = stbi__malloc_mad2(z->img_comp[k].w2, z->img_comp[k].h2, 15);
      if (z->img_comp[k].raw_data == NULL)
         return stbi__free_jpeg_components(z, k+1, stbi__err("outofmem", "Out of memory"));
      // align blocks for idct using mmx/sse
      z->img_comp[k].data = (stbi_uc*) (((size_t) z->img_comp[k].raw_data + 15) & ~15);

      if (!z->progressive)
         continue;

      // progressive images also keep every coefficient until the last scan
      // w2, h2 are multiples of 8 (see above)
      z->img_comp[k].coeff_w = z->img_comp[k].w2 / 8;
      z->img_comp[k].coeff_h = z->img_comp[k].h2 / 8;
      z->img_comp[k].raw_coeff = stbi__malloc_mad3(z->img_comp[k].w2, z->img_comp[k].h2, sizeof(short), 15);
      if (z->img_comp[k].raw_coeff == NULL)
         return stbi__free_jpeg_components(z, k+1, stbi__err("outofmem", "Out of memory"));
      z->img_comp[k].coeff = (short*) (((size_t) z->img_comp[k].raw_coeff + 15) & ~15);
   }

   return 1;
}
3081 
// Predicates on a JPEG marker code (the value handed back by stbi__get_marker).
// use comparisons since in some cases we handle more than one case (e.g. SOF)
#define stbi__DNL(x)         ((x) == 0xdc)   // segment carrying a line count (checked against img_y by the decoder)
#define stbi__SOI(x)         ((x) == 0xd8)   // must be the first marker in the stream
#define stbi__EOI(x)         ((x) == 0xd9)   // ends the marker loop in stbi__decode_jpeg_image
#define stbi__SOF(x)         ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)   // any supported frame header
#define stbi__SOS(x)         ((x) == 0xda)   // scan header followed by entropy-coded data

// the one frame-header marker that selects progressive decoding
#define stbi__SOF_progressive(x)   ((x) == 0xc2)
3090 
// Parse the beginning of a JPEG stream, up to and including the frame header.
//   scan == STBI__SCAN_type: stop as soon as the SOI marker has been verified.
//   any other value: keep going through the SOF segment; 'scan' is forwarded
//   to stbi__process_frame_header.
// Returns 1 on success. On failure returns 0 or the result of stbi__err.
static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
{
   int marker;

   // reset per-stream state
   z->jfif = 0;
   z->app14_color_transform = -1; // valid values are 0,1,2
   z->marker = STBI__MARKER_none; // initialize cached marker to empty

   // the stream has to open with SOI
   marker = stbi__get_marker(z);
   if (!stbi__SOI(marker))
      return stbi__err("no SOI","Corrupt JPEG");
   if (scan == STBI__SCAN_type)
      return 1;

   // hand every segment in front of the frame header to stbi__process_marker
   for (marker = stbi__get_marker(z); !stbi__SOF(marker); ) {
      if (!stbi__process_marker(z, marker))
         return 0;
      // some files have extra padding after their blocks, so keep scanning
      // for the next marker until the input is exhausted
      marker = stbi__get_marker(z);
      while (marker == STBI__MARKER_none) {
         if (stbi__at_eof(z->s))
            return stbi__err("no SOF", "Corrupt JPEG");
         marker = stbi__get_marker(z);
      }
   }

   z->progressive = stbi__SOF_progressive(marker);
   return stbi__process_frame_header(z, scan) ? 1 : 0;
}
3114 
3115 // decode image to YCbCr format
stbi__decode_jpeg_image(stbi__jpeg * j)3116 static int stbi__decode_jpeg_image(stbi__jpeg *j)
3117 {
3118    int m;
3119    for (m = 0; m < 4; m++) {
3120       j->img_comp[m].raw_data = NULL;
3121       j->img_comp[m].raw_coeff = NULL;
3122    }
3123    j->restart_interval = 0;
3124    if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
3125    m = stbi__get_marker(j);
3126    while (!stbi__EOI(m)) {
3127       if (stbi__SOS(m)) {
3128          if (!stbi__process_scan_header(j)) return 0;
3129          if (!stbi__parse_entropy_coded_data(j)) return 0;
3130          if (j->marker == STBI__MARKER_none ) {
3131             // handle 0s at the end of image data from IP Kamera 9060
3132             while (!stbi__at_eof(j->s)) {
3133                int x = stbi__get8(j->s);
3134                if (x == 255) {
3135                   j->marker = stbi__get8(j->s);
3136                   break;
3137                }
3138             }
3139             // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
3140          }
3141       } else if (stbi__DNL(m)) {
3142          int Ld = stbi__get16be(j->s);
3143          stbi__uint32 NL = stbi__get16be(j->s);
3144          if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG");
3145          if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG");
3146       } else {
3147          if (!stbi__process_marker(j, m)) return 0;
3148       }
3149       m = stbi__get_marker(j);
3150    }
3151    if (j->progressive)
3152       stbi__jpeg_finish(j);
3153    return 1;
3154 }
3155 
// static jfif-centered resampling (across block boundaries)

// Common signature of the row upsamplers below.
//   out      - scratch row the resampler may write its result into
//   in0, in1 - the two source rows to combine (the implementations below name
//              them in_near and in_far)
//   w        - number of samples in each source row
//   hs       - horizontal expansion factor
// Returns a pointer to the resampled row; this is not necessarily 'out'
// (resample_row_1 returns its input row untouched).
typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
                                    int w, int hs);

// divide by 4 and narrow to a byte; callers add 2 beforehand to round
#define stbi__div4(x) ((stbi_uc) ((x) >> 2))
3162 
// 1:1 "resampler": nothing to scale, so the nearer source row is handed back
// as-is and 'out' is never written.
static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   stbi_uc *row = in_near;
   // the remaining parameters exist only to match resample_row_func
   STBI_NOTUSED(hs);
   STBI_NOTUSED(w);
   STBI_NOTUSED(in_far);
   STBI_NOTUSED(out);
   return row;
}
3171 
// Vertical 2x upsampling: need to generate two samples vertically for every
// one in input. Each output sample is a rounded 3:1 blend of the nearer and
// the farther source row. Writes w samples into 'out' and returns it.
static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   int x = 0;
   STBI_NOTUSED(hs);
   while (x < w) {
      int blend = 3*in_near[x] + in_far[x] + 2; // +2 rounds the divide by 4
      out[x] = stbi__div4(blend);
      ++x;
   }
   return out;
}
3181 
// Horizontal 2x upsampling: need to generate two samples horizontally for
// every one in input. Interior outputs are rounded 3:1 blends of a sample and
// its left/right neighbour; the two outermost outputs copy the edge samples.
// Only in_near is read. Writes 2*w samples into 'out' and returns it.
static stbi_uc*  stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   int i;
   stbi_uc *src = in_near;

   STBI_NOTUSED(in_far);
   STBI_NOTUSED(hs);

   if (w == 1) {
      // if only one sample, can't do any interpolation
      stbi_uc only = src[0];
      out[1] = only;
      out[0] = only;
      return out;
   }

   // left edge
   out[0] = src[0];
   out[1] = stbi__div4(src[0]*3 + src[1] + 2);

   // interior samples: each produces a left-leaning and a right-leaning output
   i = 1;
   while (i < w-1) {
      int center = 3*src[i] + 2;
      out[2*i]     = stbi__div4(center + src[i-1]);
      out[2*i + 1] = stbi__div4(center + src[i+1]);
      ++i;
   }

   // right edge (i is the index of the last sample here)
   out[2*i]     = stbi__div4(src[w-2]*3 + src[w-1] + 2);
   out[2*i + 1] = src[w-1];

   return out;
}
3209 
3210 #define stbi__div16(x) ((stbi_uc) ((x) >> 4))
3211 
stbi__resample_row_hv_2(stbi_uc * out,stbi_uc * in_near,stbi_uc * in_far,int w,int hs)3212 static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3213 {
3214    // need to generate 2x2 samples for every one in input
3215    int i,t0,t1;
3216    if (w == 1) {
3217       out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
3218       return out;
3219    }
3220 
3221    t1 = 3*in_near[0] + in_far[0];
3222    out[0] = stbi__div4(t1+2);
3223    for (i=1; i < w; ++i) {
3224       t0 = t1;
3225       t1 = 3*in_near[i]+in_far[i];
3226       out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
3227       out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
3228    }
3229    out[w*2-1] = stbi__div4(t1+2);
3230 
3231    STBI_NOTUSED(hs);
3232 
3233    return out;
3234 }
3235 
#if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD variant of stbi__resample_row_hv_2 (2x upsampling in both directions).
// Input columns are processed eight at a time with SSE2 or NEON while at
// least one more column remains after the group; the leftover columns and
// the right edge are finished with the scalar formula.
// Writes 2*w samples into 'out' and returns it; 'hs' is unused.
static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   // need to generate 2x2 samples for every one in input
   int i=0,t0,t1;

   // single column: vertical blend only, duplicated horizontally
   if (w == 1) {
      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
      return out;
   }

   // t1 always holds the vertically filtered value of the column to the left
   // of the one about to be processed
   t1 = 3*in_near[0] + in_far[0];
   // process groups of 8 pixels for as long as we can.
   // note we can't handle the last pixel in a row in this loop
   // because we need to handle the filter boundary conditions.
   for (; i < ((w-1) & ~7); i += 8) {
#if defined(STBI_SSE2)
      // load and perform the vertical filtering pass
      // this uses 3*x + y = 4*x + (y - x)
      __m128i zero  = _mm_setzero_si128();
      __m128i farb  = _mm_loadl_epi64((__m128i *) (in_far + i));
      __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
      __m128i farw  = _mm_unpacklo_epi8(farb, zero);
      __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
      __m128i diff  = _mm_sub_epi16(farw, nearw);
      __m128i nears = _mm_slli_epi16(nearw, 2);
      __m128i curr  = _mm_add_epi16(nears, diff); // current row

      // horizontal filter works the same based on shifted vers of current
      // row. "prev" is current row shifted right by 1 pixel; we need to
      // insert the previous pixel value (from t1).
      // "next" is current row shifted left by 1 pixel, with first pixel
      // of next block of 8 pixels added in.
      __m128i prv0 = _mm_slli_si128(curr, 2);
      __m128i nxt0 = _mm_srli_si128(curr, 2);
      __m128i prev = _mm_insert_epi16(prv0, t1, 0);
      __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7);

      // horizontal filter, polyphase implementation since it's convenient:
      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
      // note the shared term.
      __m128i bias  = _mm_set1_epi16(8);
      __m128i curs = _mm_slli_epi16(curr, 2);
      __m128i prvd = _mm_sub_epi16(prev, curr);
      __m128i nxtd = _mm_sub_epi16(next, curr);
      __m128i curb = _mm_add_epi16(curs, bias);
      __m128i even = _mm_add_epi16(prvd, curb);
      __m128i odd  = _mm_add_epi16(nxtd, curb);

      // interleave even and odd pixels, then undo scaling.
      __m128i int0 = _mm_unpacklo_epi16(even, odd);
      __m128i int1 = _mm_unpackhi_epi16(even, odd);
      __m128i de0  = _mm_srli_epi16(int0, 4);
      __m128i de1  = _mm_srli_epi16(int1, 4);

      // pack and write output
      __m128i outv = _mm_packus_epi16(de0, de1);
      _mm_storeu_si128((__m128i *) (out + i*2), outv);
#elif defined(STBI_NEON)
      // load and perform the vertical filtering pass
      // this uses 3*x + y = 4*x + (y - x)
      uint8x8_t farb  = vld1_u8(in_far + i);
      uint8x8_t nearb = vld1_u8(in_near + i);
      int16x8_t diff  = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
      int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
      int16x8_t curr  = vaddq_s16(nears, diff); // current row

      // horizontal filter works the same based on shifted vers of current
      // row. "prev" is current row shifted right by 1 pixel; we need to
      // insert the previous pixel value (from t1).
      // "next" is current row shifted left by 1 pixel, with first pixel
      // of next block of 8 pixels added in.
      int16x8_t prv0 = vextq_s16(curr, curr, 7);
      int16x8_t nxt0 = vextq_s16(curr, curr, 1);
      int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
      int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7);

      // horizontal filter, polyphase implementation since it's convenient:
      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
      // note the shared term.
      int16x8_t curs = vshlq_n_s16(curr, 2);
      int16x8_t prvd = vsubq_s16(prev, curr);
      int16x8_t nxtd = vsubq_s16(next, curr);
      int16x8_t even = vaddq_s16(curs, prvd);
      int16x8_t odd  = vaddq_s16(curs, nxtd);

      // undo scaling and round, then store with even/odd phases interleaved
      uint8x8x2_t o;
      o.val[0] = vqrshrun_n_s16(even, 4);
      o.val[1] = vqrshrun_n_s16(odd,  4);
      vst2_u8(out + i*2, o);
#endif

      // "previous" value for next iter
      t1 = 3*in_near[i+7] + in_far[i+7];
   }

   // first column not covered by the vector loop: only its even-phase output
   // is produced here. If the loop ran, out[i*2-1] was already stored by the
   // last vector iteration; if it did not run, i is 0 and t0 == t1, so this
   // reduces to the plain vertical blend used for the left edge.
   t0 = t1;
   t1 = 3*in_near[i] + in_far[i];
   out[i*2] = stbi__div16(3*t1 + t0 + 8);

   // remaining columns, same formula as the scalar stbi__resample_row_hv_2
   for (++i; i < w; ++i) {
      t0 = t1;
      t1 = 3*in_near[i]+in_far[i];
      out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
      out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
   }
   // right edge has no right neighbour
   out[w*2-1] = stbi__div4(t1+2);

   STBI_NOTUSED(hs);

   return out;
}
#endif
3352 
// Fallback upsampler: resample with nearest-neighbor, i.e. every sample of
// in_near is repeated hs times in 'out'. in_far is ignored (no vertical
// filtering). Returns 'out'.
static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   stbi_uc *dst = out;
   int x, rep;
   STBI_NOTUSED(in_far);
   for (x = 0; x < w; ++x) {
      for (rep = 0; rep < hs; ++rep)
         *dst++ = in_near[x];
   }
   return out;
}
3363 
3364 // this is a reduced-precision calculation of YCbCr-to-RGB introduced
3365 // to make sure the code produces the same results in both SIMD and scalar
3366 #define stbi__float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8)
stbi__YCbCr_to_RGB_row(stbi_uc * out,const stbi_uc * y,const stbi_uc * pcb,const stbi_uc * pcr,int count,int step)3367 static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
3368 {
3369    int i;
3370    for (i=0; i < count; ++i) {
3371       int y_fixed = (y[i] << 20) + (1<<19); // rounding
3372       int r,g,b;
3373       int cr = pcr[i] - 128;
3374       int cb = pcb[i] - 128;
3375       r = y_fixed +  cr* stbi__float2fixed(1.40200f);
3376       g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
3377       b = y_fixed                                     +   cb* stbi__float2fixed(1.77200f);
3378       r >>= 20;
3379       g >>= 20;
3380       b >>= 20;
3381       if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
3382       if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
3383       if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
3384       out[0] = (stbi_uc)r;
3385       out[1] = (stbi_uc)g;
3386       out[2] = (stbi_uc)b;
3387       out[3] = 255;
3388       out += step;
3389    }
3390 }
3391 
#if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD variant of stbi__YCbCr_to_RGB_row with the same parameters.
// Only step == 4 is vectorized (eight pixels per iteration); any other step,
// and the final count % 8 pixels, are handled by the scalar loop at the end,
// which repeats the arithmetic of stbi__YCbCr_to_RGB_row.
static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
{
   // index of the next pixel to convert; shared by the vector and scalar loops
   int i = 0;

#ifdef STBI_SSE2
   // step == 3 is pretty ugly on the final interleave, and i'm not convinced
   // it's useful in practice (you wouldn't use it for textures, for example).
   // so just accelerate step == 4 case.
   if (step == 4) {
      // this is a fairly straightforward implementation and not super-optimized.
      __m128i signflip  = _mm_set1_epi8(-0x80);
      __m128i cr_const0 = _mm_set1_epi16(   (short) ( 1.40200f*4096.0f+0.5f));
      __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
      __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
      __m128i cb_const1 = _mm_set1_epi16(   (short) ( 1.77200f*4096.0f+0.5f));
      __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128);
      __m128i xw = _mm_set1_epi16(255); // alpha channel

      for (; i+7 < count; i += 8) {
         // load
         __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i));
         __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i));
         __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i));
         __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
         __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128

         // unpack to short (and left-shift cr, cb by 8)
         __m128i yw  = _mm_unpacklo_epi8(y_bias, y_bytes);
         __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
         __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);

         // color transform
         __m128i yws = _mm_srli_epi16(yw, 4);
         __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
         __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
         __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
         __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
         __m128i rws = _mm_add_epi16(cr0, yws);
         __m128i gwt = _mm_add_epi16(cb0, yws);
         __m128i bws = _mm_add_epi16(yws, cb1);
         __m128i gws = _mm_add_epi16(gwt, cr1);

         // descale
         __m128i rw = _mm_srai_epi16(rws, 4);
         __m128i bw = _mm_srai_epi16(bws, 4);
         __m128i gw = _mm_srai_epi16(gws, 4);

         // back to byte, set up for transpose
         __m128i brb = _mm_packus_epi16(rw, bw);
         __m128i gxb = _mm_packus_epi16(gw, xw);

         // transpose to interleave channels
         __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
         __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
         __m128i o0 = _mm_unpacklo_epi16(t0, t1);
         __m128i o1 = _mm_unpackhi_epi16(t0, t1);

         // store: 8 pixels * 4 bytes = two 16-byte vectors
         _mm_storeu_si128((__m128i *) (out + 0), o0);
         _mm_storeu_si128((__m128i *) (out + 16), o1);
         out += 32;
      }
   }
#endif

#ifdef STBI_NEON
   // in this version, step=3 support would be easy to add. but is there demand?
   if (step == 4) {
      // this is a fairly straightforward implementation and not super-optimized.
      uint8x8_t signflip = vdup_n_u8(0x80);
      int16x8_t cr_const0 = vdupq_n_s16(   (short) ( 1.40200f*4096.0f+0.5f));
      int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
      int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
      int16x8_t cb_const1 = vdupq_n_s16(   (short) ( 1.77200f*4096.0f+0.5f));

      for (; i+7 < count; i += 8) {
         // load
         uint8x8_t y_bytes  = vld1_u8(y + i);
         uint8x8_t cr_bytes = vld1_u8(pcr + i);
         uint8x8_t cb_bytes = vld1_u8(pcb + i);
         int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
         int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));

         // expand to s16
         int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
         int16x8_t crw = vshll_n_s8(cr_biased, 7);
         int16x8_t cbw = vshll_n_s8(cb_biased, 7);

         // color transform
         int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
         int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
         int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
         int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
         int16x8_t rws = vaddq_s16(yws, cr0);
         int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
         int16x8_t bws = vaddq_s16(yws, cb1);

         // undo scaling, round, convert to byte
         uint8x8x4_t o;
         o.val[0] = vqrshrun_n_s16(rws, 4);
         o.val[1] = vqrshrun_n_s16(gws, 4);
         o.val[2] = vqrshrun_n_s16(bws, 4);
         o.val[3] = vdup_n_u8(255);

         // store, interleaving r/g/b/a
         vst4_u8(out, o);
         out += 8*4;
      }
   }
#endif

   // scalar path: everything the vector loops above did not consume
   for (; i < count; ++i) {
      int y_fixed = (y[i] << 20) + (1<<19); // rounding
      int r,g,b;
      int cr = pcr[i] - 128;
      int cb = pcb[i] - 128;
      r = y_fixed + cr* stbi__float2fixed(1.40200f);
      g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
      b = y_fixed                                   +   cb* stbi__float2fixed(1.77200f);
      r >>= 20;
      g >>= 20;
      b >>= 20;
      // clamp to 0..255; the unsigned compare catches both directions at once
      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
      out[0] = (stbi_uc)r;
      out[1] = (stbi_uc)g;
      out[2] = (stbi_uc)b;
      out[3] = 255; // written even when step is 3
      out += step;
   }
}
#endif
3526 
3527 // set up the kernels
stbi__setup_jpeg(stbi__jpeg * j)3528 static void stbi__setup_jpeg(stbi__jpeg *j)
3529 {
3530    j->idct_block_kernel = stbi__idct_block;
3531    j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
3532    j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
3533 
3534 #ifdef STBI_SSE2
3535    if (stbi__sse2_available()) {
3536       j->idct_block_kernel = stbi__idct_simd;
3537       j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3538       j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3539    }
3540 #endif
3541 
3542 #ifdef STBI_NEON
3543    j->idct_block_kernel = stbi__idct_simd;
3544    j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3545    j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3546 #endif
3547 }
3548 
3549 // clean up the temporary component buffers
stbi__cleanup_jpeg(stbi__jpeg * j)3550 static void stbi__cleanup_jpeg(stbi__jpeg *j)
3551 {
3552    stbi__free_jpeg_components(j, j->s->img_n, 0);
3553 }
3554 
// Per-component upsampling state used by load_jpeg_image while it walks the
// output rows from top to bottom.
typedef struct
{
   resample_row_func resample; // row upsampler chosen from the hs/vs pair
   stbi_uc *line0,*line1;      // two consecutive source rows; line1 is the later one
   int hs,vs;   // expansion factor in each axis
   int w_lores; // horizontal pixels pre-expansion
   int ystep;   // how far through vertical expansion we are
   int ypos;    // which pre-expansion row we're on
} stbi__resample;
3564 
3565 // fast 0..255 * 0..255 => 0..255 rounded multiplication
stbi__blinn_8x8(stbi_uc x,stbi_uc y)3566 static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
3567 {
3568    unsigned int t = x*y + 128;
3569    return (stbi_uc) ((t + (t >>8)) >> 8);
3570 }
3571 
// Decode the JPEG attached to 'z' and return the pixels as interleaved bytes.
//   out_x, out_y - receive the image dimensions (written unconditionally on success)
//   comp         - if non-NULL, receives 3 when the file has >= 3 components, else 1
//   req_comp     - bytes per output pixel, 1..4, or 0 to use the same 3-or-1
//                  rule as 'comp'
// Returns a buffer obtained from stbi__malloc_mad3 that the caller owns. On
// failure returns NULL or the result of stbi__errpuc. stbi__cleanup_jpeg is
// called on every exit path once decoding has been attempted.
static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
{
   int n, decode_n, is_rgb;
   z->s->img_n = 0; // make stbi__cleanup_jpeg safe

   // validate req_comp
   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");

   // load a jpeg image from whichever source, but leave in YCbCr format
   if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }

   // determine actual number of components to generate
   n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;

   // a 3-component file holds RGB rather than YCbCr when z->rgb is 3, or when
   // app14_color_transform is 0 and the jfif flag is clear
   is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));

   // gray output from a YCbCr file only needs the first (luma) component;
   // every other combination resamples all components
   if (z->s->img_n == 3 && n < 3 && !is_rgb)
      decode_n = 1;
   else
      decode_n = z->s->img_n;

   // resample and color-convert
   {
      int k;
      unsigned int i,j;
      stbi_uc *output;
      stbi_uc *coutput[4]; // per-component resampled row; only [0..decode_n-1] are set

      stbi__resample res_comp[4];

      for (k=0; k < decode_n; ++k) {
         stbi__resample *r = &res_comp[k];

         // allocate line buffer big enough for upsampling off the edges
         // with upsample factor of 4
         z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
         if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }

         r->hs      = z->img_h_max / z->img_comp[k].h;
         r->vs      = z->img_v_max / z->img_comp[k].v;
         r->ystep   = r->vs >> 1;
         r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
         r->ypos    = 0;
         r->line0   = r->line1 = z->img_comp[k].data;

         // pick the upsampler for this component's expansion factors
         if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
         else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
         else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
         else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
         else                               r->resample = stbi__resample_row_generic;
      }

      // can't error after this so, this is safe
      // NOTE(review): the trailing 1 presumably requests one extra byte; the
      // out[3] = 255 / out[1] = 255 stores below touch one byte past the last
      // pixel when n is 3 or 1 -- confirm against stbi__malloc_mad3
      output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
      if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }

      // now go ahead and resample
      for (j=0; j < z->s->img_y; ++j) {
         stbi_uc *out = output + n * z->s->img_x * j;
         for (k=0; k < decode_n; ++k) {
            stbi__resample *r = &res_comp[k];
            // in the second half of a vertical expansion step line1 is passed
            // as the "near" row, otherwise line0
            int y_bot = r->ystep >= (r->vs >> 1);
            coutput[k] = r->resample(z->img_comp[k].linebuf,
                                     y_bot ? r->line1 : r->line0,
                                     y_bot ? r->line0 : r->line1,
                                     r->w_lores, r->hs);
            // after vs output rows, move on to the next source row; line1
            // stops advancing once ypos reaches the component's row count
            if (++r->ystep >= r->vs) {
               r->ystep = 0;
               r->line0 = r->line1;
               if (++r->ypos < z->img_comp[k].y)
                  r->line1 += z->img_comp[k].w2;
            }
         }
         if (n >= 3) {
            // color output (3 or 4 bytes per pixel)
            stbi_uc *y = coutput[0];
            if (z->s->img_n == 3) {
               if (is_rgb) {
                  // components are already R,G,B: just interleave
                  for (i=0; i < z->s->img_x; ++i) {
                     out[0] = y[i];
                     out[1] = coutput[1][i];
                     out[2] = coutput[2][i];
                     out[3] = 255;
                     out += n;
                  }
               } else {
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
               }
            } else if (z->s->img_n == 4) {
               if (z->app14_color_transform == 0) { // CMYK
                  // scale each of the first three channels by the fourth
                  for (i=0; i < z->s->img_x; ++i) {
                     stbi_uc m = coutput[3][i];
                     out[0] = stbi__blinn_8x8(coutput[0][i], m);
                     out[1] = stbi__blinn_8x8(coutput[1][i], m);
                     out[2] = stbi__blinn_8x8(coutput[2][i], m);
                     out[3] = 255;
                     out += n;
                  }
               } else if (z->app14_color_transform == 2) { // YCCK
                  // convert to RGB first, then invert and scale by the fourth channel
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
                  for (i=0; i < z->s->img_x; ++i) {
                     stbi_uc m = coutput[3][i];
                     out[0] = stbi__blinn_8x8(255 - out[0], m);
                     out[1] = stbi__blinn_8x8(255 - out[1], m);
                     out[2] = stbi__blinn_8x8(255 - out[2], m);
                     out += n;
                  }
               } else { // YCbCr + alpha?  Ignore the fourth channel for now
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
               }
            } else
               // single-component file: replicate gray into R,G,B
               for (i=0; i < z->s->img_x; ++i) {
                  out[0] = out[1] = out[2] = y[i];
                  out[3] = 255; // not used if n==3
                  out += n;
               }
         } else {
            // gray output (1 or 2 bytes per pixel)
            if (is_rgb) {
               if (n == 1)
                  for (i=0; i < z->s->img_x; ++i)
                     *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
               else {
                  for (i=0; i < z->s->img_x; ++i, out += 2) {
                     out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
                     out[1] = 255;
                  }
               }
            } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
               // CMYK: same per-channel scaling as the color path, then reduce to gray
               for (i=0; i < z->s->img_x; ++i) {
                  stbi_uc m = coutput[3][i];
                  stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
                  stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
                  stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
                  out[0] = stbi__compute_y(r, g, b);
                  out[1] = 255;
                  out += n;
               }
            } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
               // YCCK: inverted first channel scaled by the fourth
               for (i=0; i < z->s->img_x; ++i) {
                  out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
                  out[1] = 255;
                  out += n;
               }
            } else {
               // first component is used directly as gray
               stbi_uc *y = coutput[0];
               if (n == 1)
                  for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
               else
                  for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255;
            }
         }
      }
      stbi__cleanup_jpeg(z);
      *out_x = z->s->img_x;
      *out_y = z->s->img_y;
      if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output
      return output;
   }
}
3730 
// Loader entry point for JPEG: allocates the decoder state, attaches the
// input context, installs the kernels and decodes via load_jpeg_image.
//   x, y, comp, req_comp - forwarded unchanged to load_jpeg_image
//   ri                   - unused by this loader
// Returns the pixel buffer from load_jpeg_image, or the result of
// stbi__errpuc("outofmem", ...) when the decoder state cannot be allocated.
static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   unsigned char* result;
   stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
   STBI_NOTUSED(ri);
   // the allocation can fail; report it instead of writing through NULL
   if (!j) return stbi__errpuc("outofmem", "Out of memory");
   j->s = s;
   stbi__setup_jpeg(j);
   result = load_jpeg_image(j, x,y,comp,req_comp);
   STBI_FREE(j);
   return result;
}
3742 
// Check whether the stream looks like a JPEG by running the header parser in
// STBI__SCAN_type mode, then rewind the stream.
// Returns the header parser's result, or the result of
// stbi__err("outofmem", ...) when the temporary decoder state cannot be
// allocated (nothing has been read at that point, so no rewind is needed).
static int stbi__jpeg_test(stbi__context *s)
{
   int r;
   stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
   // the allocation can fail; report it instead of writing through NULL
   if (!j) return stbi__err("outofmem", "Out of memory");
   j->s = s;
   stbi__setup_jpeg(j);
   r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
   stbi__rewind(s);
   STBI_FREE(j);
   return r;
}
3754 
// Parse just the header (STBI__SCAN_header) and report the image geometry.
// Each of x, y and comp is optional (may be NULL). 'comp' reports 3 for
// files with three or more components and 1 otherwise.
// Returns 1 on success; on failure rewinds the stream and returns 0.
static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
{
   if (stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
      if (x)
         *x = j->s->img_x;
      if (y)
         *y = j->s->img_y;
      if (comp) {
         if (j->s->img_n >= 3)
            *comp = 3;
         else
            *comp = 1;
      }
      return 1;
   }

   // header could not be parsed: put the stream back where it started
   stbi__rewind( j->s );
   return 0;
}
3766 
// Query width/height/component count of a JPEG without decoding pixels.
// Allocates a temporary decoder state, delegates to stbi__jpeg_info_raw and
// frees the state again.
// Returns the result of stbi__jpeg_info_raw, or the result of
// stbi__err("outofmem", ...) when the temporary state cannot be allocated.
static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
{
   int result;
   stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
   // the allocation can fail; report it instead of writing through NULL
   if (!j) return stbi__err("outofmem", "Out of memory");
   j->s = s;
   result = stbi__jpeg_info_raw(j, x, y, comp);
   STBI_FREE(j);
   return result;
}
3776 #endif
3777 
3778 // public domain zlib decode    v0.2  Sean Barrett 2006-11-18
3779 //    simple implementation
3780 //      - all input must be provided in an upfront buffer
3781 //      - all output is written to a single output buffer (can malloc/realloc)
3782 //    performance
3783 //      - fast huffman
3784 
3785 #ifndef STBI_NO_ZLIB
3786 
// fast-way is faster to check than jpeg huffman, but slow way is slower
// STBI__ZFAST_BITS is the index width of stbi__zhuffman.fast[]: codes of at
// most this many bits get entries in that table (see stbi__zbuild_huffman).
#define STBI__ZFAST_BITS  9 // accelerate all cases in default tables
// mask selecting the low STBI__ZFAST_BITS bits
#define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1)
3790 
// zlib-style huffman encoding
// (jpeg packs from left, zlib from right, so can't share code)
// All fields are filled in by stbi__zbuild_huffman; the arrays indexed by code
// length only have entries 1..15 written there (plus maxcode[16]).
typedef struct
{
   stbi__uint16 fast[1 << STBI__ZFAST_BITS]; // indexed by bit-reversed code bits: (length << 9) | symbol, 0 = no entry
   stbi__uint16 firstcode[16];   // first code of each length
   int maxcode[17];              // one past the last code of each length, preshifted to 16 bits; [16] = 0x10000 sentinel
   stbi__uint16 firstsymbol[16]; // index into size[]/value[] of the first symbol of each length
   stbi_uc  size[288];           // code length of each entry, entries ordered by code
   stbi__uint16 value[288];      // symbol of each entry, same ordering as size[]
} stbi__zhuffman;
3802 
stbi__bitreverse16(int n)3803 stbi_inline static int stbi__bitreverse16(int n)
3804 {
3805   n = ((n & 0xAAAA) >>  1) | ((n & 0x5555) << 1);
3806   n = ((n & 0xCCCC) >>  2) | ((n & 0x3333) << 2);
3807   n = ((n & 0xF0F0) >>  4) | ((n & 0x0F0F) << 4);
3808   n = ((n & 0xFF00) >>  8) | ((n & 0x00FF) << 8);
3809   return n;
3810 }
3811 
stbi__bit_reverse(int v,int bits)3812 stbi_inline static int stbi__bit_reverse(int v, int bits)
3813 {
3814    STBI_ASSERT(bits <= 16);
3815    // to bit reverse n bits, reverse 16 and shift
3816    // e.g. 11 bits, bit reverse and shift away 5
3817    return stbi__bitreverse16(v) >> (16-bits);
3818 }
3819 
// Build the decoding tables in 'z' from a list of code lengths.
//   sizelist[i] - code length of symbol i (0 means the symbol is unused)
//   num         - number of entries in sizelist
// Returns 1 on success; for an impossible set of lengths returns the result
// of stbi__err.
static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
{
   int len, sym, code = 0, nsyms = 0;
   int next_code[16], count[17];

   // DEFLATE spec for generating codes
   // step 1: count how many symbols use each code length
   memset(count, 0, sizeof(count));
   memset(z->fast, 0, sizeof(z->fast));
   for (sym = 0; sym < num; ++sym)
      count[sizelist[sym]] += 1;
   count[0] = 0;
   for (len = 1; len < 16; ++len) {
      if (count[len] > (1 << len))
         return stbi__err("bad sizes", "Corrupt PNG");
   }

   // step 2: assign the first code and first table slot of every length
   for (len = 1; len < 16; ++len) {
      next_code[len] = code;
      z->firstcode[len] = (stbi__uint16) code;
      z->firstsymbol[len] = (stbi__uint16) nsyms;
      code += count[len];
      if (count[len] && code-1 >= (1 << len))
         return stbi__err("bad codelengths","Corrupt PNG");
      z->maxcode[len] = code << (16-len); // preshift for inner loop
      code <<= 1;
      nsyms += count[len];
   }
   z->maxcode[16] = 0x10000; // sentinel

   // step 3: place every used symbol, and seed the fast table for short codes
   for (sym = 0; sym < num; ++sym) {
      int s = sizelist[sym];
      int slot, j;
      if (s == 0)
         continue;
      slot = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
      z->size [slot] = (stbi_uc) s;
      z->value[slot] = (stbi__uint16) sym;
      if (s <= STBI__ZFAST_BITS) {
         // every fast-table index whose low s bits equal the reversed code
         // gets the same entry
         stbi__uint16 fastv = (stbi__uint16) ((s << 9) | sym);
         for (j = stbi__bit_reverse(next_code[s], s); j < (1 << STBI__ZFAST_BITS); j += (1 << s))
            z->fast[j] = fastv;
      }
      ++next_code[s];
   }
   return 1;
}
3866 
3867 // zlib-from-memory implementation for PNG reading
3868 //    because PNG allows splitting the zlib stream arbitrarily,
3869 //    and it's annoying structurally to have PNG call ZLIB call PNG,
3870 //    we require PNG read all the IDATs and combine them into a single
3871 //    memory buffer
3872 
// State of one inflate run: input cursor, bit buffer, output buffer and the
// two Huffman tables currently in effect.
typedef struct
{
   stbi_uc *zbuffer, *zbuffer_end; // next input byte to read / one past the last input byte
   int num_bits;                   // number of valid bits currently held in code_buffer
   stbi__uint32 code_buffer;       // bit buffer; the next bit to consume is the least significant

   char *zout;                     // next output byte to write
   char *zout_start;               // start of the output buffer
   char *zout_end;                 // one past the end of the output buffer
   int   z_expandable;             // nonzero if stbi__zexpand may realloc the output buffer

   stbi__zhuffman z_length, z_distance; // literal/length and distance decode tables
} stbi__zbuf;
3886 
stbi__zget8(stbi__zbuf * z)3887 stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
3888 {
3889    if (z->zbuffer >= z->zbuffer_end) return 0;
3890    return *z->zbuffer++;
3891 }
3892 
// Tops up the bit buffer one input byte at a time: always adds at least one
// byte and stops once more than 24 bits are buffered.
static void stbi__fill_bits(stbi__zbuf *z)
{
   for (;;) {
      unsigned int next_byte;
      STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
      next_byte = (unsigned int) stbi__zget8(z);
      z->code_buffer |= next_byte << z->num_bits; // new bits go above the ones already held
      z->num_bits += 8;
      if (z->num_bits > 24) break;
   }
}
3901 
stbi__zreceive(stbi__zbuf * z,int n)3902 stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
3903 {
3904    unsigned int k;
3905    if (z->num_bits < n) stbi__fill_bits(z);
3906    k = z->code_buffer & ((1 << n) - 1);
3907    z->code_buffer >>= n;
3908    z->num_bits -= n;
3909    return k;
3910 }
3911 
// Decodes one symbol whose code was not resolved by the fast table.
// Returns the symbol, or -1 if the buffered bits match no code.
static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
{
   int slot, codelen, msb_code;
   // not resolved by fast table, so compute it the slow way
   // use jpeg approach, which requires MSbits at top
   msb_code = stbi__bit_reverse(a->code_buffer, 16);
   // find the shortest length whose (preshifted) code range contains the bits;
   // the sentinel in maxcode[16] stops the scan
   codelen = STBI__ZFAST_BITS + 1;
   while (msb_code >= z->maxcode[codelen])
      ++codelen;
   if (codelen == 16) return -1; // invalid code!
   // code size is codelen, so:
   slot = (msb_code >> (16 - codelen)) - z->firstcode[codelen] + z->firstsymbol[codelen];
   STBI_ASSERT(z->size[slot] == codelen);
   a->num_bits -= codelen;
   a->code_buffer >>= codelen;
   return z->value[slot];
}
3929 
stbi__zhuffman_decode(stbi__zbuf * a,stbi__zhuffman * z)3930 stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
3931 {
3932    int b,s;
3933    if (a->num_bits < 16) stbi__fill_bits(a);
3934    b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
3935    if (b) {
3936       s = b >> 9;
3937       a->code_buffer >>= s;
3938       a->num_bits -= s;
3939       return b & 511;
3940    }
3941    return stbi__zhuffman_decode_slowpath(a, z);
3942 }
3943 
// Grows the output buffer so that at least n more bytes fit after `zout`.
//   z    - inflate state; zout/zout_start/zout_end are updated on success
//   zout - the caller's current write position (stored into z->zout first)
//   n    - number of bytes that need to fit
// The capacity is doubled until it is large enough. Returns 1 on success.
// Returns the stbi__err failure value if the buffer is not expandable, if the
// required capacity does not fit in an int, or if reallocation fails; in the
// failure cases z->zout_start still refers to the original allocation.
static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to make room for n bytes
{
   char *q;
   int cur, limit, old_limit;
   // largest capacity that can still be doubled without overflowing int
   const int max_doublable = (int) (((unsigned int) -1) >> 2);
   z->zout = zout;
   if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
   cur   = (int) (z->zout     - z->zout_start);
   limit = old_limit = (int) (z->zout_end - z->zout_start);
   // a zero-sized buffer would never grow by doubling
   if (limit < 1) limit = 1;
   // compare against the remaining space (limit - cur) so the test itself
   // cannot overflow the way cur + n could
   while (n > limit - cur) {
      if (limit > max_doublable) return stbi__err("outofmem", "Out of memory");
      limit *= 2;
   }
   q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
   STBI_NOTUSED(old_limit);
   if (q == NULL) return stbi__err("outofmem", "Out of memory");
   z->zout_start = q;
   z->zout       = q + cur;
   z->zout_end   = q + limit;
   return 1;
}
3962 
// Lookup tables used by stbi__parse_huffman_block.
// Length tables are indexed by (literal/length symbol - 257); distance
// tables are indexed by the decoded distance symbol. "base" is the starting
// value and "extra" is the number of additional bits read and added to it.
// Trailing zero entries are padding beyond the defined symbols.
static const int stbi__zlength_base[31] = {
   3,4,5,6,7,8,9,10,11,13,
   15,17,19,23,27,31,35,43,51,59,
   67,83,99,115,131,163,195,227,258,0,0 };

static const int stbi__zlength_extra[31]=
{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };

static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};

// only 30 initializers are given; the last two entries are implicitly 0
static const int stbi__zdist_extra[32] =
{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
3976 
// Decodes one Huffman-compressed block using a->z_length and a->z_distance,
// appending the output at a->zout (growing the buffer via stbi__zexpand when
// needed). Returns 1 when the end-of-block symbol (256) is reached, with
// a->zout updated; returns 0 / the stbi__err failure value on bad data, a
// failed expansion, or a back-reference that reaches before the output start.
static int stbi__parse_huffman_block(stbi__zbuf *a)
{
   char *zout = a->zout;
   for(;;) {
      int z = stbi__zhuffman_decode(a, &a->z_length);
      if (z < 256) {
         if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
         if (zout >= a->zout_end) {
            if (!stbi__zexpand(a, zout, 1)) return 0;
            zout = a->zout;
         }
         *zout++ = (char) z;
      } else {
         stbi_uc *p;
         int len,dist;
         if (z == 256) {
            a->zout = zout;
            return 1;
         }
         // length symbols 286 and 287 exist in the fixed table but must not
         // appear in compressed data
         if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG");
         z -= 257;
         len = stbi__zlength_base[z];
         if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
         z = stbi__zhuffman_decode(a, &a->z_distance);
         // distance symbols 30 and 31 are likewise invalid; their table base
         // of 0 would make the copy below read bytes not yet written
         if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG");
         dist = stbi__zdist_base[z];
         if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
         if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
         // compare against the space left instead of forming zout + len,
         // which could point past the end of the buffer
         if (len > a->zout_end - zout) {
            if (!stbi__zexpand(a, zout, len)) return 0;
            zout = a->zout;
         }
         p = (stbi_uc *) (zout - dist);
         if (dist == 1) { // run of one byte; common in images.
            stbi_uc v = *p;
            if (len) { do *zout++ = v; while (--len); }
         } else {
            // byte-by-byte on purpose: source and destination may overlap
            if (len) { do *zout++ = *p++; while (--len); }
         }
      }
   }
}
4018 
// Reads the header of a dynamic-Huffman block and builds a->z_length and
// a->z_distance from it. Returns 1 on success, 0 / the stbi__err failure
// value when the encoded code lengths are inconsistent.
static int stbi__compute_huffman_codes(stbi__zbuf *a)
{
   static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
   stbi__zhuffman z_codelength;
   stbi_uc lencodes[286+32+137];//padding for maximum single op
   stbi_uc codelength_sizes[19];
   int idx, filled;
   int hlit, hdist, hclen, ntot;

   // counts of literal/length codes, distance codes and code-length codes
   hlit  = stbi__zreceive(a,5) + 257;
   hdist = stbi__zreceive(a,5) + 1;
   hclen = stbi__zreceive(a,4) + 4;
   ntot  = hlit + hdist;

   // the code-length code lengths arrive in a permuted order
   memset(codelength_sizes, 0, sizeof(codelength_sizes));
   for (idx = 0; idx < hclen; ++idx)
      codelength_sizes[length_dezigzag[idx]] = (stbi_uc) stbi__zreceive(a,3);
   if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;

   // decode the run-length-compressed list of ntot code lengths
   for (filled = 0; filled < ntot; ) {
      stbi_uc fill;
      int repeat;
      int sym = stbi__zhuffman_decode(a, &z_codelength);
      if (sym < 0 || sym >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
      if (sym < 16) {
         // a literal code length
         lencodes[filled++] = (stbi_uc) sym;
         continue;
      }
      fill = 0;
      switch (sym) {
         case 16: // repeat the previous length
            repeat = stbi__zreceive(a,2)+3;
            if (filled == 0) return stbi__err("bad codelengths", "Corrupt PNG");
            fill = lencodes[filled-1];
            break;
         case 17: // short run of zeros
            repeat = stbi__zreceive(a,3)+3;
            break;
         default: // long run of zeros
            STBI_ASSERT(sym == 18);
            repeat = stbi__zreceive(a,7)+11;
            break;
      }
      if (ntot - filled < repeat) return stbi__err("bad codelengths", "Corrupt PNG");
      memset(lencodes+filled, fill, repeat);
      filled += repeat;
   }
   if (filled != ntot) return stbi__err("bad codelengths","Corrupt PNG");
   if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
   if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
   return 1;
}
4067 
// Copies one stored (uncompressed) block from the input to the output.
// Returns 1 on success, 0 / the stbi__err failure value when the length
// header is inconsistent, the block runs past the input, or the output
// cannot hold it.
static int stbi__parse_uncompressed_block(stbi__zbuf *a)
{
   stbi_uc header[4];
   int len, nlen;
   int filled = 0;
   int partial = a->num_bits & 7;
   if (partial)
      stbi__zreceive(a, partial); // discard
   // drain the bit-packed data into header
   for (; a->num_bits > 0; a->num_bits -= 8) {
      header[filled++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
      a->code_buffer >>= 8;
   }
   STBI_ASSERT(a->num_bits == 0);
   // now fill header the normal way
   for (; filled < 4; ++filled)
      header[filled] = stbi__zget8(a);
   // two little-endian 16-bit fields: the length and its one's complement
   len  = (header[1] << 8) | header[0];
   nlen = (header[3] << 8) | header[2];
   if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
   if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG");
   if (a->zout + len > a->zout_end) {
      if (!stbi__zexpand(a, a->zout, len)) return 0;
   }
   memcpy(a->zout, a->zbuffer, len);
   a->zout += len;
   a->zbuffer += len;
   return 1;
}
4096 
// Reads and validates the two-byte zlib stream header.
// Returns 1 if acceptable, otherwise the stbi__err failure value.
static int stbi__parse_zlib_header(stbi__zbuf *a)
{
   int cmf, flg, method;
   cmf = stbi__zget8(a);
   flg = stbi__zget8(a);
   method = cmf & 15;
   /* the high nibble of cmf (cinfo) gives window = 1 << (8 + cinfo)...
      but who cares, we fully buffer output */
   if (((cmf << 8) + flg) % 31 != 0)
      return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
   if ((flg & 32) != 0)
      return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
   if (method != 8)
      return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
   return 1;
}
4109 
// Code lengths passed to stbi__zbuild_huffman for blocks of type 1
// ("fixed code lengths"): one entry per literal/length symbol...
static const stbi_uc stbi__zdefault_length[288] =
{
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
};
// ...and one entry per distance symbol (all 5 bits).
static const stbi_uc stbi__zdefault_distance[32] =
{
   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
};
4126 /*
4127 Init algorithm:
4128 {
4129    int i;   // use <= to match clearly with spec
4130    for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;
4131    for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9;
4132    for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7;
4133    for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8;
4134 
4135    for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5;
4136 }
4137 */
4138 
// Inflates a whole stream block by block until the block flagged as final
// has been processed.
//   parse_header - nonzero to read and validate a zlib header first
// Returns 1 on success, 0 on any failure.
static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
{
   int is_last, block_type;
   if (parse_header && !stbi__parse_zlib_header(a))
      return 0;
   a->num_bits = 0;
   a->code_buffer = 0;
   do {
      is_last    = stbi__zreceive(a,1);
      block_type = stbi__zreceive(a,2);
      switch (block_type) {
         case 0: // stored block
            if (!stbi__parse_uncompressed_block(a)) return 0;
            break;
         case 3: // not a valid block type
            return 0;
         case 1: // use fixed code lengths
            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , 288)) return 0;
            if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance,  32)) return 0;
            if (!stbi__parse_huffman_block(a)) return 0;
            break;
         default: // type 2: code lengths are stored in the block
            if (!stbi__compute_huffman_codes(a)) return 0;
            if (!stbi__parse_huffman_block(a)) return 0;
            break;
      }
   } while (!is_last);
   return 1;
}
4166 
// Points the inflate state at the output buffer obuf (olen bytes), records
// whether it may be grown (exp), and runs the decoder.
// Returns the result of stbi__parse_zlib.
static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
{
   a->z_expandable = exp;
   a->zout_start   = obuf;
   a->zout_end     = obuf + olen;
   a->zout         = a->zout_start;

   return stbi__parse_zlib(a, parse_header);
}
4176 
stbi_zlib_decode_malloc_guesssize(const char * buffer,int len,int initial_size,int * outlen)4177 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
4178 {
4179    stbi__zbuf a;
4180    char *p = (char *) stbi__malloc(initial_size);
4181    if (p == NULL) return NULL;
4182    a.zbuffer = (stbi_uc *) buffer;
4183    a.zbuffer_end = (stbi_uc *) buffer + len;
4184    if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
4185       if (outlen) *outlen = (int) (a.zout - a.zout_start);
4186       return a.zout_start;
4187    } else {
4188       STBI_FREE(a.zout_start);
4189       return NULL;
4190    }
4191 }
4192 
stbi_zlib_decode_malloc(char const * buffer,int len,int * outlen)4193 STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
4194 {
4195    return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
4196 }
4197 
stbi_zlib_decode_malloc_guesssize_headerflag(const char * buffer,int len,int initial_size,int * outlen,int parse_header)4198 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
4199 {
4200    stbi__zbuf a;
4201    char *p = (char *) stbi__malloc(initial_size);
4202    if (p == NULL) return NULL;
4203    a.zbuffer = (stbi_uc *) buffer;
4204    a.zbuffer_end = (stbi_uc *) buffer + len;
4205    if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
4206       if (outlen) *outlen = (int) (a.zout - a.zout_start);
4207       return a.zout_start;
4208    } else {
4209       STBI_FREE(a.zout_start);
4210       return NULL;
4211    }
4212 }
4213 
stbi_zlib_decode_buffer(char * obuffer,int olen,char const * ibuffer,int ilen)4214 STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
4215 {
4216    stbi__zbuf a;
4217    a.zbuffer = (stbi_uc *) ibuffer;
4218    a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4219    if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
4220       return (int) (a.zout - a.zout_start);
4221    else
4222       return -1;
4223 }
4224 
stbi_zlib_decode_noheader_malloc(char const * buffer,int len,int * outlen)4225 STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
4226 {
4227    stbi__zbuf a;
4228    char *p = (char *) stbi__malloc(16384);
4229    if (p == NULL) return NULL;
4230    a.zbuffer = (stbi_uc *) buffer;
4231    a.zbuffer_end = (stbi_uc *) buffer+len;
4232    if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
4233       if (outlen) *outlen = (int) (a.zout - a.zout_start);
4234       return a.zout_start;
4235    } else {
4236       STBI_FREE(a.zout_start);
4237       return NULL;
4238    }
4239 }
4240 
stbi_zlib_decode_noheader_buffer(char * obuffer,int olen,const char * ibuffer,int ilen)4241 STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
4242 {
4243    stbi__zbuf a;
4244    a.zbuffer = (stbi_uc *) ibuffer;
4245    a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4246    if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
4247       return (int) (a.zout - a.zout_start);
4248    else
4249       return -1;
4250 }
4251 #endif
4252 
4253 // public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
4254 //    simple implementation
4255 //      - only 8-bit samples
4256 //      - no CRC checking
4257 //      - allocates lots of intermediate memory
4258 //        - avoids problem of streaming data between subsystems
4259 //        - avoids explicit window management
4260 //    performance
4261 //      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
4262 
4263 #ifndef STBI_NO_PNG
// Header of one PNG chunk as read by stbi__get_chunk_header: two
// consecutive big-endian 32-bit values.
typedef struct
{
   stbi__uint32 length; // first value read
   stbi__uint32 type;   // second value read
} stbi__pngchunk;
4269 
stbi__get_chunk_header(stbi__context * s)4270 static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
4271 {
4272    stbi__pngchunk c;
4273    c.length = stbi__get32be(s);
4274    c.type   = stbi__get32be(s);
4275    return c;
4276 }
4277 
// Consumes bytes from the stream and compares them with the 8-byte PNG
// signature. Returns 1 if all eight match; stops at the first mismatch and
// returns the stbi__err failure value.
static int stbi__check_png_header(stbi__context *s)
{
   static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
   const stbi_uc *expected = png_sig;
   const stbi_uc *sig_end  = png_sig + 8;
   while (expected != sig_end) {
      if (stbi__get8(s) != *expected)
         return stbi__err("bad png sig","Not a PNG");
      ++expected;
   }
   return 1;
}
4286 
// Per-image PNG decoder state.
typedef struct
{
   stbi__context *s;                // input stream; also carries img_x, img_y, img_n
   stbi_uc *idata, *expanded, *out; // out: decoded pixels allocated by stbi__create_png_image_raw; idata/expanded are used outside this section
   int depth;                       // presumably the bit depth from the PNG header - TODO confirm against the chunk parser
} stbi__png;
4293 
4294 
// Scanline filter selectors. Values 0..4 are the filter bytes read from the
// image data; anything above 4 is rejected as "invalid filter".
enum {
   STBI__F_none=0,
   STBI__F_sub=1,
   STBI__F_up=2,
   STBI__F_avg=3,
   STBI__F_paeth=4,
   // synthetic filters used for first scanline to avoid needing a dummy row of 0s
   STBI__F_avg_first,
   STBI__F_paeth_first
};
4305 
4306 static stbi_uc first_row_filter[5] =
4307 {
4308    STBI__F_none,
4309    STBI__F_sub,
4310    STBI__F_none,
4311    STBI__F_avg_first,
4312    STBI__F_paeth_first
4313 };
4314 
// Paeth predictor: returns whichever of a, b, c is closest to a + b - c.
// Ties are resolved in the order a, then b, then c.
static int stbi__paeth(int a, int b, int c)
{
   int estimate = a + b - c;
   int dist_a = abs(estimate - a);
   int dist_b = abs(estimate - b);
   int dist_c = abs(estimate - c);
   if (dist_a > dist_b || dist_a > dist_c)
      return (dist_b <= dist_c) ? b : c;
   return a;
}
4325 
// Indexed by bit depth: multiplier that stretches a grayscale sample of that
// depth to the 0..255 range (1-bit * 0xff, 2-bit * 0x55, 4-bit * 0x11, 8-bit * 1).
static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
4327 
4328 // create the png data from post-deflated data
stbi__create_png_image_raw(stbi__png * a,stbi_uc * raw,stbi__uint32 raw_len,int out_n,stbi__uint32 x,stbi__uint32 y,int depth,int color)4329 static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
4330 {
4331    int bytes = (depth == 16? 2 : 1);
4332    stbi__context *s = a->s;
4333    stbi__uint32 i,j,stride = x*out_n*bytes;
4334    stbi__uint32 img_len, img_width_bytes;
4335    int k;
4336    int img_n = s->img_n; // copy it into a local for later
4337 
4338    int output_bytes = out_n*bytes;
4339    int filter_bytes = img_n*bytes;
4340    int width = x;
4341 
4342    STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
4343    a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
4344    if (!a->out) return stbi__err("outofmem", "Out of memory");
4345 
4346    if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG");
4347    img_width_bytes = (((img_n * x * depth) + 7) >> 3);
4348    img_len = (img_width_bytes + 1) * y;
4349 
4350    // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
4351    // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
4352    // so just check for raw_len < img_len always.
4353    if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");
4354 
4355    for (j=0; j < y; ++j) {
4356       stbi_uc *cur = a->out + stride*j;
4357       stbi_uc *prior;
4358       int filter = *raw++;
4359 
4360       if (filter > 4)
4361          return stbi__err("invalid filter","Corrupt PNG");
4362 
4363       if (depth < 8) {
4364          STBI_ASSERT(img_width_bytes <= x);
4365          cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
4366          filter_bytes = 1;
4367          width = img_width_bytes;
4368       }
4369       prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above
4370 
4371       // if first row, use special filter that doesn't sample previous row
4372       if (j == 0) filter = first_row_filter[filter];
4373 
4374       // handle first byte explicitly
4375       for (k=0; k < filter_bytes; ++k) {
4376          switch (filter) {
4377             case STBI__F_none       : cur[k] = raw[k]; break;
4378             case STBI__F_sub        : cur[k] = raw[k]; break;
4379             case STBI__F_up         : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
4380             case STBI__F_avg        : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break;
4381             case STBI__F_paeth      : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break;
4382             case STBI__F_avg_first  : cur[k] = raw[k]; break;
4383             case STBI__F_paeth_first: cur[k] = raw[k]; break;
4384          }
4385       }
4386 
4387       if (depth == 8) {
4388          if (img_n != out_n)
4389             cur[img_n] = 255; // first pixel
4390          raw += img_n;
4391          cur += out_n;
4392          prior += out_n;
4393       } else if (depth == 16) {
4394          if (img_n != out_n) {
4395             cur[filter_bytes]   = 255; // first pixel top byte
4396             cur[filter_bytes+1] = 255; // first pixel bottom byte
4397          }
4398          raw += filter_bytes;
4399          cur += output_bytes;
4400          prior += output_bytes;
4401       } else {
4402          raw += 1;
4403          cur += 1;
4404          prior += 1;
4405       }
4406 
4407       // this is a little gross, so that we don't switch per-pixel or per-component
4408       if (depth < 8 || img_n == out_n) {
4409          int nk = (width - 1)*filter_bytes;
4410          #define STBI__CASE(f) \
4411              case f:     \
4412                 for (k=0; k < nk; ++k)
4413          switch (filter) {
4414             // "none" filter turns into a memcpy here; make that explicit.
4415             case STBI__F_none:         memcpy(cur, raw, nk); break;
4416             STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break;
4417             STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
4418             STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break;
4419             STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break;
4420             STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break;
4421             STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break;
4422          }
4423          #undef STBI__CASE
4424          raw += nk;
4425       } else {
4426          STBI_ASSERT(img_n+1 == out_n);
4427          #define STBI__CASE(f) \
4428              case f:     \
4429                 for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
4430                    for (k=0; k < filter_bytes; ++k)
4431          switch (filter) {
4432             STBI__CASE(STBI__F_none)         { cur[k] = raw[k]; } break;
4433             STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break;
4434             STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
4435             STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break;
4436             STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break;
4437             STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break;
4438             STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break;
4439          }
4440          #undef STBI__CASE
4441 
4442          // the loop above sets the high byte of the pixels' alpha, but for
4443          // 16 bit png files we also need the low byte set. we'll do that here.
4444          if (depth == 16) {
4445             cur = a->out + stride*j; // start at the beginning of the row again
4446             for (i=0; i < x; ++i,cur+=output_bytes) {
4447                cur[filter_bytes+1] = 255;
4448             }
4449          }
4450       }
4451    }
4452 
4453    // we make a separate pass to expand bits to pixels; for performance,
4454    // this could run two scanlines behind the above code, so it won't
4455    // intefere with filtering but will still be in the cache.
4456    if (depth < 8) {
4457       for (j=0; j < y; ++j) {
4458          stbi_uc *cur = a->out + stride*j;
4459          stbi_uc *in  = a->out + stride*j + x*out_n - img_width_bytes;
4460          // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
4461          // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
4462          stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
4463 
4464          // note that the final byte might overshoot and write more data than desired.
4465          // we can allocate enough data that this never writes out of memory, but it
4466          // could also overwrite the next scanline. can it overwrite non-empty data
4467          // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
4468          // so we need to explicitly clamp the final ones
4469 
4470          if (depth == 4) {
4471             for (k=x*img_n; k >= 2; k-=2, ++in) {
4472                *cur++ = scale * ((*in >> 4)       );
4473                *cur++ = scale * ((*in     ) & 0x0f);
4474             }
4475             if (k > 0) *cur++ = scale * ((*in >> 4)       );
4476          } else if (depth == 2) {
4477             for (k=x*img_n; k >= 4; k-=4, ++in) {
4478                *cur++ = scale * ((*in >> 6)       );
4479                *cur++ = scale * ((*in >> 4) & 0x03);
4480                *cur++ = scale * ((*in >> 2) & 0x03);
4481                *cur++ = scale * ((*in     ) & 0x03);
4482             }
4483             if (k > 0) *cur++ = scale * ((*in >> 6)       );
4484             if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
4485             if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
4486          } else if (depth == 1) {
4487             for (k=x*img_n; k >= 8; k-=8, ++in) {
4488                *cur++ = scale * ((*in >> 7)       );
4489                *cur++ = scale * ((*in >> 6) & 0x01);
4490                *cur++ = scale * ((*in >> 5) & 0x01);
4491                *cur++ = scale * ((*in >> 4) & 0x01);
4492                *cur++ = scale * ((*in >> 3) & 0x01);
4493                *cur++ = scale * ((*in >> 2) & 0x01);
4494                *cur++ = scale * ((*in >> 1) & 0x01);
4495                *cur++ = scale * ((*in     ) & 0x01);
4496             }
4497             if (k > 0) *cur++ = scale * ((*in >> 7)       );
4498             if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
4499             if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
4500             if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
4501             if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
4502             if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
4503             if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
4504          }
4505          if (img_n != out_n) {
4506             int q;
4507             // insert alpha = 255
4508             cur = a->out + stride*j;
4509             if (img_n == 1) {
4510                for (q=x-1; q >= 0; --q) {
4511                   cur[q*2+1] = 255;
4512                   cur[q*2+0] = cur[q];
4513                }
4514             } else {
4515                STBI_ASSERT(img_n == 3);
4516                for (q=x-1; q >= 0; --q) {
4517                   cur[q*4+3] = 255;
4518                   cur[q*4+2] = cur[q*3+2];
4519                   cur[q*4+1] = cur[q*3+1];
4520                   cur[q*4+0] = cur[q*3+0];
4521                }
4522             }
4523          }
4524       }
4525    } else if (depth == 16) {
4526       // force the image data from big-endian to platform-native.
4527       // this is done in a separate pass due to the decoding relying
4528       // on the data being untouched, but could probably be done
4529       // per-line during decode if care is taken.
4530       stbi_uc *cur = a->out;
4531       stbi__uint16 *cur16 = (stbi__uint16*)cur;
4532 
4533       for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) {
4534          *cur16 = (cur[0] << 8) | cur[1];
4535       }
4536    }
4537 
4538    return 1;
4539 }
4540 
// Builds the final pixel buffer a->out from inflated image data.
//   a              - decoder state; a->s supplies img_x, img_y, img_n
//   image_data     - inflated scanline data
//   image_data_len - number of bytes at image_data
//   out_n          - channels per output pixel
//   depth, color   - passed through to stbi__create_png_image_raw
//   interlaced     - zero: decode the image in one pass;
//                    nonzero: decode seven sub-images and scatter their
//                    pixels into the full-size image
// Returns 1 on success, 0 / the stbi__err failure value on failure.
static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
{
   int bytes = (depth == 16 ? 2 : 1);
   int out_bytes = out_n * bytes;
   stbi_uc *final;
   int p;
   if (!interlaced)
      return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);

   // de-interlacing
   final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
   // without this check a failed allocation would be used as the memcpy
   // destination below
   if (!final) return stbi__err("outofmem", "Out of memory");
   for (p=0; p < 7; ++p) {
      // per-pass origin and spacing of the pixels within the full image
      int xorig[] = { 0,4,0,2,0,1,0 };
      int yorig[] = { 0,0,4,0,2,0,1 };
      int xspc[]  = { 8,8,4,4,2,2,1 };
      int yspc[]  = { 8,8,8,4,4,2,2 };
      int i,j,x,y;
      // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
      x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
      y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
      if (x && y) {
         // bytes this pass occupies in image_data: one filter byte plus the packed row, per row
         stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
         if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
            STBI_FREE(final);
            return 0;
         }
         // scatter the decoded sub-image (in a->out) into its positions in the full image
         for (j=0; j < y; ++j) {
            for (i=0; i < x; ++i) {
               int out_y = j*yspc[p]+yorig[p];
               int out_x = i*xspc[p]+xorig[p];
               memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes,
                      a->out + (j*x+i)*out_bytes, out_bytes);
            }
         }
         STBI_FREE(a->out);
         image_data += img_len;
         image_data_len -= img_len;
      }
   }
   a->out = final;

   return 1;
}
4584 
// Apply color-key (tRNS) transparency to the 8-bit image in z->out.
//
// The image must already carry an alpha channel (out_n is 2 or 4). With two
// channels every pixel's alpha is rewritten: 0 when the grey value equals
// tc[0], 255 otherwise. With four channels, alpha is cleared only on pixels
// whose first three channels equal tc[0..2]; other pixels keep their alpha.
//
// Always returns 1.
static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
{
   stbi__context *ctx = z->s;
   stbi__uint32 remaining = ctx->img_x * ctx->img_y;
   stbi_uc *px = z->out;

   STBI_ASSERT(out_n == 2 || out_n == 4);

   if (out_n == 2) {
      // grey + alpha
      for (; remaining > 0; --remaining, px += 2)
         px[1] = (px[0] == tc[0] ? 0 : 255);
   } else {
      // red, green, blue + alpha
      for (; remaining > 0; --remaining, px += 4) {
         if (px[0] != tc[0] || px[1] != tc[1] || px[2] != tc[2])
            continue;
         px[3] = 0;
      }
   }
   return 1;
}
4609 
// 16-bit counterpart of stbi__compute_transparency: z->out is treated as an
// array of 16-bit samples.
//
// With two channels every pixel's alpha is rewritten: 0 when the grey value
// equals tc[0], 65535 otherwise. With four channels, alpha is cleared only on
// pixels whose first three channels equal tc[0..2].
//
// Always returns 1.
static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n)
{
   stbi__context *ctx = z->s;
   stbi__uint32 remaining = ctx->img_x * ctx->img_y;
   stbi__uint16 *px = (stbi__uint16*) z->out;

   STBI_ASSERT(out_n == 2 || out_n == 4);

   if (out_n == 2) {
      // grey + alpha
      for (; remaining > 0; --remaining, px += 2)
         px[1] = (px[0] == tc[0] ? 0 : 65535);
   } else {
      // red, green, blue + alpha
      for (; remaining > 0; --remaining, px += 4) {
         if (px[0] != tc[0] || px[1] != tc[1] || px[2] != tc[2])
            continue;
         px[3] = 0;
      }
   }
   return 1;
}
4634 
// Replace the one-byte-per-pixel index image in a->out with an expanded
// RGB (pal_img_n == 3) or RGBA (any other pal_img_n) image.
//
//   palette   - table of 4-byte entries (r,g,b,a), indexed by pixel value
//   len       - unused
//   pal_img_n - channels to emit per pixel
//
// On success the old a->out is freed and replaced, and 1 is returned. If the
// new buffer cannot be allocated, a->out is left untouched and the result of
// stbi__err is returned.
static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
{
   stbi__uint32 npixels = a->s->img_x * a->s->img_y;
   const stbi_uc *indices = a->out;
   stbi_uc *expanded, *dst;
   stbi__uint32 k;

   STBI_NOTUSED(len);

   expanded = (stbi_uc *) stbi__malloc_mad2(npixels, pal_img_n, 0);
   if (expanded == NULL) return stbi__err("outofmem", "Out of memory");

   // from here until the old buffer is freed below, an early exit would leak
   dst = expanded;

   if (pal_img_n == 3) {
      for (k = 0; k < npixels; ++k, dst += 3) {
         const stbi_uc *entry = palette + indices[k]*4;
         dst[0] = entry[0];
         dst[1] = entry[1];
         dst[2] = entry[2];
      }
   } else {
      for (k = 0; k < npixels; ++k, dst += 4) {
         const stbi_uc *entry = palette + indices[k]*4;
         dst[0] = entry[0];
         dst[1] = entry[1];
         dst[2] = entry[2];
         dst[3] = entry[3];
      }
   }

   STBI_FREE(a->out);
   a->out = expanded;

   return 1;
}
4671 
// File-scope PNG loader options, both off by default.
// stbi__unpremultiply_on_load: read by stbi__de_iphone to decide whether color
// channels are divided by alpha while converting.
// stbi__de_iphone_flag: read by the PNG parser to decide whether stbi__de_iphone
// is run on CgBI ("iPhone") PNGs at all.
static int stbi__unpremultiply_on_load = 0;
static int stbi__de_iphone_flag = 0;
4674 
// Public setter for the file-scope stbi__unpremultiply_on_load option.
// The value is stored as-is; nonzero enables the option.
STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
{
   stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
}
4679 
// Public setter for the file-scope stbi__de_iphone_flag option.
// The value is stored as-is; nonzero enables the option.
STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
{
   stbi__de_iphone_flag = flag_true_if_should_convert;
}
4684 
// Convert the decoded image in z->out from BGR(A) channel order to RGB(A),
// in place. For 4-channel images, when stbi__unpremultiply_on_load is set,
// the color channels are additionally divided by alpha (with rounding);
// pixels whose alpha is zero are only channel-swapped.
static void stbi__de_iphone(stbi__png *z)
{
   stbi__context *ctx = z->s;
   stbi__uint32 remaining = ctx->img_x * ctx->img_y;
   stbi_uc *px = z->out;

   if (ctx->img_out_n == 3) {
      // bgr -> rgb
      for (; remaining > 0; --remaining, px += 3) {
         stbi_uc first = px[0];
         px[0] = px[2];
         px[2] = first;
      }
      return;
   }

   STBI_ASSERT(ctx->img_out_n == 4);

   if (!stbi__unpremultiply_on_load) {
      // bgra -> rgba, alpha untouched
      for (; remaining > 0; --remaining, px += 4) {
         stbi_uc first = px[0];
         px[0] = px[2];
         px[2] = first;
      }
      return;
   }

   // bgra -> rgba and unpremultiply
   for (; remaining > 0; --remaining, px += 4) {
      stbi_uc alpha = px[3];
      stbi_uc first = px[0];
      if (alpha == 0) {
         // nothing to divide by: swap only
         px[0] = px[2];
         px[2] = first;
         continue;
      }
      {
         stbi_uc half = alpha / 2;   // rounding term for the division
         px[0] = (px[2] * 255 + half) / alpha;
         px[1] = (px[1] * 255 + half) / alpha;
         px[2] = (first * 255 + half) / alpha;
      }
   }
}
4727 
// Pack four chunk-name characters into one 32-bit value, first character in
// the most significant byte, so chunk types can be used as switch labels.
#define STBI__PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))
4729 
// Walk the chunks of a PNG stream and, depending on 'scan', identify it,
// read its header information, or fully decode it.
//
//   z        - decoder state; z->s is the input stream. z->expanded, z->idata
//              and z->out are reset to NULL on entry.
//   scan     - STBI__SCAN_type:   only verify the PNG signature
//              STBI__SCAN_header: stop as soon as img_x/img_y/img_n are known
//              STBI__SCAN_load:   decode the image into z->out
//   req_comp - requested channel count (0 = keep as stored); it only
//              influences how many channels are produced while decoding
//
// Returns 1 on success and 0 on failure. On failure z->idata, z->expanded
// and z->out may still hold allocations; the caller is expected to free them.
static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
{
   // 256 RGBA palette entries. Zero-initialised so that pixel indices beyond
   // the entries supplied by PLTE expand to a defined value instead of
   // copying whatever happened to be on the stack into the output image.
   stbi_uc palette[1024] = {0}, pal_img_n=0;
   // tRNS color key for non-paletted images (8-bit and 16-bit variants)
   stbi_uc has_trans=0, tc[3] = {0};
   stbi__uint16 tc16[3] = {0};
   // ioff: bytes of IDAT data collected so far; idata_limit: capacity of z->idata
   stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
   int first=1,k,interlace=0, color=0, is_iphone=0;
   stbi__context *s = z->s;

   z->expanded = NULL;
   z->idata = NULL;
   z->out = NULL;

   if (!stbi__check_png_header(s)) return 0;

   if (scan == STBI__SCAN_type) return 1;

   for (;;) {
      stbi__pngchunk c = stbi__get_chunk_header(s);
      switch (c.type) {
         case STBI__PNG_TYPE('C','g','B','I'):
            // marks the file as an "iPhone" PNG; affects inflate header
            // parsing and the optional BGR->RGB conversion at IEND
            is_iphone = 1;
            stbi__skip(s, c.length);
            break;
         case STBI__PNG_TYPE('I','H','D','R'): {
            int comp,filter;
            if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
            first = 0;
            if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
            s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
            s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
            z->depth = stbi__get8(s);  if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)  return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only");
            color = stbi__get8(s);  if (color > 6)         return stbi__err("bad ctype","Corrupt PNG");
            if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype","Corrupt PNG");
            // color type 3 is paletted; any other odd color type is invalid
            if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
            comp  = stbi__get8(s);  if (comp) return stbi__err("bad comp method","Corrupt PNG");
            filter= stbi__get8(s);  if (filter) return stbi__err("bad filter method","Corrupt PNG");
            interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG");
            if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG");
            if (!pal_img_n) {
               // bit 1 of the color type selects RGB, bit 2 adds alpha
               s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
               if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
               if (scan == STBI__SCAN_header) return 1;
            } else {
               // if paletted, then pal_n is our final components, and
               // img_n is # components to decompress/filter.
               s->img_n = 1;
               if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
               // if SCAN_header, have to scan to see if we have a tRNS
            }
            break;
         }

         case STBI__PNG_TYPE('P','L','T','E'):  {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
            pal_len = c.length / 3;
            if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG");
            // store as RGBA with opaque alpha; tRNS may overwrite the alpha later
            for (i=0; i < pal_len; ++i) {
               palette[i*4+0] = stbi__get8(s);
               palette[i*4+1] = stbi__get8(s);
               palette[i*4+2] = stbi__get8(s);
               palette[i*4+3] = 255;
            }
            break;
         }

         case STBI__PNG_TYPE('t','R','N','S'): {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
            if (pal_img_n) {
               // paletted: tRNS supplies per-entry alpha values
               if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
               if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
               if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
               pal_img_n = 4;
               for (i=0; i < c.length; ++i)
                  palette[i*4+3] = stbi__get8(s);
            } else {
               // non-paletted: tRNS supplies a single color key, one 16-bit value per channel
               if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
               if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
               has_trans = 1;
               if (z->depth == 16) {
                  for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
               } else {
                  for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
               }
            }
            break;
         }

         case STBI__PNG_TYPE('I','D','A','T'): {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
            if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
            // reject a total size that no longer fits in a signed int
            if ((int)(ioff + c.length) < (int)ioff) return 0;
            if (ioff + c.length > idata_limit) {
               // grow the accumulation buffer geometrically
               stbi__uint32 idata_limit_old = idata_limit;
               stbi_uc *p;
               if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
               while (ioff + c.length > idata_limit)
                  idata_limit *= 2;
               STBI_NOTUSED(idata_limit_old);
               p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
               z->idata = p;
            }
            if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
            ioff += c.length;
            break;
         }

         case STBI__PNG_TYPE('I','E','N','D'): {
            stbi__uint32 raw_len, bpl;
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (scan != STBI__SCAN_load) return 1;
            if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
            // initial guess for decoded data size to avoid unnecessary reallocs
            bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
            raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
            z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone);
            if (z->expanded == NULL) return 0; // zlib should set error
            STBI_FREE(z->idata); z->idata = NULL;
            // produce an extra (alpha) channel while decoding when the caller
            // asked for exactly one more channel, or when a color key must be applied
            if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
               s->img_out_n = s->img_n+1;
            else
               s->img_out_n = s->img_n;
            if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
            if (has_trans) {
               if (z->depth == 16) {
                  if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
               } else {
                  if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
               }
            }
            if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
               stbi__de_iphone(z);
            if (pal_img_n) {
               // pal_img_n == 3 or 4
               s->img_n = pal_img_n; // record the actual colors we had
               s->img_out_n = pal_img_n;
               if (req_comp >= 3) s->img_out_n = req_comp;
               if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
                  return 0;
            } else if (has_trans) {
               // non-paletted image with tRNS -> source image has (constant) alpha
               ++s->img_n;
            }
            STBI_FREE(z->expanded); z->expanded = NULL;
            return 1;
         }

         default:
            // if critical, fail
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            // bit 29 of the packed type is the case bit of the first name
            // character; clear means an upper-case (critical) chunk
            if ((c.type & (1 << 29)) == 0) {
               #ifndef STBI_NO_FAILURE_STRINGS
               // not threadsafe
               static char invalid_chunk[] = "XXXX PNG chunk not known";
               invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
               invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
               invalid_chunk[2] = STBI__BYTECAST(c.type >>  8);
               invalid_chunk[3] = STBI__BYTECAST(c.type >>  0);
               #endif
               return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
            }
            stbi__skip(s, c.length);
            break;
      }
      // end of PNG chunk, read and skip CRC
      stbi__get32be(s);
   }
}
4901 
// Decode a complete PNG through 'p' and hand the pixel buffer to the caller.
//
//   x, y     - receive the image dimensions (must be non-NULL)
//   n        - if non-NULL, receives the channel count of the source image
//   req_comp - 0 to keep the decoded channel count, or 1..4 to convert to it
//   ri       - receives bits_per_channel (8 for depths below 8, otherwise
//              the file's depth)
//
// Returns the pixel buffer, or NULL on failure. Whatever p still owns
// (out / expanded / idata) is released before a normal return.
static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
{
   void *pixels = NULL;

   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");

   if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
      stbi__context *ctx = p->s;

      ri->bits_per_channel = (p->depth < 8) ? 8 : p->depth;

      // take ownership of the decoded buffer so the cleanup below leaves it alone
      pixels = p->out;
      p->out = NULL;

      if (req_comp != 0 && req_comp != ctx->img_out_n) {
         if (ri->bits_per_channel == 8)
            pixels = stbi__convert_format((unsigned char *) pixels, ctx->img_out_n, req_comp, ctx->img_x, ctx->img_y);
         else
            pixels = stbi__convert_format16((stbi__uint16 *) pixels, ctx->img_out_n, req_comp, ctx->img_x, ctx->img_y);
         ctx->img_out_n = req_comp;
         if (pixels == NULL) return pixels;
      }

      *x = ctx->img_x;
      *y = ctx->img_y;
      if (n) *n = ctx->img_n;
   }

   STBI_FREE(p->out);      p->out      = NULL;
   STBI_FREE(p->expanded); p->expanded = NULL;
   STBI_FREE(p->idata);    p->idata    = NULL;

   return pixels;
}
4931 
// PNG load entry point: wraps the stream in a temporary stbi__png state and
// forwards all arguments to stbi__do_png, returning its result.
static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi__png p;
   p.s = s; // only the stream is set here; the other fields are left for the parser
   return stbi__do_png(&p, x,y,comp,req_comp, ri);
}
4938 
// Report whether the stream starts with a valid PNG header (nonzero if so).
// The stream is rewound afterwards in either case.
static int stbi__png_test(stbi__context *s)
{
   int is_png = stbi__check_png_header(s);
   stbi__rewind(s);
   return is_png;
}
4946 
// Run a header-only scan of the PNG behind 'p' and report its width, height
// and channel count through whichever of x / y / comp are non-NULL.
// Returns 1 on success. On failure the stream is rewound and 0 is returned.
static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
{
   stbi__context *ctx;

   if (stbi__parse_png_file(p, STBI__SCAN_header, 0) == 0) {
      stbi__rewind( p->s );
      return 0;
   }

   ctx = p->s;
   if (x != NULL)    *x    = ctx->img_x;
   if (y != NULL)    *y    = ctx->img_y;
   if (comp != NULL) *comp = ctx->img_n;
   return 1;
}
4958 
// PNG info entry point: wraps the stream in a temporary stbi__png state and
// forwards to stbi__png_info_raw, returning its result.
static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__png p;
   p.s = s; // only the stream is set here; the other fields are left for the parser
   return stbi__png_info_raw(&p, x, y, comp);
}
4965 
// Return 1 if the stream holds a PNG whose header declares 16 bits per
// channel, 0 otherwise. The stream is rewound whenever 0 is returned
// (by stbi__png_info_raw on a failed scan, or here for other depths).
static int stbi__png_is16(stbi__context *s)
{
   stbi__png png;
   png.s = s;

   if (stbi__png_info_raw(&png, NULL, NULL, NULL) == 0)
      return 0;

   if (png.depth == 16)
      return 1;

   stbi__rewind(png.s);
   return 0;
}
4978 #endif
4979 
4980 // Microsoft/Windows BMP image
4981 
4982 #ifndef STBI_NO_BMP
// Check for a BMP signature: the bytes 'B','M', followed (after the rest of
// the 14-byte file header) by an info-header size this loader knows about.
// Returns 1 on a match, 0 otherwise. The stream is NOT rewound here.
static int stbi__bmp_test_raw(stbi__context *s)
{
   int header_size;

   if (stbi__get8(s) != 'B') return 0;
   if (stbi__get8(s) != 'M') return 0;

   stbi__get32le(s); // file size, ignored
   stbi__get16le(s); // reserved, ignored
   stbi__get16le(s); // reserved, ignored
   stbi__get32le(s); // pixel data offset, ignored

   header_size = stbi__get32le(s);
   switch (header_size) {
      case 12:
      case 40:
      case 56:
      case 108:
      case 124:
         return 1;
      default:
         return 0;
   }
}
4997 
// Report whether the stream looks like a BMP (see stbi__bmp_test_raw), then
// rewind the stream regardless of the outcome.
static int stbi__bmp_test(stbi__context *s)
{
   int is_bmp = stbi__bmp_test_raw(s);
   stbi__rewind(s);
   return is_bmp;
}
5004 
5005 
// Returns the index (0..31) of the highest set bit of z, or -1 when z is 0.
static int stbi__high_bit(unsigned int z)
{
   int step;
   int pos = 0;

   if (z == 0) return -1;

   // binary search: at each step test whether anything is set above the
   // lower 'step' bits, and if so discard those lower bits
   for (step = 16; step > 0; step >>= 1) {
      if ((z >> step) != 0) {
         pos += step;
         z >>= step;
      }
   }
   return pos;
}
5018 
// Count the set bits in a 32-bit value by summing adjacent bit groups in
// parallel: pairs, then groups of four, then bytes, then folding the bytes.
static int stbi__bitcount(unsigned int a)
{
   const unsigned int odd_bits  = 0x55555555;
   const unsigned int pair_mask = 0x33333333;
   const unsigned int low_nibs  = 0x0f0f0f0f;
   unsigned int sums = a;

   sums = (sums & odd_bits)  + ((sums >> 1) & odd_bits);   // each 2-bit field: 0..2
   sums = (sums & pair_mask) + ((sums >> 2) & pair_mask);  // each 4-bit field: 0..4
   sums = (sums + (sums >> 4)) & low_nibs;                 // each byte: 0..8
   sums += sums >> 8;                                      // low byte of each half: 0..16
   sums += sums >> 16;                                     // lowest byte: 0..32
   return sums & 0xff;
}
5028 
// Take a channel value that has been isolated with its bit mask
// (v = pixel & mask) and expand it to the full 8-bit range.
//
//   v     - the masked pixel bits. A mask may include bit 31, so the value is
//           handled as unsigned internally; treating it as a signed int made
//           the right shift sign-extend and produced wrong results.
//   shift - right shift that brings the mask's highest bit to bit 7
//           (negative: shift left by -shift instead)
//   bits  - number of bits in the channel; values outside 0..8 are clamped,
//           so a wider channel contributes only its top eight bits
//
// Returns a value in 0..255. The N-bit value is replicated into the lower
// bits (multiply, then shift, via the tables) so that all-ones maps to 255.
static int stbi__shiftsigned(int v, int shift, int bits)
{
   static const unsigned int mul_table[9] = {
      0,
      0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/,
      0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/,
   };
   static const unsigned int shift_table[9] = {
      0, 0,0,1,0,2,4,6,0,
   };
   unsigned int uv = (unsigned int) v;

   // keep the table index and the (8-bits) shift count in range
   if (bits < 0) bits = 0;
   if (bits > 8) bits = 8;

   if (shift < 0)
      uv <<= -shift;
   else
      uv >>= shift;
   // the caller masked v, so nothing may remain above bit 7
   STBI_ASSERT(uv < 256);
   uv >>= (8-bits);
   return (int) ((uv * mul_table[bits]) >> shift_table[bits]);
}
5051 
// Header values gathered by stbi__bmp_parse_header for the BMP loader.
typedef struct
{
   int bpp, offset, hsz;            // bits per pixel; offset field of the file header; info-header size in bytes
   unsigned int mr,mg,mb,ma, all_a; // red/green/blue/alpha bit masks; all_a accumulates (ORs) the alpha values seen while loading
} stbi__bmp_data;
5057 
// Read the BMP file header and info header from 's'.
//
// Fills s->img_x / s->img_y and the fields of 'info' (pixel data offset,
// header size, bits per pixel and channel masks). info->all_a is only
// written for uncompressed 32-bit images with a 40/56-byte header; callers
// must give it a default beforehand.
//
// Compression modes other than 0 (none) and 3 (bitfields) are rejected:
// 1/2 are RLE, and anything else would otherwise be decoded as if it were
// raw pixel data. Bitfields is only accepted for 16 and 32 bits per pixel.
//
// Returns a non-NULL dummy pointer on success, or the result of stbi__errpuc
// on failure. The stream is left positioned after the parsed header fields.
static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
{
   int hsz;
   if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
   stbi__get32le(s); // discard filesize
   stbi__get16le(s); // discard reserved
   stbi__get16le(s); // discard reserved
   info->offset = stbi__get32le(s);
   info->hsz = hsz = stbi__get32le(s);
   info->mr = info->mg = info->mb = info->ma = 0;

   if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
   if (hsz == 12) {
      // 12-byte header stores 16-bit dimensions
      s->img_x = stbi__get16le(s);
      s->img_y = stbi__get16le(s);
   } else {
      s->img_x = stbi__get32le(s);
      s->img_y = stbi__get32le(s);
   }
   if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
   info->bpp = stbi__get16le(s);
   if (hsz != 12) {
      int compress = stbi__get32le(s);
      if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
      // anything beyond bitfields (e.g. embedded JPEG/PNG) cannot be decoded as raw pixels
      if (compress < 0 || compress > 3) return stbi__errpuc("BMP JPEG/PNG", "BMP type not supported: unsupported compression");
      // bitfields requires 16 or 32 bits/pixel
      if (compress == 3 && info->bpp != 16 && info->bpp != 32) return stbi__errpuc("bad BMP", "bad BMP");
      stbi__get32le(s); // discard sizeof
      stbi__get32le(s); // discard hres
      stbi__get32le(s); // discard vres
      stbi__get32le(s); // discard colorsused
      stbi__get32le(s); // discard max important
      if (hsz == 40 || hsz == 56) {
         if (hsz == 56) {
            // the four extra dwords of the 56-byte header are skipped
            stbi__get32le(s);
            stbi__get32le(s);
            stbi__get32le(s);
            stbi__get32le(s);
         }
         if (info->bpp == 16 || info->bpp == 32) {
            if (compress == 0) {
               // no masks in the file: use the default layouts
               if (info->bpp == 32) {
                  info->mr = 0xffu << 16;
                  info->mg = 0xffu <<  8;
                  info->mb = 0xffu <<  0;
                  info->ma = 0xffu << 24;
                  info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
               } else {
                  info->mr = 31u << 10;
                  info->mg = 31u <<  5;
                  info->mb = 31u <<  0;
               }
            } else if (compress == 3) {
               info->mr = stbi__get32le(s);
               info->mg = stbi__get32le(s);
               info->mb = stbi__get32le(s);
               // not documented, but generated by photoshop and handled by mspaint
               if (info->mr == info->mg && info->mg == info->mb) {
                  // ?!?!?
                  return stbi__errpuc("bad BMP", "bad BMP");
               }
            } else
               return stbi__errpuc("bad BMP", "bad BMP");
         }
      } else {
         int i;
         if (hsz != 108 && hsz != 124)
            return stbi__errpuc("bad BMP", "bad BMP");
         // 108/124-byte headers always carry the four masks
         info->mr = stbi__get32le(s);
         info->mg = stbi__get32le(s);
         info->mb = stbi__get32le(s);
         info->ma = stbi__get32le(s);
         stbi__get32le(s); // discard color space
         for (i=0; i < 12; ++i)
            stbi__get32le(s); // discard color space parameters
         if (hsz == 124) {
            stbi__get32le(s); // discard rendering intent
            stbi__get32le(s); // discard offset of profile data
            stbi__get32le(s); // discard size of profile data
            stbi__get32le(s); // discard reserved
         }
      }
   }
   return (void *) 1;
}
5140 
5141 
// Decode a BMP image from 's' into a newly allocated 8-bit-per-channel buffer.
//
//   x, y     - receive the image dimensions
//   comp     - if non-NULL, receives the channel count of the source
//              (4 when an alpha mask is present, otherwise 3)
//   req_comp - 0 to keep the source channel count, or the desired count;
//              3 and 4 are decoded directly, 1 and 2 via a final conversion
//   ri       - unused
//
// Paletted (1/4/8 bpp), 16/24/32 bpp and bitfield images are handled.
// Returns the pixel buffer, or NULL (via stbi__errpuc) on failure.
static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *out;
   unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
   stbi_uc pal[256][4];
   int psize=0,i,j,width;
   int flip_vertically, pad, target;
   stbi__bmp_data info;
   STBI_NOTUSED(ri);

   info.all_a = 255;
   if (stbi__bmp_parse_header(s, &info) == NULL)
      return NULL; // error code already set

   // a positive height means the rows are stored bottom-up
   flip_vertically = ((int) s->img_y) > 0;
   s->img_y = abs((int) s->img_y);

   mr = info.mr;
   mg = info.mg;
   mb = info.mb;
   ma = info.ma;
   all_a = info.all_a;

   // number of palette entries = bytes between the end of the headers
   // (14-byte file header + info header) and the pixel data, divided by the
   // entry size (3 bytes for the 12-byte header, 4 otherwise)
   if (info.hsz == 12) {
      if (info.bpp < 24)
         psize = (info.offset - 14 - info.hsz) / 3;
   } else {
      if (info.bpp < 16)
         psize = (info.offset - 14 - info.hsz) >> 2;
   }

   s->img_n = ma ? 4 : 3;
   if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
      target = req_comp;
   else
      target = s->img_n; // if they want monochrome, we'll post-convert

   // sanity-check size
   if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
      return stbi__errpuc("too large", "Corrupt BMP");

   out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
   if (!out) return stbi__errpuc("outofmem", "Out of memory");
   if (info.bpp < 16) {
      int z=0;
      // a paletted image needs 1..256 entries; a data offset that points
      // inside the headers yields a negative count and is rejected too
      if (psize <= 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
      for (i=0; i < psize; ++i) {
         // stored as blue, green, red (+ one unused byte unless hsz == 12)
         pal[i][2] = stbi__get8(s);
         pal[i][1] = stbi__get8(s);
         pal[i][0] = stbi__get8(s);
         if (info.hsz != 12) stbi__get8(s);
         pal[i][3] = 255;
      }
      // skip whatever lies between the palette and the pixel data
      stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
      if (info.bpp == 1) width = (s->img_x + 7) >> 3;
      else if (info.bpp == 4) width = (s->img_x + 1) >> 1;
      else if (info.bpp == 8) width = s->img_x;
      else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
      pad = (-width)&3; // rows are padded to a multiple of 4 bytes
      if (info.bpp == 1) {
         for (j=0; j < (int) s->img_y; ++j) {
            int bit_offset = 7, v = stbi__get8(s);
            for (i=0; i < (int) s->img_x; ++i) {
               int color = (v>>bit_offset)&0x1;
               out[z++] = pal[color][0];
               out[z++] = pal[color][1];
               out[z++] = pal[color][2];
               // keep the output stride correct when four channels were requested
               if (target == 4) out[z++] = 255;
               // stop before fetching another byte: when the width is a multiple
               // of 8 that byte would belong to the padding / next row
               if (i+1 == (int) s->img_x) break;
               if((--bit_offset) < 0) {
                  bit_offset = 7;
                  v = stbi__get8(s);
               }
            }
            stbi__skip(s, pad);
         }
      } else {
         for (j=0; j < (int) s->img_y; ++j) {
            // two pixels per iteration: one byte each at 8 bpp, one shared byte at 4 bpp
            for (i=0; i < (int) s->img_x; i += 2) {
               int v=stbi__get8(s),v2=0;
               if (info.bpp == 4) {
                  v2 = v & 15;
                  v >>= 4;
               }
               out[z++] = pal[v][0];
               out[z++] = pal[v][1];
               out[z++] = pal[v][2];
               if (target == 4) out[z++] = 255;
               if (i+1 == (int) s->img_x) break;
               v = (info.bpp == 8) ? stbi__get8(s) : v2;
               out[z++] = pal[v][0];
               out[z++] = pal[v][1];
               out[z++] = pal[v][2];
               if (target == 4) out[z++] = 255;
            }
            stbi__skip(s, pad);
         }
      }
   } else {
      int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
      int z = 0;
      int easy=0; // 1: plain 24-bit BGR, 2: plain 32-bit BGRA, 0: decode through the masks
      stbi__skip(s, info.offset - 14 - info.hsz);
      if (info.bpp == 24) width = 3 * s->img_x;
      else if (info.bpp == 16) width = 2*s->img_x;
      else /* bpp = 32 and pad = 0 */ width=0;
      pad = (-width) & 3;
      if (info.bpp == 24) {
         easy = 1;
      } else if (info.bpp == 32) {
         if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
            easy = 2;
      }
      if (!easy) {
         if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
         // right shift amt to put high bit in position #7
         rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
         gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
         bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
         ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
         // the channel expansion tables only cover up to 8 bits per channel
         if (rcount > 8 || gcount > 8 || bcount > 8 || acount > 8) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
      }
      for (j=0; j < (int) s->img_y; ++j) {
         if (easy) {
            for (i=0; i < (int) s->img_x; ++i) {
               unsigned char a;
               // file order is blue, green, red
               out[z+2] = stbi__get8(s);
               out[z+1] = stbi__get8(s);
               out[z+0] = stbi__get8(s);
               z += 3;
               a = (easy == 2 ? stbi__get8(s) : 255);
               all_a |= a;
               if (target == 4) out[z++] = a;
            }
         } else {
            int bpp = info.bpp;
            for (i=0; i < (int) s->img_x; ++i) {
               stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
               unsigned int a;
               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
               a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
               all_a |= a;
               if (target == 4) out[z++] = STBI__BYTECAST(a);
            }
         }
         stbi__skip(s, pad);
      }
   }

   // if alpha channel is all 0s, replace with all 255s
   if (target == 4 && all_a == 0)
      for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
         out[i] = 255;

   if (flip_vertically) {
      // swap row j with its mirror row, byte by byte
      stbi_uc t;
      for (j=0; j < (int) s->img_y>>1; ++j) {
         stbi_uc *p1 = out +      j     *s->img_x*target;
         stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
         for (i=0; i < (int) s->img_x*target; ++i) {
            t = p1[i], p1[i] = p2[i], p2[i] = t;
         }
      }
   }

   if (req_comp && req_comp != target) {
      out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
      if (out == NULL) return out; // stbi__convert_format frees input on failure
   }

   *x = s->img_x;
   *y = s->img_y;
   if (comp) *comp = s->img_n;
   return out;
}
5316 #endif
5317 
5318 // Targa Truevision - TGA
5319 // by Jonathan Dummer
5320 #ifndef STBI_NO_TGA
// Map a TGA pixel size to a component count: returns STBI_grey,
// STBI_grey_alpha, STBI_rgb or bits_per_pixel/8, and 0 for unsupported sizes.
// If is_rgb16 is non-NULL it is set to 1 for 15-bit and non-grey 16-bit
// pixels and to 0 in every other case.
static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
{
   // only RGB or RGBA (incl. 16bit) or grey allowed
   int rgb16 = 0;
   int comp = 0;

   if (bits_per_pixel == 8) {
      comp = STBI_grey;
   } else if (bits_per_pixel == 16 && is_grey) {
      comp = STBI_grey_alpha;
   } else if (bits_per_pixel == 15 || bits_per_pixel == 16) {
      rgb16 = 1;
      comp = STBI_rgb;
   } else if (bits_per_pixel == 24 || bits_per_pixel == 32) {
      comp = bits_per_pixel/8;
   }

   if (is_rgb16) *is_rgb16 = rgb16;
   return comp;
}
5337 
// Reads the TGA header and reports the image dimensions and component count
// without decoding any pixels.
//   x, y, comp - each optional; receives width, height and component count
// Returns 1 on success (the stream is NOT rewound in that case). On any
// failed validation the stream is rewound and 0 is returned.
static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
{
    int width, height, ncomp, image_type, pixel_bits, palette_bits;
    int colormap_type;

    stbi__get8(s);                          // first header byte is not needed here
    colormap_type = stbi__get8(s);
    if (colormap_type > 1) goto reject;     // only 0 (no colormap) or 1 (colormap) allowed

    image_type = stbi__get8(s);
    if (colormap_type == 1) {
        // colormapped image: type must be 1, or 9 for the RLE variant
        if (!(image_type == 1 || image_type == 9)) goto reject;
        stbi__skip(s, 4);                   // first colormap entry index + number of entries
        palette_bits = stbi__get8(s);       // bits per palette entry
        if (!(palette_bits == 8 || palette_bits == 15 || palette_bits == 16 ||
              palette_bits == 24 || palette_bits == 32))
            goto reject;
        stbi__skip(s, 4);                   // image x and y origin
    } else {
        // no colormap: RGB (2) or grey (3), optionally RLE (10, 11)
        if (!(image_type == 2 || image_type == 3 || image_type == 10 || image_type == 11))
            goto reject;
        stbi__skip(s, 9);                   // colormap specification + image x/y origin
        palette_bits = 0;
    }

    width = stbi__get16le(s);
    if (width < 1) goto reject;
    height = stbi__get16le(s);
    if (height < 1) goto reject;

    pixel_bits = stbi__get8(s);
    stbi__get8(s);                          // image descriptor byte is ignored here

    if (palette_bits != 0) {
        // with a colormap, pixel_bits is the size of each index; only 8 or 16 accepted
        if (!(pixel_bits == 8 || pixel_bits == 16)) goto reject;
        ncomp = stbi__tga_get_comp(palette_bits, 0, NULL);
    } else {
        int grey = (image_type == 3) || (image_type == 11);
        ncomp = stbi__tga_get_comp(pixel_bits, grey, NULL);
    }
    if (!ncomp) goto reject;

    if (x) *x = width;
    if (y) *y = height;
    if (comp) *comp = ncomp;
    return 1;

reject:
    stbi__rewind(s);
    return 0;
}
5402 
// Header check behind stbi__tga_test: returns 1 when the leading header bytes
// describe a TGA variant this loader accepts, 0 otherwise. The stream is left
// wherever reading stopped; the caller is responsible for rewinding.
static int stbi__tga_test_core(stbi__context *s)
{
   int colormap_type, image_type, entry_bits, pixel_bits;

   stbi__get8(s);                            // first header byte is not checked
   colormap_type = stbi__get8(s);
   if (colormap_type > 1) return 0;          // only RGB or indexed allowed

   image_type = stbi__get8(s);
   if (colormap_type == 1) {
      // colormap type 1 demands image type 1 or 9
      if (!(image_type == 1 || image_type == 9)) return 0;
      stbi__skip(s, 4);                      // first colormap entry index + number of entries
      entry_bits = stbi__get8(s);            // bits per palette entry
      if (!(entry_bits == 8 || entry_bits == 15 || entry_bits == 16 ||
            entry_bits == 24 || entry_bits == 32))
         return 0;
      stbi__skip(s, 4);                      // image x and y origin
   } else {
      // only RGB or grey allowed, with or without RLE
      if (!(image_type == 2 || image_type == 3 || image_type == 10 || image_type == 11))
         return 0;
      stbi__skip(s, 9);                      // colormap specification + image x/y origin
   }

   if (stbi__get16le(s) < 1) return 0;       // width
   if (stbi__get16le(s) < 1) return 0;       // height

   pixel_bits = stbi__get8(s);
   // for colormapped images the pixel depth is the size of an index
   if (colormap_type == 1 && !(pixel_bits == 8 || pixel_bits == 16)) return 0;
   if (!(pixel_bits == 8 || pixel_bits == 15 || pixel_bits == 16 ||
         pixel_bits == 24 || pixel_bits == 32))
      return 0;

   return 1;
}

// Returns 1 if the stream looks like a supported TGA file, 0 otherwise.
// The stream is always rewound before returning.
static int stbi__tga_test(stbi__context *s)
{
   int res = stbi__tga_test_core(s);
   stbi__rewind(s);
   return res;
}
5433 
5434 // read 16bit value and convert to 24bit RGB
stbi__tga_read_rgb16(stbi__context * s,stbi_uc * out)5435 static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
5436 {
5437    stbi__uint16 px = (stbi__uint16)stbi__get16le(s);
5438    stbi__uint16 fiveBitMask = 31;
5439    // we have 3 channels with 5bits each
5440    int r = (px >> 10) & fiveBitMask;
5441    int g = (px >> 5) & fiveBitMask;
5442    int b = px & fiveBitMask;
5443    // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later
5444    out[0] = (stbi_uc)((r * 255)/31);
5445    out[1] = (stbi_uc)((g * 255)/31);
5446    out[2] = (stbi_uc)((b * 255)/31);
5447 
5448    // some people claim that the most significant bit might be used for alpha
5449    // (possibly if an alpha-bit is set in the "image descriptor byte")
5450    // but that only made 16bit test images completely translucent..
5451    // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
5452 }
5453 
// Decodes a TGA image from the stream into a newly allocated buffer of 8-bit
// components.
//   s        - input stream positioned at the start of the TGA header
//   x, y     - receive the image width and height
//   comp     - optional; receives the component count found in the file
//   req_comp - requested components per pixel for the result; 0 keeps the
//              file's own component count
//   ri       - not used by this loader
// Returns the pixel buffer, or the (null) result of stbi__errpuc when the
// format cannot be determined, the image is too large, memory runs out or the
// palette is missing or unreadable.
static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   //   read in the TGA header stuff
   int tga_offset = stbi__get8(s);           // bytes skipped before the pixel/palette data
   int tga_indexed = stbi__get8(s);          // nonzero when the image uses a palette
   int tga_image_type = stbi__get8(s);
   int tga_is_RLE = 0;
   int tga_palette_start = stbi__get16le(s);
   int tga_palette_len = stbi__get16le(s);   // number of palette entries
   int tga_palette_bits = stbi__get8(s);     // bits per palette entry
   int tga_x_origin = stbi__get16le(s);
   int tga_y_origin = stbi__get16le(s);
   int tga_width = stbi__get16le(s);
   int tga_height = stbi__get16le(s);
   int tga_bits_per_pixel = stbi__get8(s);
   int tga_comp, tga_rgb16=0;
   int tga_inverted = stbi__get8(s);
   // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
   //   image data
   unsigned char *tga_data;
   unsigned char *tga_palette = NULL;
   int i, j;
   unsigned char raw_data[4] = {0};          // most recently decoded pixel
   int RLE_count = 0;
   int RLE_repeating = 0;
   int read_next_pixel = 1;
   STBI_NOTUSED(ri);

   //   do a tiny bit of processing
   if ( tga_image_type >= 8 )
   {
      // image types 8 and up are the RLE variants of types 0..3
      tga_image_type -= 8;
      tga_is_RLE = 1;
   }
   // bit 5 of the descriptor byte: when it is clear the rows are stored in
   // the opposite order to the output and must be reversed
   tga_inverted = 1 - ((tga_inverted >> 5) & 1);

   //   If I'm paletted, then I'll use the number of bits from the palette
   if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
   else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);

   if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
      return stbi__errpuc("bad format", "Can't find out TGA pixelformat");

   //   tga info
   *x = tga_width;
   *y = tga_height;
   if (comp) *comp = tga_comp;

   if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
      return stbi__errpuc("too large", "Corrupt TGA");

   // A paletted image with an empty palette cannot be decoded: every index
   // would be clamped to entry 0, which does not exist, so the lookup in the
   // pixel loop would read past the zero-byte palette allocation.
   if (tga_indexed && tga_palette_len == 0)
      return stbi__errpuc("bad palette", "Corrupt TGA");

   tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
   if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");

   // skip to the data's starting position (offset usually = 0)
   stbi__skip(s, tga_offset );

   if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) {
      // fast path: raw, unpaletted, 8 bits per component - read whole rows
      // straight into their final position
      for (i=0; i < tga_height; ++i) {
         int row = tga_inverted ? tga_height -i - 1 : i;
         stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
         stbi__getn(s, tga_row, tga_width * tga_comp);
      }
   } else  {
      //   do I need to load a palette?
      if ( tga_indexed)
      {
         //   any data to skip? (offset usually = 0)
         stbi__skip(s, tga_palette_start );
         //   load the palette
         tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
         if (!tga_palette) {
            STBI_FREE(tga_data);
            return stbi__errpuc("outofmem", "Out of memory");
         }
         if (tga_rgb16) {
            // 15/16-bit entries are expanded to three bytes each while reading
            stbi_uc *pal_entry = tga_palette;
            STBI_ASSERT(tga_comp == STBI_rgb);
            for (i=0; i < tga_palette_len; ++i) {
               stbi__tga_read_rgb16(s, pal_entry);
               pal_entry += tga_comp;
            }
         } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
               STBI_FREE(tga_data);
               STBI_FREE(tga_palette);
               return stbi__errpuc("bad palette", "Corrupt TGA");
         }
      }
      //   load the data
      for (i=0; i < tga_width * tga_height; ++i)
      {
         //   if I'm in RLE mode, do I need to get a RLE packet header?
         if ( tga_is_RLE )
         {
            if ( RLE_count == 0 )
            {
               //   yep, get the next byte as a RLE command:
               //   low 7 bits = count - 1, top bit = repeat flag
               int RLE_cmd = stbi__get8(s);
               RLE_count = 1 + (RLE_cmd & 127);
               RLE_repeating = RLE_cmd >> 7;
               read_next_pixel = 1;
            } else if ( !RLE_repeating )
            {
               // literal run: every pixel is present in the stream
               read_next_pixel = 1;
            }
         } else
         {
            read_next_pixel = 1;
         }
         //   OK, if I need to read a pixel, do it now
         if ( read_next_pixel )
         {
            //   load however much data we did have
            if ( tga_indexed )
            {
               // read in index, then perform the lookup
               int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
               if ( pal_idx >= tga_palette_len ) {
                  // invalid index: fall back to entry 0 (which exists, because
                  // empty palettes were rejected above)
                  pal_idx = 0;
               }
               pal_idx *= tga_comp;
               for (j = 0; j < tga_comp; ++j) {
                  raw_data[j] = tga_palette[pal_idx+j];
               }
            } else if(tga_rgb16) {
               STBI_ASSERT(tga_comp == STBI_rgb);
               stbi__tga_read_rgb16(s, raw_data);
            } else {
               //   read in the data raw
               for (j = 0; j < tga_comp; ++j) {
                  raw_data[j] = stbi__get8(s);
               }
            }
            //   clear the reading flag for the next pixel
            read_next_pixel = 0;
         } // end of reading a pixel

         // copy data (for a repeating RLE run this re-emits the last pixel read)
         for (j = 0; j < tga_comp; ++j)
           tga_data[i*tga_comp+j] = raw_data[j];

         //   in case we're in RLE mode, keep counting down
         --RLE_count;
      }
      //   do I need to invert the image?
      if ( tga_inverted )
      {
         // swap row j with row (height-1-j), one byte at a time
         for (j = 0; j*2 < tga_height; ++j)
         {
            int index1 = j * tga_width * tga_comp;
            int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
            for (i = tga_width * tga_comp; i > 0; --i)
            {
               unsigned char temp = tga_data[index1];
               tga_data[index1] = tga_data[index2];
               tga_data[index2] = temp;
               ++index1;
               ++index2;
            }
         }
      }
      //   clear my palette, if I had one
      if ( tga_palette != NULL )
      {
         STBI_FREE( tga_palette );
      }
   }

   // swap RGB - if the source data was RGB16, it already is in the right order
   if (tga_comp >= 3 && !tga_rgb16)
   {
      unsigned char* tga_pixel = tga_data;
      for (i=0; i < tga_width * tga_height; ++i)
      {
         unsigned char temp = tga_pixel[0];
         tga_pixel[0] = tga_pixel[2];
         tga_pixel[2] = temp;
         tga_pixel += tga_comp;
      }
   }

   // convert to target component count
   if (req_comp && req_comp != tga_comp)
      tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);

   //   the things I do to get rid of an error message, and yet keep
   //   Microsoft's C compilers happy... [8^(
   tga_palette_start = tga_palette_len = tga_palette_bits =
         tga_x_origin = tga_y_origin = 0;
   //   OK, done
   return tga_data;
}
5647 #endif
5648 
5649 // *************************************************************************************************
5650 // Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
5651 
5652 #ifndef STBI_NO_PSD
// Returns 1 if the stream starts with the big-endian value 0x38425053
// (the characters "8BPS"), 0 otherwise. Always rewinds the stream.
static int stbi__psd_test(stbi__context *s)
{
   int is_psd = 0;
   if (stbi__get32be(s) == 0x38425053)
      is_psd = 1;
   stbi__rewind(s);
   return is_psd;
}
5659 
// Unpacks one RLE-compressed channel into an interleaved buffer.
//   p          - destination of the first sample; successive samples are
//                written 4 bytes apart
//   pixelCount - number of samples to produce
// Returns 1 on success, 0 if a run would write past pixelCount (corrupt data).
static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
{
   int remaining = pixelCount;

   while (remaining > 0) {
      int run, k;
      int code = stbi__get8(s);

      if (code == 128)
         continue;                        // no-op marker

      if (code < 128) {
         // literal run: the next code+1 bytes are copied as-is
         run = code + 1;
         if (run > remaining) return 0;   // corrupt data
         for (k = 0; k < run; ++k, p += 4)
            *p = stbi__get8(s);
      } else {
         // replicate run: code is a negative 8-bit count, so the next byte
         // is repeated 257-code times
         stbi_uc fill;
         run = 257 - code;
         if (run > remaining) return 0;   // corrupt data
         fill = stbi__get8(s);
         for (k = 0; k < run; ++k, p += 4)
            *p = fill;
      }
      remaining -= run;
   }

   return 1;
}
5697 
// Decodes the composited image of an RGB-mode PSD file into an interleaved
// 4-channel buffer.
//   s        - input stream positioned at the start of the file
//   x, y     - receive width and height
//   comp     - optional; always receives 4
//   req_comp - requested components per pixel; 0 or 4 keeps the RGBA buffer
//   ri       - ri->bits_per_channel is set to 16 when 16-bit output is produced
//   bpc      - 16 to request 16-bit output (only honoured for uncompressed
//              16-bit files); otherwise 8-bit output is produced
// Returns the pixel buffer, or the (null) result of stbi__errpuc on any
// header, size, allocation or RLE error.
static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   int npixels;
   int nchannels, packing;
   int ch, k;
   int depth;
   int width, height;
   stbi_uc *out;
   STBI_NOTUSED(ri);

   // ---- header ----------------------------------------------------------

   // signature "8BPS"
   if (stbi__get32be(s) != 0x38425053)
      return stbi__errpuc("not PSD", "Corrupt PSD image");

   // file type version must be 1
   if (stbi__get16be(s) != 1)
      return stbi__errpuc("wrong version", "Unsupported version of PSD image");

   // 6 reserved bytes
   stbi__skip(s, 6 );

   // number of channels (R, G, B, A, etc.)
   nchannels = stbi__get16be(s);
   if (nchannels < 0 || nchannels > 16)
      return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");

   // rows come first, then columns
   height = stbi__get32be(s);
   width = stbi__get32be(s);

   // only 8 and 16 bits per channel are handled
   depth = stbi__get16be(s);
   if (!(depth == 8 || depth == 16))
      return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");

   // Colour mode must be RGB (3). Other values seen in the wild:
   //   0 Bitmap, 1 Grayscale, 2 Indexed, 4 CMYK, 7 Multichannel,
   //   8 Duotone, 9 Lab
   if (stbi__get16be(s) != 3)
      return stbi__errpuc("wrong color format", "PSD is not in RGB color format");

   // Three length-prefixed sections are skipped in turn: mode data (the
   // palette for indexed colour), image resources, and the reserved data.
   stbi__skip(s,stbi__get32be(s) );
   stbi__skip(s, stbi__get32be(s) );
   stbi__skip(s, stbi__get32be(s) );

   // compression: 0 = raw, 1 = RLE; anything larger is rejected
   packing = stbi__get16be(s);
   if (packing > 1)
      return stbi__errpuc("bad compression", "PSD has an unknown compression format");

   // make sure 4*w*h is representable before allocating
   if (!stbi__mad3sizes_valid(4, width, height, 0))
      return stbi__errpuc("too large", "Corrupt PSD");

   // ---- destination buffer ----------------------------------------------

   if (!packing && depth == 16 && bpc == 16) {
      // 16-bit output: four 2-byte samples per pixel
      out = (stbi_uc *) stbi__malloc_mad3(8, width, height, 0);
      ri->bits_per_channel = 16;
   } else {
      out = (stbi_uc *) stbi__malloc(4 * width*height);
   }

   if (!out) return stbi__errpuc("outofmem", "Out of memory");
   npixels = width*height;

   // ---- image data ------------------------------------------------------

   if (packing) {
      // RLE as used by .PSD and .TIFF; see stbi__psd_decode_rle for the scheme.
      // The RLE-compressed data is preceded by a 2-byte data count for each
      // row of each channel, which is simply skipped.
      stbi__skip(s, height * nchannels * 2 );

      // channels are stored one after another (planar)
      for (ch = 0; ch < 4; ++ch) {
         stbi_uc *dst = out + ch;

         if (ch < nchannels) {
            if (!stbi__psd_decode_rle(s, dst, npixels)) {
               STBI_FREE(out);
               return stbi__errpuc("corrupt", "bad RLE data");
            }
         } else {
            // channel absent from the file: opaque alpha, zero colour
            for (k = 0; k < npixels; ++k, dst += 4)
               *dst = (ch == 3 ? 255 : 0);
         }
      }

   } else {
      // Raw data: each channel in order (Red, Green, Blue, Alpha, ...), one
      // 8-bit or 16-bit big-endian value per pixel.
      for (ch = 0; ch < 4; ++ch) {
         if (ch >= nchannels) {
            // channel absent from the file: fill with the default value
            if (depth == 16 && bpc == 16) {
               stbi__uint16 *q = ((stbi__uint16 *) out) + ch;
               stbi__uint16 fill16 = ch == 3 ? 65535 : 0;
               for (k = 0; k < npixels; ++k, q += 4)
                  *q = fill16;
            } else {
               stbi_uc *p = out+ch;
               stbi_uc fill8 = ch == 3 ? 255 : 0;
               for (k = 0; k < npixels; ++k, p += 4)
                  *p = fill8;
            }
         } else if (ri->bits_per_channel == 16) {
            // 16-bit output: keep the full sample
            stbi__uint16 *q = ((stbi__uint16 *) out) + ch;
            for (k = 0; k < npixels; ++k, q += 4)
               *q = (stbi__uint16) stbi__get16be(s);
         } else if (depth == 16) {
            // 16-bit input, 8-bit output: keep the high byte
            stbi_uc *p = out+ch;
            for (k = 0; k < npixels; ++k, p += 4)
               *p = (stbi_uc) (stbi__get16be(s) >> 8);
         } else {
            stbi_uc *p = out+ch;
            for (k = 0; k < npixels; ++k, p += 4)
               *p = stbi__get8(s);
         }
      }
   }

   // ---- remove weird white matte from PSD -------------------------------
   // Applied only to partially transparent pixels when the file carries at
   // least four channels.
   if (nchannels >= 4) {
      if (ri->bits_per_channel == 16) {
         for (k=0; k < width*height; ++k) {
            stbi__uint16 *pixel = (stbi__uint16 *) out + 4*k;
            if (pixel[3] != 0 && pixel[3] != 65535) {
               float a = pixel[3] / 65535.0f;
               float ra = 1.0f / a;
               float inv_a = 65535.0f * (1 - ra);
               pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a);
               pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a);
               pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a);
            }
         }
      } else {
         for (k=0; k < width*height; ++k) {
            unsigned char *pixel = out + 4*k;
            if (pixel[3] != 0 && pixel[3] != 255) {
               float a = pixel[3] / 255.0f;
               float ra = 1.0f / a;
               float inv_a = 255.0f * (1 - ra);
               pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
               pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
               pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
            }
         }
      }
   }

   // ---- convert to desired output format --------------------------------
   if (req_comp && req_comp != 4) {
      if (ri->bits_per_channel == 16)
         out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, width, height);
      else
         out = stbi__convert_format(out, 4, req_comp, width, height);
      if (out == NULL) return out; // stbi__convert_format frees input on failure
   }

   if (comp) *comp = 4;
   *y = height;
   *x = width;

   return out;
}
5895 #endif
5896 
5897 // *************************************************************************************************
5898 // Softimage PIC loader
5899 // by Tom Seddon
5900 //
5901 // See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
5902 // See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
5903 
5904 #ifndef STBI_NO_PIC
// Reads up to four bytes from the stream and compares them with str[0..3].
// Returns 1 if all four match; returns 0 at the first mismatch, leaving the
// remaining bytes unread.
static int stbi__pic_is4(stbi__context *s,const char *str)
{
   const char *stop = str + 4;

   while (str != stop) {
      stbi_uc expected = (stbi_uc)*str;
      ++str;
      if (stbi__get8(s) != expected)
         return 0;
   }
   return 1;
}
5914 
// Checks for a PIC file: the 4-byte magic 53 80 F6 34, then 84 bytes that are
// read and ignored, then the tag "PICT". Returns 1 on a match, 0 otherwise.
// Does not rewind the stream.
static int stbi__pic_test_core(stbi__context *s)
{
   int remaining = 84;

   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
      return 0;

   while (remaining-- > 0)
      stbi__get8(s);

   return stbi__pic_is4(s,"PICT") ? 1 : 0;
}
5930 
5931 typedef struct
5932 {
5933    stbi_uc size,type,channel;
5934 } stbi__pic_packet;
5935 
// Reads one byte from the stream for every channel selected in the mask and
// stores it in the matching slot of dest (0x80 -> dest[0] ... 0x10 -> dest[3]).
// Unselected slots are left untouched. Returns dest, or the (null) result of
// stbi__errpuc if the stream ends before a selected byte can be read.
static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
{
   int slot;

   for (slot = 0; slot < 4; ++slot) {
      if (!(channel & (0x80 >> slot)))
         continue;
      if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
      dest[slot] = stbi__get8(s);
   }

   return dest;
}
5949 
// Copies src[i] to dest[i] for each of the four slots whose bit is set in the
// channel mask (0x80 -> slot 0 ... 0x10 -> slot 3); other slots are untouched.
static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
{
   int slot;

   for (slot = 0; slot < 4; ++slot) {
      if (channel & (0x80 >> slot))
         dest[slot] = src[slot];
   }
}
5958 
// Reads the PIC packet table and then the pixel rows into `result`.
//   width, height - image size in pixels
//   comp          - receives 4 if any packet carries the 0x10 channel, else 3
//   result        - caller-allocated buffer with 4 bytes per pixel
// Returns `result` on success; on failure returns a null value (either 0 or
// the result of stbi__errpuc). The buffer is not freed here.
static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
{
   stbi__pic_packet packets[10];
   int channels_seen = 0;
   int packet_count = 0;
   int more_packets;
   int row;

   // Packet table: entries are chained by their first byte. This will
   // (should...) cater for even some bizarre stuff like having data for the
   // same channel in multiple packets.
   do {
      stbi__pic_packet *pk;

      if (packet_count==sizeof(packets)/sizeof(packets[0]))
         return stbi__errpuc("bad format","too many packets");

      pk = &packets[packet_count++];

      more_packets = stbi__get8(s);
      pk->size     = stbi__get8(s);
      pk->type     = stbi__get8(s);
      pk->channel  = stbi__get8(s);

      channels_seen |= pk->channel;

      if (stbi__at_eof(s))    return stbi__errpuc("bad file","file too short (reading packets)");
      if (pk->size != 8)      return stbi__errpuc("bad format","packet isn't 8bpp");
   } while (more_packets);

   *comp = (channels_seen & 0x10 ? 4 : 3); // has alpha channel?

   // Each row is stored once per packet; every packet fills only the
   // channels named in its mask.
   for (row = 0; row < height; ++row) {
      int idx;

      for (idx = 0; idx < packet_count; ++idx) {
         stbi__pic_packet *pk = &packets[idx];
         stbi_uc *dest = result+row*width*4;
         int left = width;   // pixels still to be produced (RLE modes)
         int i;

         if (pk->type == 0) {
            // uncompressed: one value per pixel
            for (i = 0; i < width; ++i, dest += 4)
               if (!stbi__readval(s,pk->channel,dest))
                  return 0;

         } else if (pk->type == 1) {
            // pure RLE: (count, value) pairs; a count that overshoots the
            // row is clamped to what is left
            while (left > 0) {
               stbi_uc count,value[4];

               count = stbi__get8(s);
               if (stbi__at_eof(s))   return stbi__errpuc("bad file","file too short (pure read count)");

               if (count > left)
                  count = (stbi_uc) left;

               if (!stbi__readval(s,pk->channel,value))  return 0;

               for (i = 0; i < count; ++i, dest += 4)
                  stbi__copyval(pk->channel,dest,value);
               left -= count;
            }

         } else if (pk->type == 2) {
            // mixed RLE: counts >= 128 introduce a repeated value, smaller
            // counts introduce count+1 raw values
            while (left > 0) {
               int count = stbi__get8(s);
               if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (mixed read count)");

               if (count >= 128) { // Repeated
                  stbi_uc value[4];

                  // 128 means the real count follows as a 16-bit big-endian value
                  count = (count == 128) ? stbi__get16be(s) : count - 127;
                  if (count > left)
                     return stbi__errpuc("bad file","scanline overrun");

                  if (!stbi__readval(s,pk->channel,value))
                     return 0;

                  for (i = 0; i < count; ++i, dest += 4)
                     stbi__copyval(pk->channel,dest,value);
               } else { // Raw
                  ++count;
                  if (count>left) return stbi__errpuc("bad file","scanline overrun");

                  for (i = 0; i < count; ++i, dest += 4)
                     if (!stbi__readval(s,pk->channel,dest))
                        return 0;
               }
               left -= count;
            }

         } else {
            return stbi__errpuc("bad format","packet has bad compression type");
         }
      }
   }

   return result;
}
6068 
// Decodes a Softimage PIC image.
//   s        - input stream positioned at the start of the file
//   px, py   - receive the image width and height
//   comp     - optional; receives the component count of the file (3 or 4)
//   req_comp - requested components per pixel; 0 means "same as the file"
//   ri       - not used by this loader
// Returns the pixel buffer, or a null pointer when the header is truncated,
// the image is too large, memory runs out or the pixel data is malformed.
static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri)
{
   stbi_uc *result;
   int i, x,y, internal_comp;
   STBI_NOTUSED(ri);

   // let the core loader always have somewhere to store the component count
   if (!comp) comp = &internal_comp;

   // the first 92 bytes of the header are read and ignored
   for (i=0; i<92; ++i)
      stbi__get8(s);

   x = stbi__get16be(s);
   y = stbi__get16be(s);
   if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (pic header)");
   if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");

   stbi__get32be(s); //skip `ratio'
   stbi__get16be(s); //skip `fields'
   stbi__get16be(s); //skip `pad'

   // intermediate buffer is RGBA
   result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
   // the allocation can fail; never hand a null pointer to memset
   if (!result) return stbi__errpuc("outofmem", "Out of memory");
   // start fully 0xff so channels the file does not carry (e.g. alpha) are opaque/white
   memset(result, 0xff, x*y*4);

   *px = x;
   *py = y;

   if (!stbi__pic_load_core(s,x,y,comp, result)) {
      // Stop here: *comp may not have been written, and converting a null
      // buffer would dereference it. The core loader has already reported
      // the error through stbi__errpuc where it had one to report.
      STBI_FREE(result);
      return NULL;
   }

   if (req_comp == 0) req_comp = *comp;
   result=stbi__convert_format(result,4,req_comp,x,y);

   return result;
}
6104 
// Returns 1 if the stream looks like a PIC file (see stbi__pic_test_core),
// 0 otherwise. Always rewinds the stream.
static int stbi__pic_test(stbi__context *s)
{
   int is_pic;

   is_pic = stbi__pic_test_core(s);
   stbi__rewind(s);
   return is_pic;
}
6111 #endif
6112 
6113 // *************************************************************************************************
6114 // GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
6115 
6116 #ifndef STBI_NO_GIF
// One entry of the table stored in stbi__gif.codes.
// NOTE(review): the code that fills and reads these entries is outside this
// section; the field roles below are inferred from the names and should be
// confirmed against the decoder.
typedef struct
{
   stbi__int16 prefix;   // presumably the index of the entry this one extends
   stbi_uc first;        // presumably the first byte of the entry's string
   stbi_uc suffix;       // presumably the byte this entry appends
} stbi__gif_lzw;
6123 
// State for reading one GIF stream. stbi__gif_header fills w, h, flags,
// bgindex, ratio, transparent and (when present) pal; the remaining fields
// are used by code outside this section.
typedef struct
{
   // image size, read from the header as two 16-bit little-endian values
   int w,h;
   stbi_uc *out;                 // output buffer (always 4 components)
   stbi_uc *background;          // The current "background" as far as a gif is concerned
   stbi_uc *history;
   // flags, bgindex and ratio are three consecutive header bytes; bit 0x80 of
   // flags means a color table follows the header, with 2 << (flags & 7)
   // entries. transparent is set to -1 by the header reader.
   // NOTE(review): eflags is not touched in this section - confirm its role.
   int flags, bgindex, ratio, transparent, eflags;
   // color tables: each entry is 4 bytes; stbi__gif_parse_colortable stores
   // the three bytes read from the file at [2],[1],[0] and an alpha at [3].
   // pal is filled from the header; lpal is presumably the per-image table
   // (TODO confirm).
   stbi_uc  pal[256][4];
   stbi_uc lpal[256][4];
   stbi__gif_lzw codes[8192];
   stbi_uc *color_table;
   int parse, step;
   int lflags;
   int start_x, start_y;
   int max_x, max_y;
   int cur_x, cur_y;
   int line_size;
   int delay;
} stbi__gif;
6143 
// Checks for the 6-byte GIF signature "GIF87a" or "GIF89a". Stops reading at
// the first mismatching byte. Returns 1 on a match, 0 otherwise. Does not
// rewind the stream.
static int stbi__gif_test_raw(stbi__context *s)
{
   static const char prefix[4] = { 'G', 'I', 'F', '8' };
   int k, version;

   for (k = 0; k < 4; ++k) {
      if (stbi__get8(s) != prefix[k]) return 0;
   }

   version = stbi__get8(s);
   if (!(version == '9' || version == '7')) return 0;

   return (stbi__get8(s) == 'a') ? 1 : 0;
}
6153 
// Returns 1 if the stream starts with a GIF signature, 0 otherwise.
// Always rewinds the stream.
static int stbi__gif_test(stbi__context *s)
{
   int is_gif;

   is_gif = stbi__gif_test_raw(s);
   stbi__rewind(s);
   return is_gif;
}
6160 
// Reads num_entries palette entries of three bytes each from the stream.
// The three bytes are stored in reverse order (first byte into slot 2, third
// into slot 0). Slot 3 (alpha) is 0 for the entry whose index equals transp
// and 255 for every other entry; pass -1 for "no transparent entry".
static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
{
   int n;
   for (n = 0; n < num_entries; ++n) {
      stbi_uc *entry = pal[n];
      entry[2] = stbi__get8(s);
      entry[1] = stbi__get8(s);
      entry[0] = stbi__get8(s);
      if (n == transp)
         entry[3] = 0;
      else
         entry[3] = 255;
   }
}
6171 
// Parses the GIF signature and the screen descriptor into *g.
//   comp    - if non-NULL, receives 4
//   is_info - non-zero: stop after the dimensions and flags, do not read the
//             global palette
// Returns 1 on success; on a bad signature returns the result of stbi__err.
static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
{
   stbi_uc ver;

   // signature: "GIF8", then '7' or '9', then 'a'
   if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')
      return stbi__err("not GIF", "Corrupt GIF");

   ver = stbi__get8(s);
   if (!(ver == '7' || ver == '9'))
      return stbi__err("not GIF", "Corrupt GIF");
   if (stbi__get8(s) != 'a')
      return stbi__err("not GIF", "Corrupt GIF");

   stbi__g_failure_reason = "";

   // screen descriptor
   g->w = stbi__get16le(s);
   g->h = stbi__get16le(s);
   g->flags = stbi__get8(s);
   g->bgindex = stbi__get8(s);
   g->ratio = stbi__get8(s);
   g->transparent = -1;

   if (comp != 0)
      *comp = 4;  // can't actually tell whether it's 3 or 4 until we parse the comments

   // the global palette is read only for a full load, and only when flag bit 7 is set
   if (!is_info && (g->flags & 0x80))
      stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);

   return 1;
}
6199 
// Reads just enough of a GIF header to report its dimensions.
//   x, y - if non-NULL, receive the canvas width and height
//   comp - passed on to stbi__gif_header
// Returns 1 on success. Returns 0 if the temporary decoder state cannot be
// allocated, or if the header is not a GIF header (in which case the stream
// is rewound).
static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
   // the state is heap allocated; it must be checked before stbi__gif_header
   // writes through it
   if (!g) return stbi__err("outofmem", "Out of memory");
   if (!stbi__gif_header(s, g, comp, 1)) {
      STBI_FREE(g);
      stbi__rewind( s );
      return 0;
   }
   if (x) *x = g->w;
   if (y) *y = g->h;
   STBI_FREE(g);
   return 1;
}
6213 
// Emits the pixels for one LZW code into g->out.
// The chain of prefix codes is emitted first (recursively), then the pixel
// for this code's own suffix. After each pixel the write position advances,
// wrapping to the next row and, when a pass runs off the bottom of the
// rectangle, to the next interlace pass.
static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
{
   stbi_uc *dst, *rgba;
   int offset;

   // recurse to decode the prefixes, since the linked-list is backwards,
   // and working backwards through an interleaved image would be nasty
   if (g->codes[code].prefix >= 0)
      stbi__out_gif_code(g, g->codes[code].prefix);

   // nothing more to draw once the position is past the bottom of the rectangle
   if (g->cur_y >= g->max_y)
      return;

   offset = g->cur_x + g->cur_y;
   dst = g->out + offset;
   g->history[offset / 4] = 1;   // mark the pixel as touched by this frame

   rgba = g->color_table + g->codes[code].suffix * 4;
   if (rgba[3] > 128) { // don't render transparent pixels;
      dst[0] = rgba[2];
      dst[1] = rgba[1];
      dst[2] = rgba[0];
      dst[3] = rgba[3];
   }
   g->cur_x += 4;

   if (g->cur_x < g->max_x)
      return;

   // end of row: back to the left edge, down by the current row step
   g->cur_x = g->start_x;
   g->cur_y += g->step;

   // ran off the bottom with passes left: start the next interlace pass
   while (g->cur_y >= g->max_y && g->parse > 0) {
      g->step = (1 << g->parse) * g->line_size;
      g->cur_y = g->start_y + (g->step >> 1);
      --g->parse;
   }
}
6250 
// Decodes the LZW-compressed raster data of one image into g->out, using
// stbi__out_gif_code to place the pixels.
// Returns g->out when the data ends normally (zero-length block or
// end-of-stream code). Returns NULL if the initial code size byte is above 12,
// and the result of stbi__errpuc for the other malformed-data cases.
static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
{
   stbi_uc lzw_cs;
   stbi__int32 len, init_code;
   stbi__uint32 first;
   stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
   stbi__gif_lzw *p;

   lzw_cs = stbi__get8(s);
   if (lzw_cs > 12) return NULL;
   clear = 1 << lzw_cs;          // the clear code; clear+1 is the end-of-stream code
   first = 1;                    // stays set until a clear code has been seen
   codesize = lzw_cs + 1;
   codemask = (1 << codesize) - 1;
   bits = 0;                     // bit accumulator
   valid_bits = 0;               // number of bits currently held in 'bits'
   // seed the table with the single-byte codes
   for (init_code = 0; init_code < clear; init_code++) {
      g->codes[init_code].prefix = -1;
      g->codes[init_code].first = (stbi_uc) init_code;
      g->codes[init_code].suffix = (stbi_uc) init_code;
   }

   // support no starting clear code
   avail = clear+2;              // next free table slot
   oldcode = -1;                 // no previous code yet

   len = 0;                      // bytes left in the current data sub-block
   for(;;) {
      if (valid_bits < codesize) {
         // not enough bits for a code: pull in one more byte
         if (len == 0) {
            len = stbi__get8(s); // start new block
            if (len == 0)
               return g->out;
         }
         --len;
         bits |= (stbi__int32) stbi__get8(s) << valid_bits;
         valid_bits += 8;
      } else {
         stbi__int32 code = bits & codemask;
         bits >>= codesize;
         valid_bits -= codesize;
         // @OPTIMIZE: is there some way we can accelerate the non-clear path?
         if (code == clear) {  // clear code
            codesize = lzw_cs + 1;
            codemask = (1 << codesize) - 1;
            avail = clear + 2;
            oldcode = -1;
            first = 0;
         } else if (code == clear + 1) { // end of stream code
            // skip the rest of this sub-block and any sub-blocks that follow
            stbi__skip(s, len);
            while ((len = stbi__get8(s)) > 0)
               stbi__skip(s,len);
            return g->out;
         } else if (code <= avail) {
            if (first) {
               return stbi__errpuc("no clear code", "Corrupt GIF");
            }

            if (oldcode >= 0) {
               // add a table entry: previous code's string plus one more byte
               p = &g->codes[avail++];
               if (avail > 8192) {
                  return stbi__errpuc("too many codes", "Corrupt GIF");
               }

               p->prefix = (stbi__int16) oldcode;
               p->first = g->codes[oldcode].first;
               p->suffix = (code == avail) ? p->first : g->codes[code].first;
            } else if (code == avail)
               return stbi__errpuc("illegal code in raster", "Corrupt GIF");

            stbi__out_gif_code(g, (stbi__uint16) code);

            // widen the code size once the table fills the current width
            if ((avail & codemask) == 0 && avail <= 0x0FFF) {
               codesize++;
               codemask = (1 << codesize) - 1;
            }

            oldcode = code;
         } else {
            return stbi__errpuc("illegal code in raster", "Corrupt GIF");
         }
      }
   }
}
6335 
6336 // this function is designed to support animated gifs, although stb_image doesn't support it
6337 // two back is the image from two frames ago, used for a very specific disposal format
stbi__gif_load_next(stbi__context * s,stbi__gif * g,int * comp,int req_comp,stbi_uc * two_back)6338 static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back)
6339 {
6340    int dispose;
6341    int first_frame;
6342    int pi;
6343    int pcount;
6344 
6345    // on first frame, any non-written pixels get the background colour (non-transparent)
6346    first_frame = 0;
6347    if (g->out == 0) {
6348       if (!stbi__gif_header(s, g, comp,0))     return 0; // stbi__g_failure_reason set by stbi__gif_header
6349       g->out = (stbi_uc *) stbi__malloc(4 * g->w * g->h);
6350       g->background = (stbi_uc *) stbi__malloc(4 * g->w * g->h);
6351       g->history = (stbi_uc *) stbi__malloc(g->w * g->h);
6352       if (g->out == 0)                      return stbi__errpuc("outofmem", "Out of memory");
6353 
6354       // image is treated as "tranparent" at the start - ie, nothing overwrites the current background;
6355       // background colour is only used for pixels that are not rendered first frame, after that "background"
6356       // color refers to teh color that was there the previous frame.
6357       memset( g->out, 0x00, 4 * g->w * g->h );
6358       memset( g->background, 0x00, 4 * g->w * g->h ); // state of the background (starts transparent)
6359       memset( g->history, 0x00, g->w * g->h );        // pixels that were affected previous frame
6360       first_frame = 1;
6361    } else {
6362       // second frame - how do we dispoase of the previous one?
6363       dispose = (g->eflags & 0x1C) >> 2;
6364       pcount = g->w * g->h;
6365 
6366       if ((dispose == 3) && (two_back == 0)) {
6367          dispose = 2; // if I don't have an image to revert back to, default to the old background
6368       }
6369 
6370       if (dispose == 3) { // use previous graphic
6371          for (pi = 0; pi < pcount; ++pi) {
6372             if (g->history[pi]) {
6373                memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 );
6374             }
6375          }
6376       } else if (dispose == 2) {
6377          // restore what was changed last frame to background before that frame;
6378          for (pi = 0; pi < pcount; ++pi) {
6379             if (g->history[pi]) {
6380                memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 );
6381             }
6382          }
6383       } else {
6384          // This is a non-disposal case eithe way, so just
6385          // leave the pixels as is, and they will become the new background
6386          // 1: do not dispose
6387          // 0:  not specified.
6388       }
6389 
6390       // background is what out is after the undoing of the previou frame;
6391       memcpy( g->background, g->out, 4 * g->w * g->h );
6392    }
6393 
6394    // clear my history;
6395    memset( g->history, 0x00, g->w * g->h );        // pixels that were affected previous frame
6396 
6397    for (;;) {
6398       int tag = stbi__get8(s);
6399       switch (tag) {
6400          case 0x2C: /* Image Descriptor */
6401          {
6402             stbi__int32 x, y, w, h;
6403             stbi_uc *o;
6404 
6405             x = stbi__get16le(s);
6406             y = stbi__get16le(s);
6407             w = stbi__get16le(s);
6408             h = stbi__get16le(s);
6409             if (((x + w) > (g->w)) || ((y + h) > (g->h)))
6410                return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
6411 
6412             g->line_size = g->w * 4;
6413             g->start_x = x * 4;
6414             g->start_y = y * g->line_size;
6415             g->max_x   = g->start_x + w * 4;
6416             g->max_y   = g->start_y + h * g->line_size;
6417             g->cur_x   = g->start_x;
6418             g->cur_y   = g->start_y;
6419 
6420             g->lflags = stbi__get8(s);
6421 
6422             if (g->lflags & 0x40) {
6423                g->step = 8 * g->line_size; // first interlaced spacing
6424                g->parse = 3;
6425             } else {
6426                g->step = g->line_size;
6427                g->parse = 0;
6428             }
6429 
6430             if (g->lflags & 0x80) {
6431                stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
6432                g->color_table = (stbi_uc *) g->lpal;
6433             } else if (g->flags & 0x80) {
6434                g->color_table = (stbi_uc *) g->pal;
6435             } else
6436                return stbi__errpuc("missing color table", "Corrupt GIF");
6437 
6438             o = stbi__process_gif_raster(s, g);
6439             if (o == NULL) return NULL;
6440 
6441             // if this was the first frame,
6442             pcount = g->w * g->h;
6443             if (first_frame && (g->bgindex > 0)) {
6444                // if first frame, any pixel not drawn to gets the background color
6445                for (pi = 0; pi < pcount; ++pi) {
6446                   if (g->history[pi] == 0) {
6447                      g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be;
6448                      memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 );
6449                   }
6450                }
6451             }
6452 
6453             return o;
6454          }
6455 
6456          case 0x21: // Comment Extension.
6457          {
6458             int len;
6459             int ext = stbi__get8(s);
6460             if (ext == 0xF9) { // Graphic Control Extension.
6461                len = stbi__get8(s);
6462                if (len == 4) {
6463                   g->eflags = stbi__get8(s);
6464                   g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths.
6465 
6466                   // unset old transparent
6467                   if (g->transparent >= 0) {
6468                      g->pal[g->transparent][3] = 255;
6469                   }
6470                   if (g->eflags & 0x01) {
6471                      g->transparent = stbi__get8(s);
6472                      if (g->transparent >= 0) {
6473                         g->pal[g->transparent][3] = 0;
6474                      }
6475                   } else {
6476                      // don't need transparent
6477                      stbi__skip(s, 1);
6478                      g->transparent = -1;
6479                   }
6480                } else {
6481                   stbi__skip(s, len);
6482                   break;
6483                }
6484             }
6485             while ((len = stbi__get8(s)) != 0) {
6486                stbi__skip(s, len);
6487             }
6488             break;
6489          }
6490 
6491          case 0x3B: // gif stream termination code
6492             return (stbi_uc *) s; // using '1' causes warning on some compilers
6493 
6494          default:
6495             return stbi__errpuc("unknown code", "Corrupt GIF");
6496       }
6497    }
6498 }
6499 
// Loads every frame of a GIF into one buffer, frames stored back to back.
//
//   delays   - if non-NULL, *delays receives an array with one delay value
//              per frame (taken from g.delay after each frame); it is set to
//              0 first and stays 0 if no frame is decoded
//   x, y     - receive the canvas size once a frame has been decoded
//   z        - receives the number of frames decoded
//   req_comp - if non-zero and not 4, the whole buffer is converted with
//              stbi__convert_format at the end
//
// Decoding stops at the stream terminator or at the first frame that fails;
// frames decoded up to that point are still returned. Returns the frame
// buffer (0 if no frame was decoded). If the frame or delay buffer cannot be
// grown, everything allocated here is released, *delays is reset to 0 and the
// result of stbi__errpuc is returned. If the stream is not a GIF, returns the
// result of stbi__errpuc as well.
static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
{
   if (stbi__gif_test(s)) {
      int layers = 0;
      stbi_uc *u = 0;
      stbi_uc *out = 0;
      stbi_uc *two_back = 0;
      stbi__gif g;
      int stride;
      int out_of_memory = 0;
      memset(&g, 0, sizeof(g));
      if (delays) {
         *delays = 0;
      }

      do {
         u = stbi__gif_load_next(s, &g, comp, req_comp, two_back);
         if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker

         if (u) {
            stbi_uc *grown;

            *x = g.w;
            *y = g.h;
            ++layers;
            stride = g.w * g.h * 4;

            // grow into a temporary so the existing buffer is not lost
            // when the allocator fails
            if (out) {
               grown = (stbi_uc*) STBI_REALLOC( out, layers * stride );
            } else {
               grown = (stbi_uc*) stbi__malloc( layers * stride );
            }
            if (grown == 0) {
               out_of_memory = 1;
               break;
            }
            out = grown;

            if (delays) {
               int *grown_delays;
               if (*delays) {
                  grown_delays = (int*) STBI_REALLOC( *delays, sizeof(int) * layers );
               } else {
                  grown_delays = (int*) stbi__malloc( layers * sizeof(int) );
               }
               if (grown_delays == 0) {
                  out_of_memory = 1;
                  break;
               }
               *delays = grown_delays;
            }

            memcpy( out + ((layers - 1) * stride), u, stride );
            if (layers >= 2) {
               // the frame stored before the one just copied lives at index
               // layers-2; recomputed every time because 'out' may have moved.
               // (The earlier expression, out - 2 * stride, pointed before
               // the start of the allocation.)
               two_back = out + (layers - 2) * stride;
            }

            if (delays) {
               (*delays)[layers - 1U] = g.delay;
            }
         }
      } while (u != 0);

      // free temp buffer;
      STBI_FREE(g.out);
      STBI_FREE(g.history);
      STBI_FREE(g.background);

      if (out_of_memory) {
         STBI_FREE(out);
         if (delays) {
            STBI_FREE(*delays);
            *delays = 0;
         }
         return stbi__errpuc("outofmem", "Out of memory");
      }

      // do the final conversion after loading everything;
      if (req_comp && req_comp != 4)
         out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h);

      *z = layers;
      return out;
   } else {
      return stbi__errpuc("not GIF", "Image was not as a gif type.");
   }
}
6561 
// Loads the first frame of a GIF.
//
//   x, y     - receive the canvas size on success
//   comp     - passed on to the frame decoder
//   req_comp - if non-zero and not 4, the frame is converted with
//              stbi__convert_format
//   ri       - unused
//
// Returns the pixel buffer on success and 0 on failure, including the case
// where the stream terminator is reached before any image.
static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *u = 0;
   stbi__gif g;
   memset(&g, 0, sizeof(g));
   STBI_NOTUSED(ri);

   u = stbi__gif_load_next(s, &g, comp, req_comp, 0);
   if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker
   if (u) {
      *x = g.w;
      *y = g.h;

      // moved conversion to after successful load so that the same
      // can be done for multiple frames.
      if (req_comp && req_comp != 4)
         u = stbi__convert_format(u, 4, req_comp, g.w, g.h);
   } else if (g.out) {
      // decoding failed (or hit the terminator) after the output buffer had
      // been allocated; nothing is returned to the caller, so release it here
      STBI_FREE(g.out);
   }

   // free buffers needed for multiple frame loading;
   STBI_FREE(g.history);
   STBI_FREE(g.background);

   return u;
}
6586 
// Info entry point for GIF; forwards to stbi__gif_info_raw and returns its
// result.
static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
{
   int ok;

   ok = stbi__gif_info_raw(s, x, y, comp);
   return ok;
}
6591 #endif
6592 
6593 // *************************************************************************************************
6594 // Radiance RGBE HDR loader
6595 // originally by Nicolas Schulz
6596 #ifndef STBI_NO_HDR
// Compares the next bytes of the stream against a NUL-terminated signature.
// Returns 0 at the first mismatch (without rewinding); on a full match
// rewinds the stream and returns 1.
static int stbi__hdr_test_core(stbi__context *s, const char *signature)
{
   const char *expected;

   for (expected = signature; *expected; ++expected) {
      if (stbi__get8(s) != *expected)
         return 0;
   }
   stbi__rewind(s);
   return 1;
}
6606 
// Returns non-zero if the stream starts with "#?RADIANCE\n" or, failing
// that, "#?RGBE\n". The stream is rewound after each attempt.
static int stbi__hdr_test(stbi__context* s)
{
   int found;

   found = stbi__hdr_test_core(s, "#?RADIANCE\n");
   stbi__rewind(s);
   if (found)
      return found;

   found = stbi__hdr_test_core(s, "#?RGBE\n");
   stbi__rewind(s);
   return found;
}
6617 
6618 #define STBI__HDR_BUFLEN  1024
stbi__hdr_gettoken(stbi__context * z,char * buffer)6619 static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
6620 {
6621    int len=0;
6622    char c = '\0';
6623 
6624    c = (char) stbi__get8(z);
6625 
6626    while (!stbi__at_eof(z) && c != '\n') {
6627       buffer[len++] = c;
6628       if (len == STBI__HDR_BUFLEN-1) {
6629          // flush to end of line
6630          while (!stbi__at_eof(z) && stbi__get8(z) != '\n')
6631             ;
6632          break;
6633       }
6634       c = (char) stbi__get8(z);
6635    }
6636 
6637    buffer[len] = 0;
6638    return buffer;
6639 }
6640 
// Converts one 4-byte RGBE pixel (input[0..2] mantissas, input[3] exponent)
// into req_comp floats at output.
//   req_comp 1/2: the three channels are averaged into output[0]
//   req_comp 3/4: the three channels are written separately
//   req_comp 2/4: the extra last component is set to 1
// An exponent byte of 0 yields zeros for the colour components.
static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
{
   if (input[3] == 0) {
      switch (req_comp) {
         case 4: output[3] = 1; /* fallthrough */
         case 3: output[0] = output[1] = output[2] = 0;
                 break;
         case 2: output[1] = 1; /* fallthrough */
         case 1: output[0] = 0;
                 break;
      }
      return;
   }

   {
      // Exponent
      float scale = (float) ldexp(1.0f, input[3] - (int)(128 + 8));

      if (req_comp <= 2) {
         output[0] = (input[0] + input[1] + input[2]) * scale / 3;
         if (req_comp == 2) output[1] = 1;
      } else {
         output[0] = input[0] * scale;
         output[1] = input[1] * scale;
         output[2] = input[2] * scale;
         if (req_comp == 4) output[3] = 1;
      }
   }
}
6667 
// Loads a Radiance RGBE (.hdr) image as floats.
//
//   x, y     - receive the width and height parsed from the "-Y h +X w" line
//   comp     - if non-NULL, receives 3
//   req_comp - components per output pixel; 0 means 3
//   ri       - unused
//
// Returns a buffer of width*height*req_comp floats, or the result of
// stbi__errpf when the header is not recognised, the format line is missing,
// the layout is not "-Y ... +X ...", the size is rejected by
// stbi__mad4sizes_valid, an allocation fails, or the RLE data is malformed.
static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   char buffer[STBI__HDR_BUFLEN];
   char *token;
   int valid = 0;
   int width, height;
   stbi_uc *scanline;
   float *hdr_data;
   int len;
   unsigned char count, value;
   int i, j, k, c1,c2, z;
   const char *headerToken;
   STBI_NOTUSED(ri);

   // Check identifier
   headerToken = stbi__hdr_gettoken(s,buffer);
   if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
      return stbi__errpf("not HDR", "Corrupt HDR image");

   // Parse header: lines up to the first empty one
   for(;;) {
      token = stbi__hdr_gettoken(s,buffer);
      if (token[0] == 0) break;
      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
   }

   if (!valid)    return stbi__errpf("unsupported format", "Unsupported HDR format");

   // Parse width and height
   // can't use sscanf() if we're not using stdio!
   token = stbi__hdr_gettoken(s,buffer);
   if (strncmp(token, "-Y ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
   token += 3;
   height = (int) strtol(token, &token, 10);
   while (*token == ' ') ++token;
   if (strncmp(token, "+X ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
   token += 3;
   width = (int) strtol(token, NULL, 10);

   *x = width;
   *y = height;

   if (comp) *comp = 3;
   if (req_comp == 0) req_comp = 3;

   if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
      return stbi__errpf("too large", "HDR image is too large");

   // Read data
   hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
   if (!hdr_data)
      return stbi__errpf("outofmem", "Out of memory");

   // Load image data
   // widths outside [8, 32768) are stored flat; other widths are read as
   // RLE scanlines, falling back to flat if the RLE marker is missing
   if ( width < 8 || width >= 32768) {
      // Read flat data
      for (j=0; j < height; ++j) {
         for (i=0; i < width; ++i) {
            stbi_uc rgbe[4];
           main_decode_loop:
            stbi__getn(s, rgbe, 4);
            stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
         }
      }
   } else {
      // Read RLE-encoded data
      scanline = NULL;

      for (j = 0; j < height; ++j) {
         c1 = stbi__get8(s);
         c2 = stbi__get8(s);
         len = stbi__get8(s);
         if (c1 != 2 || c2 != 2 || (len & 0x80)) {
            // not run-length encoded, so we have to actually use THIS data as a decoded
            // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
            stbi_uc rgbe[4];
            rgbe[0] = (stbi_uc) c1;
            rgbe[1] = (stbi_uc) c2;
            rgbe[2] = (stbi_uc) len;
            rgbe[3] = (stbi_uc) stbi__get8(s);
            stbi__hdr_convert(hdr_data, rgbe, req_comp);
            i = 1;
            j = 0;
            STBI_FREE(scanline);
            goto main_decode_loop; // yes, this makes no sense
         }
         len <<= 8;
         len |= stbi__get8(s);
         if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
         if (scanline == NULL) {
            scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0);
            if (!scanline) {
               STBI_FREE(hdr_data);
               return stbi__errpf("outofmem", "Out of memory");
            }
         }

         // each of the four components is stored as its own RLE stream
         for (k = 0; k < 4; ++k) {
            int nleft;
            i = 0;
            while ((nleft = width - i) > 0) {
               count = stbi__get8(s);
               if (count > 128) {
                  // Run
                  value = stbi__get8(s);
                  count -= 128;
                  if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
                  for (z = 0; z < count; ++z)
                     scanline[i++ * 4 + k] = value;
               } else {
                  // Dump
                  // a zero count makes no progress through the scanline; on
                  // input that keeps yielding zeros this loop would never
                  // finish, so treat it as corrupt data
                  if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
                  for (z = 0; z < count; ++z)
                     scanline[i++ * 4 + k] = stbi__get8(s);
               }
            }
         }
         for (i=0; i < width; ++i)
            stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
      }
      if (scanline)
         STBI_FREE(scanline);
   }

   return hdr_data;
}
6795 
// Reads the dimensions from a Radiance HDR header.
//   x, y, comp - each may be NULL; on success they receive the width, the
//                height and 3
// Returns 1 on success. Returns 0, after rewinding the stream, if the
// signature test fails, no "FORMAT=32-bit_rle_rgbe" line is found, or the
// resolution line is not of the form "-Y <h> +X <w>".
static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
{
   char line[STBI__HDR_BUFLEN];
   char *cursor;
   int saw_format = 0;
   int sink;

   if (x == NULL)    x = &sink;
   if (y == NULL)    y = &sink;
   if (comp == NULL) comp = &sink;

   if (stbi__hdr_test(s) == 0)
      goto reject;

   // header lines run up to the first empty one
   for (cursor = stbi__hdr_gettoken(s,line); cursor[0] != 0; cursor = stbi__hdr_gettoken(s,line)) {
      if (strcmp(cursor, "FORMAT=32-bit_rle_rgbe") == 0)
         saw_format = 1;
   }
   if (!saw_format)
      goto reject;

   // resolution line
   cursor = stbi__hdr_gettoken(s,line);
   if (strncmp(cursor, "-Y ", 3) != 0)
      goto reject;
   cursor += 3;
   *y = (int) strtol(cursor, &cursor, 10);
   while (*cursor == ' ')
      ++cursor;
   if (strncmp(cursor, "+X ", 3) != 0)
      goto reject;
   cursor += 3;
   *x = (int) strtol(cursor, NULL, 10);
   *comp = 3;
   return 1;

reject:
   stbi__rewind( s );
   return 0;
}
6839 #endif // STBI_NO_HDR
6840 
6841 #ifndef STBI_NO_BMP
// Parses a BMP header to report the image size. The stream is rewound after
// parsing whether or not it succeeded.
//   x, y - if non-NULL, receive s->img_x and s->img_y
//   comp - if non-NULL, receives 4 when the header's info.ma is non-zero,
//          otherwise 3
// Returns 1 on success, 0 if stbi__bmp_parse_header returned NULL.
static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__bmp_data info;
   void *parsed;

   info.all_a = 255;
   parsed = stbi__bmp_parse_header(s, &info);
   stbi__rewind( s );

   if (parsed == NULL)
      return 0;

   if (x != NULL) *x = s->img_x;
   if (y != NULL) *y = s->img_y;
   if (comp != NULL) {
      if (info.ma)
         *comp = 4;
      else
         *comp = 3;
   }
   return 1;
}
6857 #endif
6858 
6859 #ifndef STBI_NO_PSD
// Reads the size fields from a PSD header.
//   x, y, comp - each may be NULL; on success they receive the width, the
//                height and 4
// Returns 1 on success. Returns 0, after rewinding the stream, if any of the
// header checks fails. Note that *y and *x are written before the depth and
// mode checks run.
static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
{
   int num_channels, bit_depth, sink;

   if (x == NULL)    x = &sink;
   if (y == NULL)    y = &sink;
   if (comp == NULL) comp = &sink;

   if (stbi__get32be(s) != 0x38425053)   // the bytes "8BPS"
      goto reject;
   if (stbi__get16be(s) != 1)
      goto reject;

   stbi__skip(s, 6);

   num_channels = stbi__get16be(s);
   if (num_channels < 0 || num_channels > 16)
      goto reject;

   *y = stbi__get32be(s);
   *x = stbi__get32be(s);

   bit_depth = stbi__get16be(s);
   if (!(bit_depth == 8 || bit_depth == 16))
      goto reject;

   // the next 16-bit field must be 3
   if (stbi__get16be(s) != 3)
      goto reject;

   *comp = 4;
   return 1;

reject:
   stbi__rewind( s );
   return 0;
}
6894 
// Returns 1 if the stream holds a PSD header whose depth field is 16.
// Returns 0, after rewinding the stream, if any check fails.
static int stbi__psd_is16(stbi__context *s)
{
   int num_channels, bit_depth;

   if (stbi__get32be(s) != 0x38425053)   // the bytes "8BPS"
      goto reject;
   if (stbi__get16be(s) != 1)
      goto reject;

   stbi__skip(s, 6);

   num_channels = stbi__get16be(s);
   if (num_channels < 0 || num_channels > 16)
      goto reject;

   // two 32-bit fields (read as height and width by stbi__psd_info) are
   // consumed and ignored
   (void) stbi__get32be(s);
   (void) stbi__get32be(s);

   bit_depth = stbi__get16be(s);
   if (bit_depth != 16)
      goto reject;

   return 1;

reject:
   stbi__rewind( s );
   return 0;
}
6921 #endif
6922 
6923 #ifndef STBI_NO_PIC
// Reads the size and channel information from a PIC header.
//   x, y, comp - each may be NULL; on success they receive the width, the
//                height and the component count (4 if any packet's channel
//                byte has bit 0x10 set, otherwise 3)
// Returns 1 on success. Every failure rewinds the stream and returns 0:
// wrong magic, end of data, a width*height product above 1<<28, more than 10
// chained packets, or a packet whose size field is not 8.
static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
{
   int act_comp=0,num_packets=0,chained,dummy;
   stbi__pic_packet packets[10];

   if (!x) x = &dummy;
   if (!y) y = &dummy;
   if (!comp) comp = &dummy;

   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) {
      stbi__rewind(s);
      return 0;
   }

   stbi__skip(s, 88);

   *x = stbi__get16be(s);
   *y = stbi__get16be(s);
   if (stbi__at_eof(s)) {
      stbi__rewind( s);
      return 0;
   }
   if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) {
      stbi__rewind( s );
      return 0;
   }

   stbi__skip(s, 8);

   do {
      stbi__pic_packet *packet;

      if (num_packets==sizeof(packets)/sizeof(packets[0])) {
         // rewind here as well, so that this failure leaves the stream in
         // the same state as all the others
         stbi__rewind( s );
         return 0;
      }

      packet = &packets[num_packets++];
      chained = stbi__get8(s);
      packet->size    = stbi__get8(s);
      packet->type    = stbi__get8(s);
      packet->channel = stbi__get8(s);
      act_comp |= packet->channel;

      if (stbi__at_eof(s)) {
          stbi__rewind( s );
          return 0;
      }
      if (packet->size != 8) {
          stbi__rewind( s );
          return 0;
      }
   } while (chained);

   *comp = (act_comp & 0x10 ? 4 : 3);

   return 1;
}
6980 #endif
6981 
6982 // *************************************************************************************************
6983 // Portable Gray Map and Portable Pixel Map loader
6984 // by Ken Miller
6985 //
6986 // PGM: http://netpbm.sourceforge.net/doc/pgm.html
6987 // PPM: http://netpbm.sourceforge.net/doc/ppm.html
6988 //
6989 // Known limitations:
//    Supports '#' comments in the header only before the width, height and max value fields
6991 //    Does not support ASCII image data (formats P2 and P3)
6992 //    Does not support 16-bit-per-channel
6993 
6994 #ifndef STBI_NO_PNM
6995 
// Checks for the two-byte magic "P5" or "P6".
// Returns 1 on a match, leaving the stream positioned after the magic.
// Otherwise rewinds the stream and returns 0.
static int      stbi__pnm_test(stbi__context *s)
{
   char magic, variant;

   magic   = (char) stbi__get8(s);
   variant = (char) stbi__get8(s);

   if (magic == 'P' && (variant == '5' || variant == '6'))
      return 1;

   stbi__rewind( s );
   return 0;
}
7007 
// Loads a binary PGM/PPM image.
//
//   x, y     - receive the image size
//   comp     - if non-NULL, receives the component count from the file
//   req_comp - if non-zero and different from the file's component count,
//              the pixels are converted with stbi__convert_format
//   ri       - unused
//
// Returns the pixel buffer, or 0 / the result of stbi__errpuc when the header
// is rejected, the size is rejected by stbi__mad3sizes_valid, allocation
// fails, the pixel data is shorter than the header promises, or the
// conversion fails.
static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *out;
   STBI_NOTUSED(ri);

   if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n))
      return 0;

   *x = s->img_x;
   *y = s->img_y;
   if (comp) *comp = s->img_n;

   if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0))
      return stbi__errpuc("too large", "PNM too large");

   out = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0);
   if (!out) return stbi__errpuc("outofmem", "Out of memory");

   // The buffer is not cleared after allocation, so a short read would hand
   // uninitialised bytes back to the caller as pixels. Reject the file.
   // NOTE(review): relies on stbi__getn returning non-zero only when all the
   // requested bytes were read - confirm against its definition.
   if (!stbi__getn(s, out, s->img_n * s->img_x * s->img_y)) {
      STBI_FREE(out);
      return stbi__errpuc("bad PNM", "PNM file truncated");
   }

   if (req_comp && req_comp != s->img_n) {
      out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
      if (out == NULL) return out; // stbi__convert_format frees input on failure
   }
   return out;
}
7033 
// Returns 1 if c is a space, tab, newline, vertical tab, form feed or
// carriage return, and 0 otherwise.
static int      stbi__pnm_isspace(char c)
{
   switch (c) {
      case ' ':
      case '\t':
      case '\n':
      case '\v':
      case '\f':
      case '\r':
         return 1;
      default:
         return 0;
   }
}
7038 
// Advances past whitespace and '#' comments.
// *c holds the current character on entry and the first character that is
// neither whitespace nor part of a comment on return (or whatever was read
// last if the end of the data is reached). A comment runs from '#' up to a
// '\n' or '\r'.
static void     stbi__pnm_skip_whitespace(stbi__context *s, char *c)
{
   int in_comment;

   do {
      while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
         *c = (char) stbi__get8(s);

      in_comment = !stbi__at_eof(s) && *c == '#';
      if (in_comment) {
         while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' )
            *c = (char) stbi__get8(s);
      }
   } while (in_comment);
}
7052 
// Returns 1 if c is an ASCII decimal digit ('0' through '9'), 0 otherwise.
static int      stbi__pnm_isdigit(char c)
{
   if (c < '0')
      return 0;
   return c <= '9';
}
7057 
// Parses an unsigned decimal integer from a PNM header.
//   s  input stream
//   c  in/out: the current character on entry; on return, the first character
//      that was not consumed as a digit
// Returns the parsed value, or 0 if *c is not a digit on entry. A number too
// large for an int saturates at INT_MAX (all of its digits are still
// consumed) rather than overflowing, which would be undefined behavior on
// untrusted input.
static int      stbi__pnm_getinteger(stbi__context *s, char *c)
{
   int value = 0;

   while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
      int digit = *c - '0';
      // value*10 + digit fits in an int exactly when value <= (INT_MAX - digit) / 10
      if (value > (INT_MAX - digit) / 10)
         value = INT_MAX;
      else
         value = value*10 + digit;
      *c = (char) stbi__get8(s);
   }

   return value;
}
7069 
// Parses the header of a binary PGM ("P5") or PPM ("P6") file.
//   s           input stream; it is rewound before parsing starts
//   x, y, comp  optional outputs (any may be NULL) for the width, height and
//               channel count (1 for P5, 3 for P6)
// Returns 1 on success, leaving the stream positioned after the single
// character that follows the max-value field. On any failure the stream is
// rewound and 0 (or the result of stbi__err) is returned: wrong signature,
// missing or non-positive width or height, or a max value above 255.
static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
{
   int maxv, dummy;
   char c, p, t;

   // let callers pass NULL for outputs they do not care about
   if (!x) x = &dummy;
   if (!y) y = &dummy;
   if (!comp) comp = &dummy;

   stbi__rewind(s);

   // Get identifier
   p = (char) stbi__get8(s);
   t = (char) stbi__get8(s);
   if (p != 'P' || (t != '5' && t != '6')) {
       stbi__rewind(s);
       return 0;
   }

   *comp = (t == '6') ? 3 : 1;  // '5' is 1-component .pgm; '6' is 3-component .ppm

   c = (char) stbi__get8(s);
   stbi__pnm_skip_whitespace(s, &c);

   *x = stbi__pnm_getinteger(s, &c); // read width
   if (*x <= 0) {
      // the integer parser yields 0 when no digits are present, so this also
      // catches a header with the width field missing
      stbi__rewind(s);
      return stbi__err("invalid width", "PPM image header had missing or invalid width");
   }
   stbi__pnm_skip_whitespace(s, &c);

   *y = stbi__pnm_getinteger(s, &c); // read height
   if (*y <= 0) {
      stbi__rewind(s);
      return stbi__err("invalid height", "PPM image header had missing or invalid height");
   }
   stbi__pnm_skip_whitespace(s, &c);

   maxv = stbi__pnm_getinteger(s, &c);  // read max value

   if (maxv > 255) {
      // rewind like the signature-mismatch path so the next prober starts at the beginning
      stbi__rewind(s);
      return stbi__err("max value > 255", "PPM image not 8-bit");
   }

   return 1;
}
7107 #endif
7108 
// Tries each compiled-in format's *_info routine in a fixed order and returns
// 1 as soon as one of them reports success; x, y and comp are passed straight
// through to those routines. If no routine succeeds, reports an
// "unknown image type" error via stbi__err and returns its result.
static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
{
#ifndef STBI_NO_JPEG
   if (stbi__jpeg_info(s, x, y, comp)) { return 1; }
#endif

#ifndef STBI_NO_PNG
   if (stbi__png_info(s, x, y, comp)) { return 1; }
#endif

#ifndef STBI_NO_GIF
   if (stbi__gif_info(s, x, y, comp)) { return 1; }
#endif

#ifndef STBI_NO_BMP
   if (stbi__bmp_info(s, x, y, comp)) { return 1; }
#endif

#ifndef STBI_NO_PSD
   if (stbi__psd_info(s, x, y, comp)) { return 1; }
#endif

#ifndef STBI_NO_PIC
   if (stbi__pic_info(s, x, y, comp)) { return 1; }
#endif

#ifndef STBI_NO_PNM
   if (stbi__pnm_info(s, x, y, comp)) { return 1; }
#endif

#ifndef STBI_NO_HDR
   if (stbi__hdr_info(s, x, y, comp)) { return 1; }
#endif

   // test tga last because it's a crappy test!
#ifndef STBI_NO_TGA
   if (stbi__tga_info(s, x, y, comp)) { return 1; }
#endif

   return stbi__err("unknown image type", "Image not of any known type, or corrupt");
}
7150 
// Returns 1 if the PNG or PSD 16-bit probe (whichever are compiled in, tried
// in that order) reports success for the stream, and 0 otherwise.
static int stbi__is_16_main(stbi__context *s)
{
#ifndef STBI_NO_PNG
   if (stbi__png_is16(s)) { return 1; }
#endif

#ifndef STBI_NO_PSD
   if (stbi__psd_is16(s)) { return 1; }
#endif

   return 0;
}
7163 
7164 #ifndef STBI_NO_STDIO
stbi_info(char const * filename,int * x,int * y,int * comp)7165 STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
7166 {
7167     FILE *f = stbi__fopen(filename, "rb");
7168     int result;
7169     if (!f) return stbi__err("can't fopen", "Unable to open file");
7170     result = stbi_info_from_file(f, x, y, comp);
7171     fclose(f);
7172     return result;
7173 }
7174 
stbi_info_from_file(FILE * f,int * x,int * y,int * comp)7175 STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
7176 {
7177    int r;
7178    stbi__context s;
7179    long pos = ftell(f);
7180    stbi__start_file(&s, f);
7181    r = stbi__info_main(&s,x,y,comp);
7182    fseek(f,pos,SEEK_SET);
7183    return r;
7184 }
7185 
stbi_is_16_bit(char const * filename)7186 STBIDEF int stbi_is_16_bit(char const *filename)
7187 {
7188     FILE *f = stbi__fopen(filename, "rb");
7189     int result;
7190     if (!f) return stbi__err("can't fopen", "Unable to open file");
7191     result = stbi_is_16_bit_from_file(f);
7192     fclose(f);
7193     return result;
7194 }
7195 
stbi_is_16_bit_from_file(FILE * f)7196 STBIDEF int stbi_is_16_bit_from_file(FILE *f)
7197 {
7198    int r;
7199    stbi__context s;
7200    long pos = ftell(f);
7201    stbi__start_file(&s, f);
7202    r = stbi__is_16_main(&s);
7203    fseek(f,pos,SEEK_SET);
7204    return r;
7205 }
7206 #endif // !STBI_NO_STDIO
7207 
stbi_info_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * comp)7208 STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
7209 {
7210    stbi__context s;
7211    stbi__start_mem(&s,buffer,len);
7212    return stbi__info_main(&s,x,y,comp);
7213 }
7214 
stbi_info_from_callbacks(stbi_io_callbacks const * c,void * user,int * x,int * y,int * comp)7215 STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
7216 {
7217    stbi__context s;
7218    stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
7219    return stbi__info_main(&s,x,y,comp);
7220 }
7221 
stbi_is_16_bit_from_memory(stbi_uc const * buffer,int len)7222 STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len)
7223 {
7224    stbi__context s;
7225    stbi__start_mem(&s,buffer,len);
7226    return stbi__is_16_main(&s);
7227 }
7228 
stbi_is_16_bit_from_callbacks(stbi_io_callbacks const * c,void * user)7229 STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user)
7230 {
7231    stbi__context s;
7232    stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
7233    return stbi__is_16_main(&s);
7234 }
7235 
7236 #endif // STB_IMAGE_IMPLEMENTATION
7237 
7238 /*
7239    revision history:
      2.17  (2018-01-29) change stbi__shiftsigned to avoid clang -O2 bug
7241                          1-bit BMP
7242                          *_is_16_bit api
7243                          avoid warnings
7244       2.16  (2017-07-23) all functions have 16-bit variants;
7245                          STBI_NO_STDIO works again;
7246                          compilation fixes;
7247                          fix rounding in unpremultiply;
7248                          optimize vertical flip;
7249                          disable raw_len validation;
7250                          documentation fixes
7251       2.15  (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;
7252                          warning fixes; disable run-time SSE detection on gcc;
7253                          uniform handling of optional "return" values;
7254                          thread-safe initialization of zlib tables
7255       2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
7256       2.13  (2016-11-29) add 16-bit API, only supported for PNG right now
7257       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
7258       2.11  (2016-04-02) allocate large structures on the stack
7259                          remove white matting for transparent PSD
7260                          fix reported channel count for PNG & BMP
7261                          re-enable SSE2 in non-gcc 64-bit
7262                          support RGB-formatted JPEG
7263                          read 16-bit PNGs (only as 8-bit)
7264       2.10  (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED
7265       2.09  (2016-01-16) allow comments in PNM files
7266                          16-bit-per-pixel TGA (not bit-per-component)
7267                          info() for TGA could break due to .hdr handling
                         info() for BMP now shares code instead of sloppy parse
7269                          can use STBI_REALLOC_SIZED if allocator doesn't support realloc
7270                          code cleanup
7271       2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
7272       2.07  (2015-09-13) fix compiler warnings
7273                          partial animated GIF support
7274                          limited 16-bpc PSD support
7275                          #ifdef unused functions
7276                          bug with < 92 byte PIC,PNM,HDR,TGA
7277       2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value
7278       2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning
7279       2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit
7280       2.03  (2015-04-12) extra corruption checking (mmozeiko)
7281                          stbi_set_flip_vertically_on_load (nguillemot)
7282                          fix NEON support; fix mingw support
7283       2.02  (2015-01-19) fix incorrect assert, fix warning
7284       2.01  (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
7285       2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
7286       2.00  (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
7287                          progressive JPEG (stb)
7288                          PGM/PPM support (Ken Miller)
7289                          STBI_MALLOC,STBI_REALLOC,STBI_FREE
7290                          GIF bugfix -- seemingly never worked
7291                          STBI_NO_*, STBI_ONLY_*
7292       1.48  (2014-12-14) fix incorrectly-named assert()
7293       1.47  (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
7294                          optimize PNG (ryg)
7295                          fix bug in interlaced PNG with user-specified channel count (stb)
7296       1.46  (2014-08-26)
7297               fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
7298       1.45  (2014-08-16)
7299               fix MSVC-ARM internal compiler error by wrapping malloc
7300       1.44  (2014-08-07)
7301               various warning fixes from Ronny Chevalier
7302       1.43  (2014-07-15)
7303               fix MSVC-only compiler problem in code changed in 1.42
7304       1.42  (2014-07-09)
7305               don't define _CRT_SECURE_NO_WARNINGS (affects user code)
7306               fixes to stbi__cleanup_jpeg path
7307               added STBI_ASSERT to avoid requiring assert.h
7308       1.41  (2014-06-25)
7309               fix search&replace from 1.36 that messed up comments/error messages
7310       1.40  (2014-06-22)
7311               fix gcc struct-initialization warning
7312       1.39  (2014-06-15)
7313               fix to TGA optimization when req_comp != number of components in TGA;
7314               fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
7315               add support for BMP version 5 (more ignored fields)
7316       1.38  (2014-06-06)
7317               suppress MSVC warnings on integer casts truncating values
7318               fix accidental rename of 'skip' field of I/O
7319       1.37  (2014-06-04)
7320               remove duplicate typedef
7321       1.36  (2014-06-03)
7322               convert to header file single-file library
7323               if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
7324       1.35  (2014-05-27)
7325               various warnings
7326               fix broken STBI_SIMD path
7327               fix bug where stbi_load_from_file no longer left file pointer in correct place
7328               fix broken non-easy path for 32-bit BMP (possibly never used)
7329               TGA optimization by Arseny Kapoulkine
7330       1.34  (unknown)
7331               use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
7332       1.33  (2011-07-14)
7333               make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
7334       1.32  (2011-07-13)
7335               support for "info" function for all supported filetypes (SpartanJ)
7336       1.31  (2011-06-20)
7337               a few more leak fixes, bug in PNG handling (SpartanJ)
7338       1.30  (2011-06-11)
              added ability to load files via callbacks to accommodate custom input streams (Ben Wenger)
7340               removed deprecated format-specific test/load functions
7341               removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
7342               error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
7343               fix inefficiency in decoding 32-bit BMP (David Woo)
7344       1.29  (2010-08-16)
7345               various warning fixes from Aurelien Pocheville
7346       1.28  (2010-08-01)
7347               fix bug in GIF palette transparency (SpartanJ)
7348       1.27  (2010-08-01)
7349               cast-to-stbi_uc to fix warnings
7350       1.26  (2010-07-24)
7351               fix bug in file buffering for PNG reported by SpartanJ
7352       1.25  (2010-07-17)
7353               refix trans_data warning (Won Chun)
7354       1.24  (2010-07-12)
7355               perf improvements reading from files on platforms with lock-heavy fgetc()
7356               minor perf improvements for jpeg
7357               deprecated type-specific functions so we'll get feedback if they're needed
7358               attempt to fix trans_data warning (Won Chun)
7359       1.23    fixed bug in iPhone support
7360       1.22  (2010-07-10)
7361               removed image *writing* support
7362               stbi_info support from Jetro Lauha
7363               GIF support from Jean-Marc Lienher
7364               iPhone PNG-extensions from James Brown
              warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez (U+017D)emva)
7366       1.21    fix use of 'stbi_uc' in header (reported by jon blow)
7367       1.20    added support for Softimage PIC, by Tom Seddon
7368       1.19    bug in interlaced PNG corruption check (found by ryg)
7369       1.18  (2008-08-02)
7370               fix a threading bug (local mutable static)
7371       1.17    support interlaced PNG
7372       1.16    major bugfix - stbi__convert_format converted one too many pixels
7373       1.15    initialize some fields for thread safety
7374       1.14    fix threadsafe conversion bug
7375               header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
7376       1.13    threadsafe
7377       1.12    const qualifiers in the API
7378       1.11    Support installable IDCT, colorspace conversion routines
7379       1.10    Fixes for 64-bit (don't use "unsigned long")
7380               optimized upsampling by Fabian "ryg" Giesen
7381       1.09    Fix format-conversion for PSD code (bad global variables!)
7382       1.08    Thatcher Ulrich's PSD code integrated by Nicolas Schulz
7383       1.07    attempt to fix C++ warning/errors again
7384       1.06    attempt to fix C++ warning/errors again
7385       1.05    fix TGA loading to return correct *comp and use good luminance calc
7386       1.04    default float alpha is 1, not 255; use 'void *' for stbi_image_free
7387       1.03    bugfixes to STBI_NO_STDIO, STBI_NO_HDR
7388       1.02    support for (subset of) HDR files, float interface for preferred access to them
7389       1.01    fix bug: possible bug in handling right-side up bmps... not sure
7390               fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
7391       1.00    interface to zlib that skips zlib header
7392       0.99    correct handling of alpha in palette
7393       0.98    TGA loader by lonesock; dynamically add loaders (untested)
7394       0.97    jpeg errors on too large a file; also catch another malloc failure
7395       0.96    fix detection of invalid v value - particleman@mollyrocket forum
7396       0.95    during header scan, seek to markers in case of padding
7397       0.94    STBI_NO_STDIO to disable stdio usage; rename all #defines the same
7398       0.93    handle jpegtran output; verbose errors
7399       0.92    read 4,8,16,24,32-bit BMP files of several formats
7400       0.91    output 24-bit Windows 3.0 BMP files
7401       0.90    fix a few more warnings; bump version number to approach 1.0
7402       0.61    bugfixes due to Marc LeBlanc, Christopher Lloyd
7403       0.60    fix compiling as c++
7404       0.59    fix warnings: merge Dave Moore's -Wall fixes
7405       0.58    fix bug: zlib uncompressed mode len/nlen was wrong endian
7406       0.57    fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
7407       0.56    fix bug: zlib uncompressed mode len vs. nlen
7408       0.55    fix bug: restart_interval not initialized to 0
7409       0.54    allow NULL for 'int *comp'
7410       0.53    fix bug in png 3->4; speedup png decoding
7411       0.52    png handles req_comp=3,4 directly; minor cleanup; jpeg comments
7412       0.51    obey req_comp requests, 1-component jpegs return as 1-component,
7413               on 'test' only check type, not whether we support this variant
7414       0.50  (2006-11-19)
7415               first released version
7416 */
7417 
7418 
7419 /*
7420 ------------------------------------------------------------------------------
7421 This software is available under 2 licenses -- choose whichever you prefer.
7422 ------------------------------------------------------------------------------
7423 ALTERNATIVE A - MIT License
7424 Copyright (c) 2017 Sean Barrett
7425 Permission is hereby granted, free of charge, to any person obtaining a copy of
7426 this software and associated documentation files (the "Software"), to deal in
7427 the Software without restriction, including without limitation the rights to
7428 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7429 of the Software, and to permit persons to whom the Software is furnished to do
7430 so, subject to the following conditions:
7431 The above copyright notice and this permission notice shall be included in all
7432 copies or substantial portions of the Software.
7433 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7434 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7435 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
7436 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
7437 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
7438 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
7439 SOFTWARE.
7440 ------------------------------------------------------------------------------
7441 ALTERNATIVE B - Public Domain (www.unlicense.org)
7442 This is free and unencumbered software released into the public domain.
7443 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
7444 software, either in source code form or as a compiled binary, for any purpose,
7445 commercial or non-commercial, and by any means.
7446 In jurisdictions that recognize copyright laws, the author or authors of this
7447 software dedicate any and all copyright interest in the software to the public
7448 domain. We make this dedication for the benefit of the public at large and to
7449 the detriment of our heirs and successors. We intend this dedication to be an
7450 overt act of relinquishment in perpetuity of all present and future rights to
7451 this software under copyright law.
7452 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7453 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7454 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
7455 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
7456 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
7457 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
7458 ------------------------------------------------------------------------------
7459 */
7460